diff --git a/.buildkite/pipelines/periodic-packaging.template.yml b/.buildkite/pipelines/periodic-packaging.template.yml index 64c5fa5060e6c..7d2b4df893266 100644 --- a/.buildkite/pipelines/periodic-packaging.template.yml +++ b/.buildkite/pipelines/periodic-packaging.template.yml @@ -8,7 +8,6 @@ steps: setup: image: - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml index 1c33337dc821f..425a550b9d5e1 100644 --- a/.buildkite/pipelines/periodic-packaging.yml +++ b/.buildkite/pipelines/periodic-packaging.yml @@ -9,7 +9,6 @@ steps: setup: image: - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 diff --git a/.buildkite/pipelines/periodic-platform-support.yml b/.buildkite/pipelines/periodic-platform-support.yml index 867ebe41ed6af..2e6f789f907fa 100644 --- a/.buildkite/pipelines/periodic-platform-support.yml +++ b/.buildkite/pipelines/periodic-platform-support.yml @@ -8,7 +8,6 @@ steps: setup: image: - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 diff --git a/.buildkite/pipelines/pull-request/packaging-tests-unix.yml b/.buildkite/pipelines/pull-request/packaging-tests-unix.yml index d5c937aa4b5a2..963596220442a 100644 --- a/.buildkite/pipelines/pull-request/packaging-tests-unix.yml +++ b/.buildkite/pipelines/pull-request/packaging-tests-unix.yml @@ -11,7 +11,6 @@ steps: setup: image: - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 @@ -40,7 +39,6 @@ steps: setup: image: - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 @@ -69,7 +67,6 @@ steps: setup: image: - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 diff --git a/.buildkite/pull-requests.json b/.buildkite/pull-requests.json index de0212685a8a7..235a4b2dbb4ad 100644 --- a/.buildkite/pull-requests.json +++ b/.buildkite/pull-requests.json @@ -14,21 +14,6 @@ "trigger_comment_regex": "(run\\W+elasticsearch-ci.+)|(^\\s*((buildkite|@elastic(search)?machine)\\s*)?test\\s+this(\\s+please)?)", "cancel_intermediate_builds": true, "cancel_intermediate_builds_on_comment": false - }, - { - "enabled": true, - "pipeline_slug": "elasticsearch-pull-request-check-serverless-submodule", - "allow_org_users": true, - "allowed_repo_permissions": [ - "admin", - "write" - ], - "set_commit_status": false, - "build_on_commit": true, - "build_on_comment": false, - "labels": [ - "test-update-serverless" - ] } ] } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 930de97a3c213..5f7999e243777 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -111,9 +111,9 @@ Contributing to the Elasticsearch codebase **Repository:** [https://github.com/elastic/elasticsearch](https://github.com/elastic/elasticsearch) -JDK 17 is required to build Elasticsearch. You must have a JDK 17 installation +JDK 21 is required to build Elasticsearch. You must have a JDK 21 installation with the environment variable `JAVA_HOME` referencing the path to Java home for -your JDK 17 installation. +your JDK 21 installation. Elasticsearch uses the Gradle wrapper for its build. You can execute Gradle using the wrapper via the `gradlew` script on Unix systems or `gradlew.bat` @@ -152,9 +152,9 @@ The definition of this Elasticsearch cluster can be found [here](build-tools-int ### Importing the project into IntelliJ IDEA The minimum IntelliJ IDEA version required to import the Elasticsearch project is 2020.1. -Elasticsearch builds using Java 17. 
When importing into IntelliJ you will need +Elasticsearch builds using Java 21. When importing into IntelliJ you will need to define an appropriate SDK. The convention is that **this SDK should be named -"17"** so that the project import will detect it automatically. For more details +"21"** so that the project import will detect it automatically. For more details on defining an SDK in IntelliJ please refer to [their documentation](https://www.jetbrains.com/help/idea/sdk.html#define-sdk). SDK definitions are global, so you can add the JDK from any project, or after project import. Importing with a missing JDK will still work, IntelliJ will @@ -660,51 +660,11 @@ node cannot continue to operate as a member of the cluster: Errors like this should be very rare. When in doubt, prefer `WARN` to `ERROR`. -### Version numbers in the Elasticsearch codebase - -Starting in 8.8.0, we have separated out the version number representations -of various aspects of Elasticsearch into their own classes, using their own -numbering scheme separate to release version. The main ones are -`TransportVersion` and `IndexVersion`, representing the version of the -inter-node binary protocol and index data + metadata respectively. - -Separated version numbers are comprised of an integer number. The semantic -meaning of a version number are defined within each `*Version` class. There -is no direct mapping between separated version numbers and the release version. -The versions used by any particular instance of Elasticsearch can be obtained -by querying `/_nodes/info` on the node. - -#### Using separated version numbers - -Whenever a change is made to a component versioned using a separated version -number, there are a few rules that need to be followed: - -1. Each version number represents a specific modification to that component, - and should not be modified once it is defined. Each version is immutable - once merged into `main`. -2. To create a new component version, add a new constant to the respective class - with a descriptive name of the change being made. Increment the integer - number according to the particular `*Version` class. - -If your pull request has a conflict around your new version constant, -you need to update your PR from `main` and change your PR to use the next -available version number. - -### Checking for cluster features - -As part of developing a new feature or change, you might need to determine -if all nodes in a cluster have been upgraded to support your new feature. -This can be done using `FeatureService`. To define and check for a new -feature in a cluster: - -1. Define a new `NodeFeature` constant with a unique id for the feature - in a class related to the change you're doing. -2. Return that constant from an instance of `FeatureSpecification.getFeatures`, - either an existing implementation or a new implementation. Make sure - the implementation is added as an SPI implementation in `module-info.java` - and `META-INF/services`. -3. To check if all nodes in the cluster support the new feature, call -`FeatureService.clusterHasFeature(ClusterState, NodeFeature)` +### Versioning Elasticsearch + +There are various concepts used to identify running node versions, +and the capabilities and compatibility of those nodes. 
For more information, +see `docs/internal/Versioning.md` ### Creating a distribution diff --git a/build-tools-internal/gradle/wrapper/gradle-wrapper.properties b/build-tools-internal/gradle/wrapper/gradle-wrapper.properties index e955ee28dd349..6acc1431eaec1 100644 --- a/build-tools-internal/gradle/wrapper/gradle-wrapper.properties +++ b/build-tools-internal/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip +distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle index d4bb0c1189e8d..d3209ff27ce06 100644 --- a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle +++ b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle @@ -169,6 +169,7 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') { '-ea', '-Djava.security.manager=allow', '-Djava.locale.providers=CLDR', + '-Dtests.testfeatures.enabled=true', '-Des.nativelibs.path="' + testLibraryPath + '"', // TODO: only open these for mockito when it is modularized '--add-opens=java.base/java.security.cert=ALL-UNNAMED', diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java index aa9aaa3064024..ed1689cfb0eb9 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java @@ -16,7 +16,7 @@ public enum DockerBase { DEFAULT("ubuntu:20.04", "", "apt-get"), // "latest" here is intentional, since the image name specifies "8" - UBI("docker.elastic.co/ubi8/ubi-minimal:latest", "-ubi8", "microdnf"), + UBI("docker.elastic.co/ubi8/ubi-minimal:latest", "-ubi", "microdnf"), // The Iron Bank base image is UBI (albeit hardened), but we are required to parameterize the Docker build IRON_BANK("${BASE_REGISTRY}/${BASE_IMAGE}:${BASE_TAG}", "-ironbank", "yum"), diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java index 19ab49a851907..3fd59dc7a95f1 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java @@ -108,6 +108,7 @@ public void execute(Task t) { "-Xmx" + System.getProperty("tests.heap.size", "512m"), "-Xms" + System.getProperty("tests.heap.size", "512m"), "-Djava.security.manager=allow", + "-Dtests.testfeatures.enabled=true", "--add-opens=java.base/java.util=ALL-UNNAMED", // TODO: only open these for mockito when it is modularized "--add-opens=java.base/java.security.cert=ALL-UNNAMED", diff --git a/build-tools-internal/src/main/resources/minimumGradleVersion b/build-tools-internal/src/main/resources/minimumGradleVersion index 2eb8a97206651..dd78a707858a7 100644 --- 
a/build-tools-internal/src/main/resources/minimumGradleVersion +++ b/build-tools-internal/src/main/resources/minimumGradleVersion @@ -1 +1 @@ -8.10.1 \ No newline at end of file +8.10.2 \ No newline at end of file diff --git a/build-tools-internal/src/test/java/org/elasticsearch/gradle/internal/test/rest/transform/header/InjectHeaderTests.java b/build-tools-internal/src/test/java/org/elasticsearch/gradle/internal/test/rest/transform/header/InjectHeaderTests.java index 3ba9ab0f697e0..0ef7cc7108bce 100644 --- a/build-tools-internal/src/test/java/org/elasticsearch/gradle/internal/test/rest/transform/header/InjectHeaderTests.java +++ b/build-tools-internal/src/test/java/org/elasticsearch/gradle/internal/test/rest/transform/header/InjectHeaderTests.java @@ -26,9 +26,9 @@ public class InjectHeaderTests extends InjectFeatureTests { private static final Map headers = Map.of( "Content-Type", - "application/vnd.elasticsearch+json;compatible-with=7", + "application/vnd.elasticsearch+json;compatible-with=8", "Accept", - "application/vnd.elasticsearch+json;compatible-with=7" + "application/vnd.elasticsearch+json;compatible-with=8" ); /** diff --git a/docs/changelog/111834.yaml b/docs/changelog/111834.yaml new file mode 100644 index 0000000000000..4548dee5f91e5 --- /dev/null +++ b/docs/changelog/111834.yaml @@ -0,0 +1,5 @@ +pr: 111834 +summary: Add inner hits support to semantic query +area: Search +type: enhancement +issues: [] diff --git a/docs/changelog/112092.yaml b/docs/changelog/112092.yaml deleted file mode 100644 index 35c731074d760..0000000000000 --- a/docs/changelog/112092.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 112092 -summary: "Apply auto-flattening to `subobjects: auto`" -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/113143.yaml b/docs/changelog/113143.yaml new file mode 100644 index 0000000000000..4a2044cca0ce4 --- /dev/null +++ b/docs/changelog/113143.yaml @@ -0,0 +1,10 @@ +pr: 113143 +summary: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10 +area: Analysis +type: deprecation +issues: [] +deprecation: + title: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10 + area: Analysis + details: kp, dutch_kp, dutchKp and lovins stemmers are deprecated and will be removed. + impact: These stemmers will be removed and will be no longer supported. diff --git a/docs/changelog/113187.yaml b/docs/changelog/113187.yaml new file mode 100644 index 0000000000000..397179c4bc3bb --- /dev/null +++ b/docs/changelog/113187.yaml @@ -0,0 +1,5 @@ +pr: 113187 +summary: Preserve Step Info Across ILM Auto Retries +area: ILM+SLM +type: enhancement +issues: [] diff --git a/docs/changelog/113413.yaml b/docs/changelog/113413.yaml new file mode 100644 index 0000000000000..8b1104ba61fe4 --- /dev/null +++ b/docs/changelog/113413.yaml @@ -0,0 +1,6 @@ +pr: 113413 +summary: Fixed a `NullPointerException` in `_capabilities` API when the `path` parameter is null. 
+area: Infra/REST API +type: bug +issues: + - 113413 diff --git a/docs/changelog/113437.yaml b/docs/changelog/113437.yaml new file mode 100644 index 0000000000000..98831958e63f8 --- /dev/null +++ b/docs/changelog/113437.yaml @@ -0,0 +1,6 @@ +pr: 113437 +summary: Fix check on E5 model platform compatibility +area: Machine Learning +type: bug +issues: + - 113577 diff --git a/docs/changelog/113552.yaml b/docs/changelog/113552.yaml new file mode 100644 index 0000000000000..48f7da309e82e --- /dev/null +++ b/docs/changelog/113552.yaml @@ -0,0 +1,5 @@ +pr: 113552 +summary: Tag redacted document in ingest metadata +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/changelog/113570.yaml b/docs/changelog/113570.yaml new file mode 100644 index 0000000000000..8cfad9195c5cd --- /dev/null +++ b/docs/changelog/113570.yaml @@ -0,0 +1,7 @@ +pr: 113570 +summary: Fix `ignore_above` handling in synthetic source when index level setting + is used +area: Logs +type: bug +issues: + - 113538 diff --git a/docs/changelog/113699.yaml b/docs/changelog/113699.yaml new file mode 100644 index 0000000000000..3876c8147e7eb --- /dev/null +++ b/docs/changelog/113699.yaml @@ -0,0 +1,5 @@ +pr: 113699 +summary: "[ESQL] Fix init value in max float aggregation" +area: ES|QL +type: bug +issues: [] diff --git a/docs/changelog/113723.yaml b/docs/changelog/113723.yaml new file mode 100644 index 0000000000000..2cbcf49102719 --- /dev/null +++ b/docs/changelog/113723.yaml @@ -0,0 +1,6 @@ +pr: 113723 +summary: Fix max file size check to use `getMaxFileSize` +area: Infra/Core +type: bug +issues: + - 113705 diff --git a/docs/internal/Versioning.md b/docs/internal/Versioning.md new file mode 100644 index 0000000000000..f0f730f618259 --- /dev/null +++ b/docs/internal/Versioning.md @@ -0,0 +1,297 @@ +Versioning Elasticsearch +======================== + +Elasticsearch is a complicated product, and is run in many different scenarios. +A single version number is not sufficient to cover the whole of the product, +instead we need different concepts to provide versioning capabilities +for different aspects of Elasticsearch, depending on their scope, updatability, +responsiveness, and maintenance. + +## Release version + +This is the version number used for published releases of Elasticsearch, +and the Elastic stack. This takes the form _major.minor.patch_, +with a corresponding version id. + +Uses of this version number should be avoided, as it does not apply to +some scenarios, and use of release version will break Elasticsearch nodes. + +The release version is accessible in code through `Build.current().version()`, +but it **should not** be assumed that this is a semantic version number, +it could be any arbitrary string. + +## Transport protocol + +The transport protocol is used to send binary data between Elasticsearch nodes; +`TransportVersion` is the version number used for this protocol. +This version number is negotiated between each pair of nodes in the cluster +on first connection, and is set as the lower of the highest transport version +understood by each node. +This version is then accessible through the `getTransportVersion` method +on `StreamInput` and `StreamOutput`, so serialization code can read/write +objects in a form that will be understood by the other node. + +Every change to the transport protocol is represented by a new transport version, +higher than all previous transport versions, which then becomes the highest version +recognized by that build of Elasticsearch. 
The version ids are stored +as constants in the `TransportVersions` class. +Each id has a standard pattern `M_NNN_SS_P`, where: +* `M` is the major version +* `NNN` is an incrementing id +* `SS` is used in subsidiary repos amending the default transport protocol +* `P` is used for patches and backports + +When you make a change to the serialization form of any object, +you need to create a new sequential constant in `TransportVersions`, +introduced in the same PR that adds the change, that increments +the `NNN` component from the previous highest version, +with other components set to zero. +For example, if the previous version number is `8_413_00_1`, +the next version number should be `8_414_00_0`. + +Once you have defined your constant, you then need to use it +in serialization code. If the transport version is at or above the new id, +the modified protocol should be used: + + str = in.readString(); + bool = in.readBoolean(); + if (in.getTransportVersion().onOrAfter(TransportVersions.NEW_CONSTANT)) { + num = in.readVInt(); + } + +If a transport version change needs to be reverted, a **new** version constant +should be added representing the revert, and the version id checks +adjusted appropriately to only use the modified protocol between the version id +the change was added, and the new version id used for the revert (exclusive). +The `between` method can be used for this. + +Once a transport change with a new version has been merged into main or a release branch, +it **must not** be modified - this is so the meaning of that specific +transport version does not change. + +_Elastic developers_ - please see corresponding documentation for Serverless +on creating transport versions for Serverless changes. + +### Collapsing transport versions + +As each change adds a new constant, the list of constants in `TransportVersions` +will keep growing. However, once there has been an official release of Elasticsearch, +that includes that change, that specific transport version is no longer needed, +apart from constants that happen to be used for release builds. +As part of managing transport versions, consecutive transport versions can be +periodically collapsed together into those that are only used for release builds. +This task is normally performed by Core/Infra on a semi-regular basis, +usually after each new minor release, to collapse the transport versions +for the previous minor release. An example of such an operation can be found +[here](https://github.com/elastic/elasticsearch/pull/104937). + +### Minimum compatibility versions + +The transport version used between two nodes is determined by the initial handshake +(see `TransportHandshaker`, where the two nodes swap their highest known transport version). +The lowest transport version that is compatible with the current node +is determined by `TransportVersions.MINIMUM_COMPATIBLE`, +and the node is prevented from joining the cluster if it is below that version. +This constant should be updated manually on a major release. + +The minimum version that can be used for CCS is determined by +`TransportVersions.MINIMUM_CCS_VERSION`, but this is not actively checked +before queries are performed. Only if a query cannot be serialized at that +version is an action rejected. This constant is updated automatically +as part of performing a release. 
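+
+As an indicative sketch (not the actual handshake code), such a compatibility
+gate is just a comparison against the relevant constant. Here
+`remoteTransportVersion` is a stand-in for the version negotiated with the
+other node:
+
+    // reject the other node if its highest transport version is below
+    // the minimum this build can still understand
+    if (remoteTransportVersion.before(TransportVersions.MINIMUM_COMPATIBLE)) {
+        // the node cannot join this cluster
+    }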
+ +### Mapping to release versions + +For releases that do use a version number, it can be confusing to encounter +a log or exception message that references an arbitrary transport version, +where you don't know which release version that corresponds to. This is where +the `.toReleaseVersion()` method comes in. It uses metadata stored in a csv file +(`TransportVersions.csv`) to map from the transport version id to the corresponding +release version. For any transport versions it encounters without a direct map, +it performs a best guess based on the information it has. The csv file +is updated automatically as part of performing a release. + +In releases that do not have a release version number, that method becomes +a no-op. + +### Managing patches and backports + +Backporting transport version changes to previous releases +should only be done if absolutely necessary, as it is very easy to get wrong +and break the release in a way that is very hard to recover from. + +If we consider the version number as an incrementing line, what we are doing is +grafting a change that takes effect at a certain point in the line, +to additionally take effect in a fixed window earlier in the line. + +To take an example, using indicative version numbers, when the latest +transport version is 52, we decide we need to backport a change done in +transport version 50 to transport version 45. We use the `P` version id component +to create version 45.1 with the backported change. +This change will apply for version ids 45.1 to 45.9 (should they exist in the future). + +The serialization code in the backport needs to use the backported protocol +for all version numbers 45.1 to 45.9. The `TransportVersion.isPatchFrom` method +can be used to easily determine if this is the case: `streamVersion.isPatchFrom(45.1)`. +However, the `onOrAfter` also does what is needed on patch branches. + +The serialization code in version 53 then needs to additionally check +version numbers 45.1-45.9 to use the backported protocol, also using the `isPatchFrom` method. + +As an example, [this transport change](https://github.com/elastic/elasticsearch/pull/107862) +was backported from 8.15 to [8.14.0](https://github.com/elastic/elasticsearch/pull/108251) +and [8.13.4](https://github.com/elastic/elasticsearch/pull/108250) at the same time +(8.14 was a build candidate at the time). + +The 8.13 PR has: + + if (transportVersion.onOrAfter(8.13_backport_id)) + +The 8.14 PR has: + + if (transportVersion.isPatchFrom(8.13_backport_id) + || transportVersion.onOrAfter(8.14_backport_id)) + +The 8.15 PR has: + + if (transportVersion.isPatchFrom(8.13_backport_id) + || transportVersion.isPatchFrom(8.14_backport_id) + || transportVersion.onOrAfter(8.15_transport_id)) + +In particular, if you are backporting a change to a patch release, +you also need to make sure that any subsequent released version on any branch +also has that change, and knows about the patch backport ids and what they mean. + +## Index version + +Index version is a single incrementing version number for the index data format, +metadata, and associated mappings. It is declared the same way as the +transport version - with the pattern `M_NNN_SS_P`, for the major version, version id, +subsidiary version id, and patch number respectively. + +Index version is stored in index metadata when an index is created, +and it is used to determine the storage format and what functionality that index supports. +The index version does not change once an index is created. 
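+
+As an indicative sketch (not production code), gating behaviour on the version
+an index was created with looks much like the transport version checks above.
+Here `indexCreatedVersion` stands in for the creation version read from the
+index metadata, and `NEW_STORAGE_FORMAT` is a hypothetical constant, assumed
+here to live in an `IndexVersions` constants class analogous to `TransportVersions`:
+
+    if (indexCreatedVersion.onOrAfter(IndexVersions.NEW_STORAGE_FORMAT)) {
+        // use the newer on-disk representation
+    } else {
+        // keep using the format this index was created with
+    }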
+ +In the same way as transport versions, when a change is needed to the index +data format or metadata, or new mapping types are added, create a new version constant +below the last one, incrementing the `NNN` version component. + +Unlike transport version, version constants cannot be collapsed together, +as an index keeps its creation version id once it is created. +Fortunately, new index versions are only created once a month or so, +so we don’t have a large list of index versions that need managing. + +Similar to transport version, index version has a `toReleaseVersion` to map +onto release versions, in appropriate situations. + +## Cluster Features + +Cluster features are identifiers, published by a node in cluster state, +indicating they support a particular top-level operation or set of functionality. +They are used for internal checks within Elasticsearch, and for gating tests +on certain functionality. For example, to check all nodes have upgraded +to a certain point before running a large migration operation to a new data format. +Cluster features should not be referenced by anything outside the Elasticsearch codebase. + +Cluster features are indicative of top-level functionality introduced to +Elasticsearch - e.g. a new transport endpoint, or new operations. + +It is also used to check nodes can join a cluster - once all nodes in a cluster +support a particular feature, no nodes can then join the cluster that do not +support that feature. This is to ensure that once a feature is supported +by a cluster, it will then always be supported in the future. + +To declare a new cluster feature, add an implementation of the `FeatureSpecification` SPI, +suitably registered (or use an existing one for your code area), and add the feature +as a constant to be returned by getFeatures. To then check whether all nodes +in the cluster support that feature, use the method `clusterHasFeature` on `FeatureService`. +It is only possible to check whether all nodes in the cluster have a feature; +individual node checks should not be done. + +Once a cluster feature is declared and deployed, it cannot be modified or removed, +else new nodes will not be able to join existing clusters. +If functionality represented by a cluster feature needs to be removed, +a new cluster feature should be added indicating that functionality is no longer +supported, and the code modified accordingly (bearing in mind additional BwC constraints). + +The cluster features infrastructure is only designed to support a few hundred features +per major release, and once features are added to a cluster they can not be removed. +Cluster features should therefore be used sparingly. +Adding too many cluster features risks increasing cluster instability. + +When we release a new major version N, we limit our backwards compatibility +to the highest minor of the previous major N-1. Therefore, any cluster formed +with the new major version is guaranteed to have all features introduced during +releases of major N-1. All such features can be deemed to be met by the cluster, +and the features themselves can be removed from cluster state over time, +and the feature checks removed from the code of major version N. + +### Testing + +Tests often want to check if a certain feature is implemented / available on all nodes, +particularly BwC or mixed cluster test. + +Rather than introducing a production feature just for a test condition, +this can be done by adding a _test feature_ in an implementation of +`FeatureSpecification.getTestFeatures`. 
These features will only be set +on clusters running as part of an integration test. Even so, cluster features +should be used sparingly if possible; Capabilities is generally a better +option for test conditions. + +In Java Rest tests, checking cluster features can be done using +`ESRestTestCase.clusterHasFeature(feature)` + +In YAML Rest tests, conditions can be defined in the `requires` or `skip` sections +that use cluster features; see [here](https://github.com/elastic/elasticsearch/blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc#skipping-tests) for more information. + +To aid with backwards compatibility tests, the test framework adds synthetic features +for each previously released Elasticsearch version, of the form `gte_v{VERSION}` +(for example `gte_v8.14.2`). +This can be used to add conditions based on previous releases. It _cannot_ be used +to check the current snapshot version; real features or capabilities should be +used instead. + +## Capabilities + +The Capabilities API is a REST API for external clients to check the capabilities +of an Elasticsearch cluster. As it is dynamically calculated for every query, +it is not limited in size or usage. + +A capabilities query can be used to query for 3 things: +* Is this endpoint supported for this HTTP method? +* Are these parameters of this endpoint supported? +* Are these capabilities (arbitrary string ids) of this endpoint supported? + +The API will return with a simple true/false, indicating if all specified aspects +of the endpoint are supported by all nodes in the cluster. +If any aspect is not supported by any one node, the API returns `false`. + +The API can also return `supported: null` (indicating unknown) +if there was a problem communicating with one or more nodes in the cluster. + +All registered endpoints automatically work with the endpoint existence check. +To add support for parameter and feature capability queries to your REST endpoint, +implement the `supportedQueryParameters` and `supportedCapabilities` methods in your rest handler. + +To perform a capability query, perform a REST call to the `_capabilities` API, +with parameters `method`, `path`, `parameters`, `capabilities`. +The call will query every node in the cluster, and return `{supported: true}` +if all nodes support that specific combination of method, path, query parameters, +and endpoint capabilities. If any single aspect is not supported, +the query will return `{supported: false}`. If there are any problems +communicating with nodes in the cluster, the response will be `{supported: null}` +indicating support or lack thereof cannot currently be determined. +Capabilities can be checked using the clusterHasCapability method in ESRestTestCase. + +Similar to cluster features, YAML tests can have skip and requires conditions +specified with capabilities like the following: + + - requires: + capabilities: + - method: GET + path: /_endpoint + parameters: [param1, param2] + capabilities: [cap1, cap2] + +method: GET is the default, and does not need to be explicitly specified. 
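+
+For the `clusterHasCapability` check mentioned above, a Java REST test might
+look something like the following sketch. The endpoint, parameter and
+capability names are placeholders, and the helper is assumed to expose the
+tri-state result (supported / not supported / unknown) as an `Optional`;
+the exact signature in `ESRestTestCase` may differ slightly:
+
+    // only run the test if every node supports cap1 on GET /_endpoint
+    assumeTrue(
+        "requires cap1 support on /_endpoint",
+        clusterHasCapability("GET", "/_endpoint", List.of("param1"), List.of("cap1")).orElse(false)
+    );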
diff --git a/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc index 57e402988cc5a..d8300288c9f4b 100644 --- a/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc @@ -11,6 +11,8 @@ values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`, `Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`, `Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`. +deprecated:[8.16.0, `Kp` and `Lovins` support will be removed in a future version] + For example: [source,console] @@ -28,7 +30,7 @@ PUT /my-index-000001 "filter": { "my_snow": { "type": "snowball", - "language": "Lovins" + "language": "English" } } } diff --git a/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc index 42ac594fca3bf..4cd088935af19 100644 --- a/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc @@ -144,12 +144,12 @@ https://snowballstem.org/algorithms/danish/stemmer.html[*`danish`*] Dutch:: https://snowballstem.org/algorithms/dutch/stemmer.html[*`dutch`*], -https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] +https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] deprecated:[8.16.0, `dutch_kp` will be removed in a future version] English:: https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*], https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`], -https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`], +https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`] deprecated:[8.16.0, `lovins` will be removed in a future version], https://www.researchgate.net/publication/220433848_How_effective_is_suffixing[`minimal_english`], https://snowballstem.org/algorithms/english/stemmer.html[`porter2`], {lucene-analysis-docs}/en/EnglishPossessiveFilter.html[`possessive_english`] diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc index 61c58cea95b83..adf8229712ecc 100644 --- a/docs/reference/cluster/nodes-stats.asciidoc +++ b/docs/reference/cluster/nodes-stats.asciidoc @@ -1716,6 +1716,10 @@ See <> for more information about disk watermarks a `io_stats` (Linux only):: (objects) Contains I/O statistics for the node. + +NOTE: These statistics are derived from the `/proc/diskstats` kernel interface. +This interface accounts for IO performed by all processes on the system, even +if you are running {es} within a container. + .Properties of `io_stats` [%collapsible%open] diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc index 575a6457804a6..8e4f630ef7da4 100644 --- a/docs/reference/cluster/stats.asciidoc +++ b/docs/reference/cluster/stats.asciidoc @@ -40,6 +40,10 @@ If a node does not respond before its timeout expires, the response does not inc However, timed out nodes are included in the response's `_nodes.failed` property. Defaults to no timeout. +`include_remotes`:: +(Optional, Boolean) If `true`, includes remote cluster information in the response. +Defaults to `false`, so no remote cluster information is returned. 
+ [role="child_attributes"] [[cluster-stats-api-response-body]] ==== {api-response-body-title} @@ -183,12 +187,11 @@ This number is based on documents in Lucene segments and may include documents f This number is based on documents in Lucene segments. {es} reclaims the disk space of deleted Lucene documents when a segment is merged. `total_size_in_bytes`:: -(integer) -Total size in bytes across all primary shards assigned to selected nodes. +(integer) Total size in bytes across all primary shards assigned to selected nodes. `total_size`:: -(string) -Total size across all primary shards assigned to selected nodes, as a human-readable string. +(string) Total size across all primary shards assigned to selected nodes, as a human-readable string. + ===== `store`:: @@ -1285,8 +1288,7 @@ They are included here for expert users, but should otherwise be ignored. ==== `repositories`:: -(object) Contains statistics about the <> repositories defined in the cluster, broken down -by repository type. +(object) Contains statistics about the <> repositories defined in the cluster, broken down by repository type. + .Properties of `repositories` [%collapsible%open] @@ -1314,13 +1316,74 @@ Each repository type may also include other statistics about the repositories of [%collapsible%open] ===== +`clusters`::: +(object) Contains remote cluster settings and metrics collected from them. +The keys are cluster names, and the values are per-cluster data. +Only present if `include_remotes` option is set to `true`. + ++ +.Properties of `clusters` +[%collapsible%open] +====== + +`cluster_uuid`::: +(string) The UUID of the remote cluster. + +`mode`::: +(string) The <> used to communicate with the remote cluster. + +`skip_unavailable`::: +(Boolean) The `skip_unavailable` <> used for this remote cluster. + +`transport.compress`::: +(string) Transport compression setting used for this remote cluster. + +`version`::: +(array of strings) The list of {es} versions used by the nodes on the remote cluster. + +`status`::: +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cluster-health-status] ++ +See <>. + +`nodes_count`::: +(integer) The total count of nodes in the remote cluster. + +`shards_count`::: +(integer) The total number of shards in the remote cluster. + +`indices_count`::: +(integer) The total number of indices in the remote cluster. + +`indices_total_size_in_bytes`::: +(integer) Total data set size, in bytes, of all shards assigned to selected nodes. + +`indices_total_size`::: +(string) Total data set size, in bytes, of all shards assigned to selected nodes, as a human-readable string. + +`max_heap_in_bytes`::: +(integer) Maximum amount of memory, in bytes, available for use by the heap across the nodes of the remote cluster. + +`max_heap`::: +(string) Maximum amount of memory, in bytes, available for use by the heap across the nodes of the remote cluster, +as a human-readable string. + +`mem_total_in_bytes`::: +(integer) Total amount, in bytes, of physical memory across the nodes of the remote cluster. + +`mem_total`::: +(string) Total amount, in bytes, of physical memory across the nodes of the remote cluster, as a human-readable string. + +====== + `_search`::: -(object) Contains the telemetry information about the <> usage in the cluster. +(object) Contains the information about the <> usage in the cluster. + .Properties of `_search` [%collapsible%open] ====== + `total`::: (integer) The total number of {ccs} requests that have been executed by the cluster. 
@@ -1336,6 +1399,7 @@ Each repository type may also include other statistics about the repositories of .Properties of `took` [%collapsible%open] ======= + `max`::: (integer) The maximum time taken to execute a {ccs} request, in milliseconds. @@ -1344,6 +1408,7 @@ Each repository type may also include other statistics about the repositories of `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. + ======= `took_mrt_true`:: @@ -1361,6 +1426,7 @@ Each repository type may also include other statistics about the repositories of `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. + ======= `took_mrt_false`:: @@ -1378,6 +1444,7 @@ Each repository type may also include other statistics about the repositories of `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. + ======= `remotes_per_search_max`:: @@ -1391,9 +1458,10 @@ Each repository type may also include other statistics about the repositories of The keys are the failure reason names and the values are the number of requests that failed for that reason. `features`:: -(object) Contains statistics about the features used in {ccs} requests. The keys are the names of the search feature, -and the values are the number of requests that used that feature. Single request can use more than one feature -(e.g. both `async` and `wildcard`). Known features are: +(object) Contains statistics about the features used in {ccs} requests. +The keys are the names of the search feature, and the values are the number of requests that used that feature. +Single request can use more than one feature (e.g. both `async` and `wildcard`). +Known features are: * `async` - <> @@ -1427,6 +1495,7 @@ This may include requests where partial results were returned, but not requests .Properties of `took` [%collapsible%open] ======== + `max`::: (integer) The maximum time taken to execute a {ccs} request, in milliseconds. @@ -1435,6 +1504,7 @@ This may include requests where partial results were returned, but not requests `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. + ======== ======= @@ -1812,3 +1882,37 @@ This API can be restricted to a subset of the nodes using <>. + [[cancel-connector-sync-job-api-request]] ==== {api-request-title} @@ -17,7 +18,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[cancel-connector-sync-job-api-prereqs]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_sync_job_id` parameter should reference an existing connector sync job. [[cancel-connector-sync-job-api-desc]] diff --git a/docs/reference/connector/apis/check-in-connector-api.asciidoc b/docs/reference/connector/apis/check-in-connector-api.asciidoc index 8c6b5161a3a72..15e65b10074d8 100644 --- a/docs/reference/connector/apis/check-in-connector-api.asciidoc +++ b/docs/reference/connector/apis/check-in-connector-api.asciidoc @@ -8,7 +8,8 @@ preview::[] Updates the `last_seen` field of a connector with current timestamp. 
-To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[check-in-connector-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[check-in-connector-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[check-in-connector-api-path-params]] diff --git a/docs/reference/connector/apis/check-in-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/check-in-connector-sync-job-api.asciidoc index a052fbb2418cc..8d7d0a36ad88a 100644 --- a/docs/reference/connector/apis/check-in-connector-sync-job-api.asciidoc +++ b/docs/reference/connector/apis/check-in-connector-sync-job-api.asciidoc @@ -8,7 +8,8 @@ preview::[] Checks in a connector sync job (updates `last_seen` to the current time). -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[check-in-connector-sync-job-api-request]] ==== {api-request-title} @@ -17,7 +18,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[check-in-connector-sync-job-api-prereqs]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_sync_job_id` parameter should reference an existing connector sync job. [[check-in-connector-sync-job-api-desc]] diff --git a/docs/reference/connector/apis/claim-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/claim-connector-sync-job-api.asciidoc index 2fb28f9e9fb37..62491582ce757 100644 --- a/docs/reference/connector/apis/claim-connector-sync-job-api.asciidoc +++ b/docs/reference/connector/apis/claim-connector-sync-job-api.asciidoc @@ -10,7 +10,8 @@ Claims a connector sync job. The `_claim` endpoint is not intended for direct connector management by users. It is there to support the implementation of services that utilize the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol] to communicate with {es}. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. 
+ [[claim-connector-sync-job-api-request]] ==== {api-request-title} @@ -19,7 +20,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[claim-connector-sync-job-api-prereqs]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_sync_job_id` parameter should reference an existing connector sync job. [[claim-connector-sync-job-api-desc]] diff --git a/docs/reference/connector/apis/connector-apis.asciidoc b/docs/reference/connector/apis/connector-apis.asciidoc index 3de4483adcfd1..15ce31a605986 100644 --- a/docs/reference/connector/apis/connector-apis.asciidoc +++ b/docs/reference/connector/apis/connector-apis.asciidoc @@ -3,14 +3,16 @@ beta::[] -The connector and sync jobs APIs provide a convenient way to create and manage Elastic {enterprise-search-ref}/connectors.html[connectors^] and sync jobs in an internal index. To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +The connector and sync jobs APIs provide a convenient way to create and manage Elastic <>. + and sync jobs in an internal index. To get started with Connector APIs, check out <>. + Connectors are {es} integrations that bring content from third-party data sources, which can be deployed on {ecloud} or hosted on your own infrastructure: -* *Native connectors* are a managed service on {ecloud} -* *Connector clients* are self-managed on your infrastructure +* *Managed connectors* are a managed service on {ecloud} +* *Self-managed connectors* are self-hosted on your infrastructure -Find a list of all supported service types in the {enterprise-search-ref}/connectors.html[connectors documentation^]. +Find a list of all supported service types in the <>. This API provides an alternative to relying solely on {kib} UI for connector and sync job management. The API comes with a set of validations and assertions to ensure that the state representation in the internal index remains valid. diff --git a/docs/reference/connector/apis/create-connector-api.asciidoc b/docs/reference/connector/apis/create-connector-api.asciidoc index 9bd49a3c5ef94..a115eab8853c0 100644 --- a/docs/reference/connector/apis/create-connector-api.asciidoc +++ b/docs/reference/connector/apis/create-connector-api.asciidoc @@ -9,12 +9,13 @@ beta::[] Creates an Elastic connector. Connectors are {es} integrations that bring content from third-party data sources, which can be deployed on {ecloud} or hosted on your own infrastructure: -* *Native connectors* are a managed service on {ecloud} -* *Connector clients* are self-managed on your infrastructure +* *Managed connectors* are a managed service on {ecloud} +* *Self-managed connectors* are self-hosted on your infrastructure -Find a list of all supported service types in the {enterprise-search-ref}/connectors.html[connectors documentation^]. +Find a list of all supported service types in the <>. + +To get started with Connector APIs, check out <>. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. 
[source,console] -------------------------------------------------- @@ -43,8 +44,8 @@ DELETE _connector/my-connector [[create-connector-api-prereqs]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. -* The `service_type` parameter should reference a supported third-party service. See the available service types for {enterprise-search-ref}/native-connectors.html[native] and {enterprise-search-ref}/build-connector.html[self-managed] connectors. This can also reference the service type of your custom connector. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. +* The `service_type` parameter should reference a supported third-party service. See the available service types for <> and <> connectors. This can also reference the service type of your custom connector. [[create-connector-api-desc]] @@ -73,13 +74,13 @@ Creates a connector document in the internal index and initializes its configura (Optional, string) The name of the connector. Setting the connector name is recommended when managing connectors in {kib}. `is_native`:: -(Optional, boolean) Indicates if it's a native connector. Defaults to `false`. +(Optional, boolean) Indicates if it's a managed connector. Defaults to `false`. `language`:: (Optional, string) Language analyzer for the data. Limited to supported languages. `service_type`:: -(Optional, string) Connector service type. Can reference Elastic-supported third-party services or a custom connector type. See the available service types for {enterprise-search-ref}/native-connectors.html[native] and {enterprise-search-ref}/build-connector.html[self-managed] connectors. +(Optional, string) Connector service type. Can reference Elastic-supported third-party services or a custom connector type. See the available service types for <> and <> connectors. [role="child_attributes"] diff --git a/docs/reference/connector/apis/create-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/create-connector-sync-job-api.asciidoc index c7cc866930dfb..240ab696954f3 100644 --- a/docs/reference/connector/apis/create-connector-sync-job-api.asciidoc +++ b/docs/reference/connector/apis/create-connector-sync-job-api.asciidoc @@ -9,7 +9,8 @@ beta::[] Creates a connector sync job. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [source, console] -------------------------------------------------- @@ -31,7 +32,7 @@ POST _connector/_sync_job [[create-connector-sync-job-api-prereqs]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `id` parameter should reference an existing connector. 
[[create-connector-sync-job-api-desc]] diff --git a/docs/reference/connector/apis/delete-connector-api.asciidoc b/docs/reference/connector/apis/delete-connector-api.asciidoc index 23acd1b4755b1..76621d7f1843b 100644 --- a/docs/reference/connector/apis/delete-connector-api.asciidoc +++ b/docs/reference/connector/apis/delete-connector-api.asciidoc @@ -11,7 +11,8 @@ This is a destructive action that is not recoverable. Note: this action doesn't delete any API key, ingest pipeline or data index associated with the connector. These need to be removed manually. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[delete-connector-api-request]] ==== {api-request-title} @@ -21,7 +22,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[delete-connector-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[delete-connector-api-path-params]] diff --git a/docs/reference/connector/apis/delete-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/delete-connector-sync-job-api.asciidoc index 7cdabb22f05ee..eeea40f430abd 100644 --- a/docs/reference/connector/apis/delete-connector-sync-job-api.asciidoc +++ b/docs/reference/connector/apis/delete-connector-sync-job-api.asciidoc @@ -9,7 +9,8 @@ beta::[] Removes a connector sync job and its associated data. This is a destructive action that is not recoverable. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[delete-connector-sync-job-api-request]] ==== {api-request-title} @@ -19,7 +20,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[delete-connector-sync-job-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. [[delete-connector-sync-job-api-path-params]] ==== {api-path-parms-title} diff --git a/docs/reference/connector/apis/get-connector-api.asciidoc b/docs/reference/connector/apis/get-connector-api.asciidoc index 4df792c8a0a1a..302773e0af831 100644 --- a/docs/reference/connector/apis/get-connector-api.asciidoc +++ b/docs/reference/connector/apis/get-connector-api.asciidoc @@ -8,7 +8,8 @@ beta::[] Retrieves the details about a connector. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. 
+ [[get-connector-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[get-connector-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. [[get-connector-api-path-params]] ==== {api-path-parms-title} diff --git a/docs/reference/connector/apis/get-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/get-connector-sync-job-api.asciidoc index fffdada2a2a82..a524c1291c26a 100644 --- a/docs/reference/connector/apis/get-connector-sync-job-api.asciidoc +++ b/docs/reference/connector/apis/get-connector-sync-job-api.asciidoc @@ -8,7 +8,8 @@ beta::[] Retrieves the details about a connector sync job. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[get-connector-sync-job-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[get-connector-sync-job-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. [[get-connector-sync-job-api-path-params]] ==== {api-path-parms-title} diff --git a/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc b/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc index 730dad852adee..4a4fa5a22dcc1 100644 --- a/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc +++ b/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc @@ -9,7 +9,8 @@ beta::[] Returns information about all stored connector sync jobs ordered by their creation date in ascending order. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[list-connector-sync-jobs-api-request]] ==== {api-request-title} @@ -19,7 +20,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[list-connector-sync-jobs-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. 
[[list-connector-sync-jobs-api-path-params]] ==== {api-path-parms-title} diff --git a/docs/reference/connector/apis/list-connectors-api.asciidoc b/docs/reference/connector/apis/list-connectors-api.asciidoc index c7ea2afd8102f..4a93ecf2b0109 100644 --- a/docs/reference/connector/apis/list-connectors-api.asciidoc +++ b/docs/reference/connector/apis/list-connectors-api.asciidoc @@ -9,7 +9,8 @@ beta::[] Returns information about all created connectors. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[list-connector-api-request]] @@ -20,7 +21,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[list-connector-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. [[list-connector-api-path-params]] ==== {api-path-parms-title} diff --git a/docs/reference/connector/apis/set-connector-sync-job-error-api.asciidoc b/docs/reference/connector/apis/set-connector-sync-job-error-api.asciidoc index 42203ed8e6103..e6ad9e8cc93db 100644 --- a/docs/reference/connector/apis/set-connector-sync-job-error-api.asciidoc +++ b/docs/reference/connector/apis/set-connector-sync-job-error-api.asciidoc @@ -8,7 +8,8 @@ preview::[] Sets a connector sync job error. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[set-connector-sync-job-error-api-request]] ==== {api-request-title} @@ -17,7 +18,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[set-connector-sync-job-error-api-prereqs]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_sync_job_id` parameter should reference an existing connector sync job. [[set-connector-sync-job-error-api-desc]] diff --git a/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc b/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc index 1427269d22b86..7e22f657ba6b6 100644 --- a/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc +++ b/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc @@ -8,7 +8,8 @@ preview::[] Sets connector sync job stats. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. 
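As a sketch (the sync job ID `my-sync-job-id` and the counts below are placeholder values), a stats update might look like this:

[source, console]
----
PUT _connector/_sync_job/my-sync-job-id/_stats
{
  "deleted_document_count": 0,
  "indexed_document_count": 42,
  "indexed_document_volume": 1024
}
----
// TEST[skip:can't test in isolation]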
+ [[set-connector-sync-job-stats-api-request]] ==== {api-request-title} @@ -17,7 +18,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[set-connector-sync-job-stats-api-prereqs]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_sync_job_id` parameter should reference an existing connector sync job. [[set-connector-sync-job-stats-api-desc]] diff --git a/docs/reference/connector/apis/update-connector-api-key-id-api.asciidoc b/docs/reference/connector/apis/update-connector-api-key-id-api.asciidoc index 112ec821df7c9..fbd3f887758f2 100644 --- a/docs/reference/connector/apis/update-connector-api-key-id-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-api-key-id-api.asciidoc @@ -11,11 +11,12 @@ Updates the `api_key_id` and/or `api_key_secret_id` field(s) of a connector, spe . The ID of the API key used for authorization . The ID of the Connector Secret where the API key is stored -The Connector Secret ID is only required for native connectors. -Connector clients do not use this field. -See the documentation for {enterprise-search-ref}/native-connectors.html#native-connectors-manage-API-keys-programmatically[managing native connector API keys programmatically^] for more details. +The Connector Secret ID is only required for Elastic managed connectors. +Self-managed connectors do not use this field. +See the documentation for <> for more details. + +To get started with Connector APIs, check out <>. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. [[update-connector-api-key-id-api-request]] ==== {api-request-title} @@ -25,7 +26,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-api-key-id-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. * The `api_key_id` parameter should reference an existing API key. * The `api_key_secret_id` parameter should reference an existing Connector Secret containing an encoded API key value. @@ -44,7 +45,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec (Optional, string) ID of the API key that the connector will use to authorize access to required indices. Each connector can be associated with at most one API key. `api_key_secret_id`:: -(Optional, string) ID of the Connector Secret that contains the encoded API key. This should be the same API key as `api_key_id` references. This is only required for native connectors. +(Optional, string) ID of the Connector Secret that contains the encoded API key. This should be the same API key as `api_key_id` references. 
This is only required for Elastic managed connectors. [[update-connector-api-key-id-api-response-codes]] ==== {api-response-codes-title} diff --git a/docs/reference/connector/apis/update-connector-configuration-api.asciidoc b/docs/reference/connector/apis/update-connector-configuration-api.asciidoc index e8a710cdacff0..4b25f9e71ae4b 100644 --- a/docs/reference/connector/apis/update-connector-configuration-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-configuration-api.asciidoc @@ -8,7 +8,8 @@ beta::[] Updates a connector's `configuration`, allowing for config value updates within a registered configuration schema. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-configuration-api-request]] ==== {api-request-title} @@ -18,10 +19,10 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-configuration-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. * To update configuration `values`, the connector `configuration` schema must be first registered by a running instance of Elastic connector service. -* Make sure configuration fields are compatible with the configuration schema for the third-party data source. Refer to the individual {enterprise-search-ref}/connectors-references.html[connectors references] for details. +* Make sure configuration fields are compatible with the configuration schema for the third-party data source. Refer to the individual <> for details. [[update-connector-configuration-api-path-params]] ==== {api-path-parms-title} @@ -55,7 +56,7 @@ No connector matching `connector_id` could be found. [[update-connector-configuration-api-example]] ==== {api-examples-title} -The following example configures a `sharepoint_online` connector. Find the supported configuration options in the {enterprise-search-ref}/connectors-sharepoint-online.html[Sharepoint Online connector documentation] or by inspecting the schema in the connector's `configuration` field using the <>. +The following example configures a `sharepoint_online` connector. Find the supported configuration options in the <>, or by inspecting the schema in the connector's `configuration` field using the <>. //// [source, console] diff --git a/docs/reference/connector/apis/update-connector-error-api.asciidoc b/docs/reference/connector/apis/update-connector-error-api.asciidoc index c6ac0c9a1ac22..29358b243041a 100644 --- a/docs/reference/connector/apis/update-connector-error-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-error-api.asciidoc @@ -8,7 +8,8 @@ preview::[] Updates the `error` field of a connector. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. 
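As a sketch (the connector ID `my-connector` and the message are placeholders), setting an error might look like this:

[source, console]
----
PUT _connector/my-connector/_error
{
    "error": "Example error message from the data source"
}
----
// TEST[skip:can't test in isolation]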
+ [[update-connector-error-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-error-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[update-connector-error-api-desc]] diff --git a/docs/reference/connector/apis/update-connector-features-api.asciidoc b/docs/reference/connector/apis/update-connector-features-api.asciidoc index 0d3457b9bd584..77571fcd7d5a0 100644 --- a/docs/reference/connector/apis/update-connector-features-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-features-api.asciidoc @@ -15,7 +15,8 @@ Manages the `features` of a connector. This endpoint can be used to control the Normally, the running connector service automatically manages these features. However, you can use this API to override the default behavior. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-features-api-request]] ==== {api-request-title} @@ -25,7 +26,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-features-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[update-connector-features-api-path-params]] @@ -43,7 +44,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec * `document_level_security` (Optional, object) Controls whether document-level security is enabled with the `enabled` flag. * `incremental_sync` (Optional, object) Controls whether incremental syncs are enabled with the `enabled` flag. -* `native_connector_api_keys`(Optional, object) Controls whether native connector API keys are enabled with the `enabled` flag. +* `native_connector_api_keys`(Optional, object) Controls whether managed connector API keys are enabled with the `enabled` flag. * `sync_rules` (Optional, object) Controls sync rules. ** `advanced` (Optional, object) Controls whether advanced sync rules are enabled with the `enabled` flag. ** `basic`(Optional, object) Controls whether basic sync rules are enabled with the `enabled` flag. 
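Putting the attributes above together, a sketch of a features update for a connector with the hypothetical ID `my-connector` might look like the following; enable or disable individual flags to match your setup:

[source, console]
----
PUT _connector/my-connector/_features
{
  "features": {
    "document_level_security": {
      "enabled": true
    },
    "incremental_sync": {
      "enabled": true
    },
    "sync_rules": {
      "advanced": {
        "enabled": false
      },
      "basic": {
        "enabled": true
      }
    }
  }
}
----
// TEST[skip:can't test in isolation]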
diff --git a/docs/reference/connector/apis/update-connector-filtering-api.asciidoc b/docs/reference/connector/apis/update-connector-filtering-api.asciidoc index 861e72481a59a..4820fa151901d 100644 --- a/docs/reference/connector/apis/update-connector-filtering-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-filtering-api.asciidoc @@ -8,9 +8,10 @@ beta::[] Updates the draft `filtering` configuration of a connector and marks the draft validation state as `edited`. The filtering draft is activated once validated by the running Elastic connector service. -The filtering property is used to configure sync rules (both basic and advanced) for a connector. Learn more in the {enterprise-search-ref}/sync-rules.html[sync rules documentation]. +The filtering property is used to configure sync rules (both basic and advanced) for a connector. Learn more in the <>. + +To get started with Connector APIs, check out <>. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. [[update-connector-filtering-api-request]] ==== {api-request-title} @@ -20,7 +21,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-filtering-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. * Filtering draft is activated once validated by the running Elastic connector service, the `draft.validation.state` must be `valid`. * If, after a validation attempt, the `draft.validation.state` equals to `invalid`, inspect `draft.validation.errors` and fix any issues. @@ -37,7 +38,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec `rules`:: (Optional, array of objects) -An array of {enterprise-search-ref}/sync-rules.html#sync-rules-basic[basic sync rules], each with the following sub-attributes: +An array of <>, each with the following sub-attributes: * `id` (Required, string) + A unique identifier for the rule. * `policy` (Required, string) + @@ -57,7 +58,7 @@ The timestamp when the rule was last edited. Defaults to `now` UTC timestamp. `advanced_snippet`:: (Optional, object) -Used for {enterprise-search-ref}/sync-rules.html#sync-rules-advanced[advanced filtering] at query time, with the following sub-attributes: +Used for <> at query time, with the following sub-attributes: * `value` (Required, object or array) + A JSON object/array passed directly to the connector for advanced filtering. * `created_at` (Optional, datetime) + @@ -81,7 +82,7 @@ No connector matching `connector_id` could be found. [[update-connector-filtering-api-example]] ==== {api-examples-title} -The following example updates the draft {enterprise-search-ref}/sync-rules.html#sync-rules-basic[basic sync rules] for a Google Drive connector with ID `my-g-drive-connector`. All Google Drive files with `.txt` extension will be skipped: +The following example updates the draft <> for a Google Drive connector with ID `my-g-drive-connector`. 
All Google Drive files with `.txt` extension will be skipped: //// [source, console] @@ -143,7 +144,7 @@ PUT _connector/my-g-drive-connector/_filtering } ---- -The following example updates the draft advanced sync rules for a MySQL connector with id `my-sql-connector`. Advanced sync rules are specific to each connector type. Refer to the references for connectors that support {enterprise-search-ref}/sync-rules.html#sync-rules-advanced[advanced sync rules] for syntax and examples. +The following example updates the draft advanced sync rules for a MySQL connector with id `my-sql-connector`. Advanced sync rules are specific to each connector type. Refer to the references for connectors that support <> for syntax and examples. [source,console] ---- diff --git a/docs/reference/connector/apis/update-connector-index-name-api.asciidoc b/docs/reference/connector/apis/update-connector-index-name-api.asciidoc index d07007438e09c..6222baf6a6caf 100644 --- a/docs/reference/connector/apis/update-connector-index-name-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-index-name-api.asciidoc @@ -8,7 +8,8 @@ beta::[] Updates the `index_name` field of a connector, specifying the index where the data ingested by the connector is stored. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-index-name-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-index-name-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[update-connector-index-name-api-path-params]] diff --git a/docs/reference/connector/apis/update-connector-last-sync-api.asciidoc b/docs/reference/connector/apis/update-connector-last-sync-api.asciidoc index 918bf4f80a010..17f892d852f4a 100644 --- a/docs/reference/connector/apis/update-connector-last-sync-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-last-sync-api.asciidoc @@ -10,7 +10,8 @@ Updates the fields related to the last sync of a connector. This action is used for analytics and monitoring. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-last-sync-api-request]] ==== {api-request-title} @@ -20,7 +21,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-last-sync-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. 
* The `connector_id` parameter should reference an existing connector. [[update-connector-last-sync-api-path-params]] diff --git a/docs/reference/connector/apis/update-connector-name-description-api.asciidoc b/docs/reference/connector/apis/update-connector-name-description-api.asciidoc index 7e16874da9fb4..384cec2c73e24 100644 --- a/docs/reference/connector/apis/update-connector-name-description-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-name-description-api.asciidoc @@ -9,7 +9,8 @@ beta::[] Updates the `name` and `description` fields of a connector. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-name-description-api-request]] ==== {api-request-title} @@ -19,7 +20,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-name-description-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[update-connector-name-description-api-path-params]] diff --git a/docs/reference/connector/apis/update-connector-pipeline-api.asciidoc b/docs/reference/connector/apis/update-connector-pipeline-api.asciidoc index 01ed2e39702ea..e54b01ec47d01 100644 --- a/docs/reference/connector/apis/update-connector-pipeline-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-pipeline-api.asciidoc @@ -10,7 +10,8 @@ Updates the `pipeline` configuration of a connector. When you create a new connector, the configuration of an <> is populated with default settings. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-pipeline-api-request]] ==== {api-request-title} @@ -20,7 +21,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-pipeline-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[update-connector-pipeline-api-path-params]] diff --git a/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc b/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc index f932f4c959de2..64302c26a7231 100644 --- a/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc @@ -8,7 +8,8 @@ beta::[] Updates the `scheduling` configuration of a connector. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. 
+To get started with Connector APIs, check out <>. + [[update-connector-scheduling-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-scheduling-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. [[update-connector-scheduling-api-path-params]] diff --git a/docs/reference/connector/apis/update-connector-service-type-api.asciidoc b/docs/reference/connector/apis/update-connector-service-type-api.asciidoc index 139e9eddf4076..c02967d03e2dd 100644 --- a/docs/reference/connector/apis/update-connector-service-type-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-service-type-api.asciidoc @@ -8,7 +8,8 @@ beta::[] Updates the `service_type` of a connector. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-service-type-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-service-type-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. * The `service_type` must be a valid type as defined by the Connector framework. ** When you change a configured connector's `service_type`, you'll also need to reset its configuration to ensure compatibility. diff --git a/docs/reference/connector/apis/update-connector-status-api.asciidoc b/docs/reference/connector/apis/update-connector-status-api.asciidoc index ee9dfcb5f880f..dadd93fe5f9c4 100644 --- a/docs/reference/connector/apis/update-connector-status-api.asciidoc +++ b/docs/reference/connector/apis/update-connector-status-api.asciidoc @@ -8,7 +8,8 @@ preview::[] Updates the `status` of a connector. -To get started with Connector APIs, check out the {enterprise-search-ref}/connectors-tutorial-api.html[tutorial^]. +To get started with Connector APIs, check out <>. + [[update-connector-status-api-request]] ==== {api-request-title} @@ -18,7 +19,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec [[update-connector-status-api-prereq]] ==== {api-prereq-title} -* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors. +* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. 
This service runs automatically on Elastic Cloud for Elastic managed connectors. * The `connector_id` parameter should reference an existing connector. * The change of `status` must be a valid status transition according to the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. diff --git a/docs/reference/connector/docs/_connectors-create-client.asciidoc b/docs/reference/connector/docs/_connectors-create-client.asciidoc new file mode 100644 index 0000000000000..31e4468f7a6bc --- /dev/null +++ b/docs/reference/connector/docs/_connectors-create-client.asciidoc @@ -0,0 +1,76 @@ +[discrete#es-connectors-{service-name-stub}-client-create-use-the-ui] +==== Use the UI + +To create a new {service-name} connector: + +. Navigate to the *Search -> Connectors* page in the Kibana UI. +. Follow the instructions to create a new *{service-name}* self-managed connector. + +[discrete#es-connectors-{service-name-stub}-client-create-use-the-api] +==== Use the API + +You can use the {es} {ref}/connector-apis.html[Create connector API] to create a new self-managed {service-name} self-managed connector. + +For example: + +[source, console,subs="+attributes"] +---- +PUT _connector/my-{service-name-stub}-connector +{ + "index_name": "my-elasticsearch-index", + "name": "Content synced from {service-name}", + "service_type": "{service-name-stub}" +} +---- +// TEST[skip:can't test in isolation] + + +.You'll also need to *create an API key* for the connector to use. + + +[%collapsible] +=================================== + +[NOTE] +==== +The user needs the cluster privileges `manage_api_key`, `manage_connector` and `write_connector_secrets` to generate API keys programmatically. +==== + +To create an API key for the connector: + +. Run the following command, replacing values where indicated. +Note the `encoded` return values from the response: ++ +[source, console,subs="+attributes"] +---- +POST /_security/api_key +{ + "name": "connector_name-connector-api-key", + "role_descriptors": { + "connector_name-connector-role": { + "cluster": [ + "monitor", + "manage_connector" + ], + "indices": [ + { + "names": [ + "index_name", + ".search-acl-filter-index_name", + ".elastic-connectors*" + ], + "privileges": [ + "all" + ], + "allow_restricted_indices": false + } + ] + } + } +} +---- ++ +. Update your `config.yml` file with the API key `encoded` value. +=================================== + +Refer to the {ref}/connector-apis.html[{es} API documentation] for details of all available Connector APIs. diff --git a/docs/reference/connector/docs/_connectors-create-native-api-key.asciidoc b/docs/reference/connector/docs/_connectors-create-native-api-key.asciidoc new file mode 100644 index 0000000000000..99fde477eea5b --- /dev/null +++ b/docs/reference/connector/docs/_connectors-create-native-api-key.asciidoc @@ -0,0 +1,61 @@ +[NOTE] +==== +The user needs the cluster privileges `manage_api_key`, `manage_connector` and `write_connector_secrets` to generate API keys programmatically. +==== + +To create an API key for the connector: + +. Run the following command, replacing values where indicated. 
+Note the `id` and `encoded` return values from the response: ++ +[source, console,subs="+attributes"] +---- +POST /_security/api_key +{ + "name": "my-connector-api-key", + "role_descriptors": { + "my-connector-connector-role": { + "cluster": [ + "monitor", + "manage_connector" + ], + "indices": [ + { + "names": [ + "my-index_name", + ".search-acl-filter-my-index_name", + ".elastic-connectors*" + ], + "privileges": [ + "all" + ], + "allow_restricted_indices": false + } + ] + } + } +} +---- ++ +. Use the `encoded` value to store a connector secret, and note the `id` return value from this response: ++ +[source, console,subs="+attributes"] +---- +POST _connector/_secret +{ + "value": "encoded_api_key" +} +---- +// TEST[skip:need to retrieve ids from the response] ++ +. Use the API key `id` and the connector secret `id` to update the connector: ++ +[source, console,subs="+attributes"] +---- +PUT /_connector/my_connector_id>/_api_key_id +{ + "api_key_id": "API key_id", + "api_key_secret_id": "secret_id" +} +---- +// TEST[skip:need to retrieve ids from the response] diff --git a/docs/reference/connector/docs/_connectors-create-native.asciidoc b/docs/reference/connector/docs/_connectors-create-native.asciidoc new file mode 100644 index 0000000000000..1b7f5f22415fe --- /dev/null +++ b/docs/reference/connector/docs/_connectors-create-native.asciidoc @@ -0,0 +1,38 @@ +[discrete#es-connectors-{service-name-stub}-create-use-the-ui] +==== Use the UI + +To create a new {service-name} connector: + +. Navigate to the *Search -> Connectors* page in the Kibana UI. +. Follow the instructions to create a new native *{service-name}* connector. + +For additional operations, see <>. + +[discrete#es-connectors-{service-name-stub}-create-use-the-api] +==== Use the API + +You can use the {es} {ref}/connector-apis.html[Create connector API] to create a new native {service-name} connector. + +For example: + +[source, console,subs="+attributes"] +---- +PUT _connector/my-{service-name-stub}-connector +{ + "index_name": "my-elasticsearch-index", + "name": "Content synced from {service-name}", + "service_type": "{service-name-stub}", + "is_native": "true" +} +---- +// TEST[skip:can't test in isolation] + +.You'll also need to *create an API key* for the connector to use. + +[%collapsible] +=================================== +include::_connectors-create-native-api-key.asciidoc[] +=================================== + +Refer to the {ref}/connector-apis.html[{es} API documentation] for details of all available Connector APIs. + diff --git a/docs/reference/connector/docs/_connectors-docker-instructions.asciidoc b/docs/reference/connector/docs/_connectors-docker-instructions.asciidoc new file mode 100644 index 0000000000000..db536099f1aad --- /dev/null +++ b/docs/reference/connector/docs/_connectors-docker-instructions.asciidoc @@ -0,0 +1,76 @@ +You can deploy the {service-name} connector as a self-managed connector using Docker. +Follow these instructions. + +.*Step 1: Download sample configuration file* +[%collapsible] +==== +Download the sample configuration file. +You can either download it manually or run the following command: + +[source,sh] +---- +curl https://raw.githubusercontent.com/elastic/connectors/main/config.yml.example --output ~/connectors-config/config.yml +---- +// NOTCONSOLE + +Remember to update the `--output` argument value if your directory name is different, or you want to use a different config file name. 
+==== + +.*Step 2: Update the configuration file for your self-managed connector* +[%collapsible] +==== +Update the configuration file with the following settings to match your environment: + +* `elasticsearch.host` +* `elasticsearch.api_key` +* `connectors` + +If you're running the connector service against a Dockerized version of Elasticsearch and Kibana, your config file will look like this: + +[source,yaml,subs="attributes"] +---- +# When connecting to your cloud deployment you should edit the host value +elasticsearch.host: http://host.docker.internal:9200 +elasticsearch.api_key: + +connectors: + - + connector_id: + service_type: {service-name-stub} + api_key: # Optional. If not provided, the connector will use the elasticsearch.api_key instead + +---- + +Using the `elasticsearch.api_key` is the recommended authentication method. However, you can also use `elasticsearch.username` and `elasticsearch.password` to authenticate with your Elasticsearch instance. + +Note: You can change other default configurations by simply uncommenting specific settings in the configuration file and modifying their values. + +==== + +.*Step 3: Run the Docker image* +[%collapsible] +==== +Run the Docker image with the Connector Service using the following command: + +[source,sh,subs="attributes"] +---- +docker run \ +-v ~/connectors-config:/config \ +--network "elastic" \ +--tty \ +--rm \ +docker.elastic.co/enterprise-search/elastic-connectors:{version}.0 \ +/app/bin/elastic-ingest \ +-c /config/config.yml +---- +==== + +Refer to {connectors-python}/docs/DOCKER.md[`DOCKER.md`^] in the `elastic/connectors` repo for more details. + +Find all available Docker images in the https://www.docker.elastic.co/r/enterprise-search/elastic-connectors[official registry]. + +[TIP] +==== +We also have a quickstart self-managed option using Docker Compose, so you can spin up all required services at once: Elasticsearch, Kibana, and the connectors service. +Refer to this https://github.com/elastic/connectors/tree/main/scripts/stack#readme[README] in the `elastic/connectors` repo for more information. 
+==== \ No newline at end of file diff --git a/docs/reference/connector/docs/_connectors-list-advanced-rules.asciidoc b/docs/reference/connector/docs/_connectors-list-advanced-rules.asciidoc new file mode 100644 index 0000000000000..b09aa7f1d4e7e --- /dev/null +++ b/docs/reference/connector/docs/_connectors-list-advanced-rules.asciidoc @@ -0,0 +1,14 @@ +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> diff --git a/docs/reference/connector/docs/_connectors-list-clients.asciidoc b/docs/reference/connector/docs/_connectors-list-clients.asciidoc new file mode 100644 index 0000000000000..b56d7458d2924 --- /dev/null +++ b/docs/reference/connector/docs/_connectors-list-clients.asciidoc @@ -0,0 +1,29 @@ +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> diff --git a/docs/reference/connector/docs/_connectors-list-dls.asciidoc b/docs/reference/connector/docs/_connectors-list-dls.asciidoc new file mode 100644 index 0000000000000..f5ea6c1d43c5c --- /dev/null +++ b/docs/reference/connector/docs/_connectors-list-dls.asciidoc @@ -0,0 +1,13 @@ +* <> +* <> +* <> (including Jira Data Center) +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/_connectors-list-incremental.asciidoc b/docs/reference/connector/docs/_connectors-list-incremental.asciidoc new file mode 100644 index 0000000000000..88822164258bb --- /dev/null +++ b/docs/reference/connector/docs/_connectors-list-incremental.asciidoc @@ -0,0 +1,19 @@ +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/_connectors-list-local-content-extraction.asciidoc b/docs/reference/connector/docs/_connectors-list-local-content-extraction.asciidoc new file mode 100644 index 0000000000000..d86583644d574 --- /dev/null +++ b/docs/reference/connector/docs/_connectors-list-local-content-extraction.asciidoc @@ -0,0 +1,16 @@ +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> diff --git a/docs/reference/connector/docs/_connectors-list-native.asciidoc b/docs/reference/connector/docs/_connectors-list-native.asciidoc new file mode 100644 index 0000000000000..9222abe11fc3a --- /dev/null +++ b/docs/reference/connector/docs/_connectors-list-native.asciidoc @@ -0,0 +1,26 @@ +* <> +* <> +* <> (including Confluence Data Center) +* <> +* <> +* <> +* <> +* <> +* <> (including Jira Data Center) +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> +* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/_connectors-list-workplace-search.asciidoc b/docs/reference/connector/docs/_connectors-list-workplace-search.asciidoc new file mode 100644 index 0000000000000..f9391f3bf4a1e --- /dev/null +++ b/docs/reference/connector/docs/_connectors-list-workplace-search.asciidoc @@ -0,0 +1,22 @@ +* {workplace-search-ref}/workplace-search-box-connector.html[Box^] +* {workplace-search-ref}/workplace-search-confluence-cloud-connector.html[Confluence Cloud^] +* https://github.com/elastic/connectors-ruby/tree/8.3/lib/connectors_sdk/confluence_cloud[Confluence Cloud^] (Ruby connector package) +* {workplace-search-ref}/workplace-search-confluence-server-connector.html[Confluence Server^] +* 
{workplace-search-ref}/workplace-search-dropbox-connector.html[Dropbox^] +* {workplace-search-ref}/workplace-search-github-connector.html[GitHub^] +* {workplace-search-ref}/workplace-search-gmail-connector.html[Gmail^] +* {workplace-search-ref}/workplace-search-google-drive-connector.html[Google Drive^] +* {workplace-search-ref}/workplace-search-jira-cloud-connector.html[Jira Cloud^] +* {workplace-search-ref}/workplace-search-jira-server-connector.html[Jira Server^] +* https://github.com/elastic/enterprise-search-network-drive-connector[Network drives^] (Python connector package) +* {workplace-search-ref}/workplace-search-onedrive-connector.html[OneDrive^] +* https://github.com/elastic/enterprise-search-microsoft-outlook-connector[Outlook^] (Python connector package) +* {workplace-search-ref}/workplace-search-salesforce-connector.html[Salesforce^] +* {workplace-search-ref}/workplace-search-servicenow-connector.html[ServiceNow^] +* {workplace-search-ref}/workplace-search-sharepoint-online-connector.html[SharePoint Online^] +* https://github.com/elastic/connectors-ruby/tree/8.3/lib/connectors_sdk/share_point[SharePoint Online^] (Ruby connector package) +* https://github.com/elastic/enterprise-search-sharepoint-server-connector[SharePoint Server^] (Python connector package) +* {workplace-search-ref}/workplace-search-slack-connector.html[Slack^] +* {workplace-search-ref}/microsoft-teams.html[Teams^] (Python connector package) +* {workplace-search-ref}/workplace-search-zendesk-connector.html[Zendesk^] +* {workplace-search-ref}/zoom.html[Zoom^] (Python connector package) diff --git a/docs/reference/connector/docs/_connectors-overview-table.asciidoc b/docs/reference/connector/docs/_connectors-overview-table.asciidoc new file mode 100644 index 0000000000000..f25ea3deceeee --- /dev/null +++ b/docs/reference/connector/docs/_connectors-overview-table.asciidoc @@ -0,0 +1,51 @@ +This table provides an overview of our available connectors, their current support status, and the features they support. + +The columns provide specific information about each connector: + +* *Status*: Indicates whether the connector is in General Availability (GA), Technical Preview, Beta, or is an Example connector. +* *Native (Elastic Cloud)*: Specifies the versions in which the connector is available as a managed service, if applicable. +* *Advanced sync rules*: Specifies the versions in which advanced sync rules are supported, if applicable. +* *Local binary extraction service*: Specifies the versions in which the local binary extraction service is supported, if applicable. +* *Incremental syncs*: Specifies the version in which incremental syncs are supported, if applicable. +* *Document level security*: Specifies the version in which document level security is supported, if applicable. +* *Code*: Provides a link to the connector's source code in the https://github.com/elastic/connectors[`elastic/connectors` GitHub repository]. + +NOTE: All connectors are available as self-managed <>. 
+ +[options="header"] +|==================================================================================================================================================================================================== +| Connector| Status| Native (Elastic Cloud) | <> | <> | <> | <> | Source code +|<>|*GA*|8.9+|-|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/azure_blob_storage.py[View code] +|<>|*Preview*|8.14+|-|-|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/box.py[View code] +|<>|*GA*|8.9+|8.9+|8.11+|8.13+|8.10|https://github.com/elastic/connectors/tree/main/connectors/sources/confluence.py[View code] +|<>|*Preview*|8.13+|8.13+|8.13+|8.13+|8.14+|https://github.com/elastic/connectors/tree/main/connectors/sources/confluence.py[View code] +|<>|*GA*|8.9+|8.9+|8.11+|8.13+|8.14+|https://github.com/elastic/connectors/tree/main/connectors/sources/confluence.py[View code] +|<>|*GA*|8.10+|-|8.11+|8.13+|8.12+|https://github.com/elastic/connectors/tree/main/connectors/sources/dropbox.py[View code] +|<>|*GA*|8.11+|8.10+|8.11+|8.13+|8.12+|https://github.com/elastic/connectors/tree/main/connectors/sources/github.py[View code] +|<>|*GA*|8.13+|-|-|8.13+|8.10+|https://github.com/elastic/connectors/tree/main/connectors/sources/gmail.py[View code] +|<>|*GA*|8.12+|-|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/google_cloud_storage.py[View code] +|<>|*GA*|8.11+|-|8.11+|8.13+|8.10+|https://github.com/elastic/connectors/tree/main/connectors/sources/google_drive.py[View code] +|<>|*Preview*|-|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/graphql.py[View code] +|<>|*GA*|8.9+|8.9+|8.11+|8.13+|8.10+|https://github.com/elastic/connectors/tree/main/connectors/sources/jira.py[View code] +|<>|*Preview*|8.13+|8.13+|8.13+|8.13+|8.13+*|https://github.com/elastic/connectors/tree/main/connectors/sources/jira.py[View code] +|<>|*GA*|8.9+|8.9+|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/jira.py[View code] +|<>|*GA*|8.8|8.8 native/ 8.12 self-managed|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/mongo.py[View code] +|<>|*GA*|8.8+|8.11+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/mssql.py[View code] +|<>|*GA*|8.5+|8.8+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/mysql.py[View code] +|<>|*GA*|8.9+|8.10+|8.14+|8.13+|8.11+|https://github.com/elastic/connectors/tree/main/connectors/sources/network_drive.py[View code] +|<>|*GA*|8.14+|8.14+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/notion.py[View code] +|<>|*GA*|8.11+|8.11+|8.11+|8.13+|8.11+|https://github.com/elastic/connectors/tree/main/connectors/sources/onedrive.py[View code] +|<>|Example|n/a|n/a|n/a|n/a|-|https://github.com/elastic/connectors/tree/main/connectors/sources/opentext_documentum.py[View code] +|<>|*GA*|8.12+|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/oracle.py[View code] +|<>|*GA*|8.13+|-|8.11+|8.13+|8.14+|https://github.com/elastic/connectors/tree/main/connectors/sources/outlook.py[View code] +|<>|*GA*|8.8+|8.11+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/postgresql.py[View code] 
+|<>|*Preview*|-|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/redis.py[View code] +|<>|*GA*|8.12+|8.12+|8.11+|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/s3.py[View code] +|<>|*GA*|8.12+|8.12+|8.11+|8.13+|8.13+|https://github.com/elastic/connectors/tree/main/connectors/sources/salesforce.py[View code] +|<>|*GA*|8.10+|8.10+|8.11+|8.13+|8.13+|https://github.com/elastic/connectors/tree/main/connectors/sources/servicenow.py[View code] +|<>|*GA*|8.9+|8.9+|8.9+|8.9+|8.9+|https://github.com/elastic/connectors/tree/main/connectors/sources/sharepoint_online.py[View code] +|<>|*Beta*|8.15+|-|8.11+|8.13+|8.14+|https://github.com/elastic/connectors/tree/main/connectors/sources/sharepoint_server.py[View code] +|<>|*Preview*|8.14+|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/slack.py[View code] +|<>|*Preview*|8.14+|-|-|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/teams.py[View code] +|<>|*Preview*|8.14+|-|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/zoom.py[View code] +|==================================================================================================================================================================================================== diff --git a/docs/reference/connector/docs/connectors-API-tutorial.asciidoc b/docs/reference/connector/docs/connectors-API-tutorial.asciidoc new file mode 100644 index 0000000000000..8a9e462ebd94d --- /dev/null +++ b/docs/reference/connector/docs/connectors-API-tutorial.asciidoc @@ -0,0 +1,487 @@ +[#es-connectors-tutorial-api] +=== Connector API tutorial +++++ +API tutorial +++++ + +Learn how to set up a self-managed connector using the {ref}/connector-apis.html[{es} Connector APIs]. + +For this example we'll use the connectors-postgresql,PostgreSQL connector to sync data from a PostgreSQL database to {es}. +We'll spin up a simple PostgreSQL instance in Docker with some example data, create a connector, and sync the data to {es}. +You can follow the same steps to set up a connector for another data source. + +[TIP] +==== +This tutorial focuses on running a self-managed connector on your own infrastructure, and managing syncs using the Connector APIs. +See connectors for an overview of how connectors work. + +If you're just getting started with {es}, this tutorial might be a bit advanced. +Refer to {ref}/getting-started.html[quickstart] for a more beginner-friendly introduction to {es}. + +If you're just getting started with connectors, you might want to start in the UI first. +We have two tutorials that focus on managing connectors using the UI: + +* <>. Set up a native MongoDB connector, fully managed in Elastic Cloud. +* <>. Set up a self-managed PostgreSQL connector. +==== + +[discrete#es-connectors-tutorial-api-prerequisites] +==== Prerequisites + +* You should be familiar with how connectors, connectors work, to understand how the API calls relate to the overall connector setup. +* You need to have https://www.docker.com/products/docker-desktop/[Docker Desktop] installed. +* You need to have {es} running, and an API key to access it. +Refer to the next section for details, if you don't have an {es} deployment yet. + +[discrete#es-connectors-tutorial-api-setup-es] +==== Set up {es} + +If you already have an {es} deployment on Elastic Cloud (_Hosted deployment_ or _Serverless project_), you're good to go. 
+To spin up {es} in local dev mode in Docker for testing purposes, open the collapsible section below. + +.*Run local {es} in Docker* +[%collapsible] +=============== + +[source,sh,subs="attributes+"] +---- +docker run -p 9200:9200 -d --name elasticsearch \ + -e "discovery.type=single-node" \ + -e "xpack.security.enabled=false" \ + -e "xpack.security.http.ssl.enabled=false" \ + -e "xpack.license.self_generated.type=trial" \ + docker.elastic.co/elasticsearch/elasticsearch:{version} +---- + +[WARNING] +==== +This {es} setup is for development purposes only. +Never use this configuration in production. +Refer to {ref}/setup.html[Set up {es}] for production-grade installation instructions, including Docker. +==== + +We will use the default password `changeme` for the `elastic` user. For production environments, always ensure your cluster runs with security enabled. + +[source,sh] +---- +export ELASTIC_PASSWORD="changeme" +---- + +Since we run our cluster locally with security disabled, we won't use API keys to authenticate against the {es}. Instead, in each cURL request, we will use the `-u` flag for authentication. + +Let's test that we can access {es}: + +[source,sh] +---- +curl -s -X GET -u elastic:$ELASTIC_PASSWORD http://localhost:9200 +---- +// NOTCONSOLE + +Note: With {es} running locally, you will need to pass the username and password to authenticate against {es} in the configuration file for the connector service. + +=============== + +.Running API calls +**** + +You can run API calls using the https://www.elastic.co/guide/en/kibana/master/console-kibana.html[Dev Tools Console] in Kibana, using `curl` in your terminal, or with our programming language clients. +Our example widget allows you to copy code examples in both Dev Tools Console syntax and curl syntax. +To use curl, you'll need to add authentication headers to your request. + +Here's an example of how to do that. Note that if you want the connector ID to be auto-generated, use the `POST _connector` endpoint. + +[source,sh] +---- +curl -s -X PUT http://localhost:9200/_connector/my-connector-id \ +-H "Authorization: APIKey $APIKEY" \ +-H "Content-Type: application/json" \ +-d '{ + "name": "Music catalog", + "index_name": "music", + "service_type": "postgresql" +}' +---- +// NOTCONSOLE + +Refer to connectors-tutorial-api-create-api-key for instructions on creating an API key. +**** + +[discrete#es-connectors-tutorial-api-setup-postgres] +==== Run PostgreSQL instance in Docker (optional) + +For this tutorial, we'll set up a PostgreSQL instance in Docker with some example data. +Of course, you can *skip this step and use your own existing PostgreSQL instance* if you have one. +Keep in mind that using a different instance might require adjustments to the connector configuration described in the next steps. + +.*Expand* to run simple PostgreSQL instance in Docker and import example data +[%collapsible] +=============== + +Let's launch a PostgreSQL container with a user and password, exposed at port `5432`: + +[source,sh] +---- +docker run --name postgres -e POSTGRES_USER=myuser -e POSTGRES_PASSWORD=mypassword -p 5432:5432 -d postgres +---- + +*Download and import example data* + +Next we need to create a directory to store our example dataset for this tutorial. +In your terminal, run the following command: + +[source,sh] +---- +mkdir -p ~/data +---- + +We will use the https://github.com/lerocha/chinook-database/blob/master/ChinookDatabase/DataSources/Chinook_PostgreSql.sql[Chinook dataset] example data. 
+ +Run the following command to download the file to the `~/data` directory: + +[source,sh] +---- +curl -L https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_PostgreSql.sql -o ~/data/Chinook_PostgreSql.sql +---- +// NOTCONSOLE + +Now we need to import the example data into the PostgreSQL container and create the tables. + +Run the following Docker commands to copy our sample data into the container and execute the `psql` script: + +[source,sh] +---- +docker cp ~/data/Chinook_PostgreSql.sql postgres:/ +docker exec -it postgres psql -U myuser -f /Chinook_PostgreSql.sql +---- + +Let's verify that the tables are created correctly in the `chinook` database: + +[source,sh] +---- +docker exec -it postgres psql -U myuser -d chinook -c "\dt" +---- + +The `album` table should contain *347* entries and the `artist` table should contain *275* entries. +=============== + +This tutorial uses a very basic setup. To use advanced functionality such as filtering rules and incremental syncs, enable `track_commit_timestamp` on your PostgreSQL database. Refer to postgresql-connector-client-tutorial for more details. + +Now it's time for the real fun! We'll set up a connector to create a searchable mirror of our PostgreSQL data in {es}. + +[discrete#es-connectors-tutorial-api-create-connector] +==== Create a connector + +We'll use the https://www.elastic.co/guide/en/elasticsearch/reference/master/create-connector-api.html[Create connector API] to create a PostgreSQL connector instance. + +Run the following API call, using the https://www.elastic.co/guide/en/kibana/master/console-kibana.html[Dev Tools Console] or `curl`: + +[source,console] +---- +PUT _connector/my-connector-id +{ + "name": "Music catalog", + "index_name": "music", + "service_type": "postgresql" +} +---- +// TEST[skip:TODO] + +[TIP] +==== +`service_type` refers to the third-party data source you're connecting to. +==== + +Note that we specified the `my-connector-id` ID as a part of the `PUT` request. +We'll need the connector ID to set up and run the connector service locally. + +If you'd prefer to use an autogenerated ID, replace `PUT _connector/my-connector-id` with `POST _connector`. + +[discrete#es-connectors-tutorial-api-deploy-connector] +==== Run connector service + +[NOTE] +==== +The connector service runs automatically in Elastic Cloud, if you're using our managed Elastic managed connectors. +Because we're running a self-managed connector, we need to spin up this service locally. +==== + +Now we'll run the connector service so we can start syncing data from our PostgreSQL instance to {es}. +We'll use the steps outlined in connectors-run-from-docker. + +When running the connectors service on your own infrastructure, you need to provide a configuration file with the following details: + +* Your {es} endpoint (`elasticsearch.host`) +* An {es} API key (`elasticsearch.api_key`) +* Your third-party data source type (`service_type`) +* Your connector ID (`connector_id`) + +[discrete#es-connectors-tutorial-api-create-api-key] +===== Create an API key + +If you haven't already created an API key to access {es}, you can use the {ref}/security-api-create-api-key.html[_security/api_key] endpoint. + +Here, we assume your target {es} index name is `music`. If you use a different index name, adjust the request body accordingly. 
+ +[source,console] +---- +POST /_security/api_key +{ + "name": "music-connector", + "role_descriptors": { + "music-connector-role": { + "cluster": [ + "monitor", + "manage_connector" + ], + "indices": [ + { + "names": [ + "music", + ".search-acl-filter-music", + ".elastic-connectors*" + ], + "privileges": [ + "all" + ], + "allow_restricted_indices": false + } + ] + } + } +} +---- +// TEST[skip:TODO] + +You'll need to use the `encoded` value from the response as the `elasticsearch.api_key` in your configuration file. + +[TIP] +==== +You can also create an API key in the {kib} and Serverless UIs. +==== + +[discrete#es-connectors-tutorial-api-prepare-configuration-file] +===== Prepare the configuration file + +Let's create a directory and a `config.yml` file to store the connector configuration: + +[source,sh] +---- +mkdir -p ~/connectors-config +touch ~/connectors-config/config.yml +---- + +Now, let's add our connector details to the config file. +Open `config.yml` and paste the following configuration, replacing placeholders with your own values: + +[source,yaml] +---- +elasticsearch.host: # Your Elasticsearch endpoint +elasticsearch.api_key: # Your Elasticsearch API key + +connectors: + - connector_id: "my-connector-id" + service_type: "postgresql" +---- + +We provide an https://raw.githubusercontent.com/elastic/connectors/main/config.yml.example[example configuration file] in the `elastic/connectors` repository for reference. + +[discrete#es-connectors-tutorial-api-run-connector-service] +===== Run the connector service + +Now that we have the configuration file set up, we can run the connector service locally. +This will point your connector instance at your {es} deployment. + +Run the following Docker command to start the connector service: + +[source,sh,subs="attributes+"] +---- +docker run \ +-v "$HOME/connectors-config:/config" \ +--rm \ +--tty -i \ +--network host \ +docker.elastic.co/enterprise-search/elastic-connectors:{version}.0 \ +/app/bin/elastic-ingest \ +-c /config/config.yml +---- + +Verify your connector is connected by getting the connector status (should be `needs_configuration`) and `last_seen` field (note that time is reported in UTC). +The `last_seen` field indicates that the connector successfully connected to {es}. + +[source, console] +---- +GET _connector/my-connector-id +---- +// TEST[skip:TODO] + +[discrete#es-connectors-tutorial-api-update-connector-configuration] +==== Configure connector + +Now our connector instance is up and running, but it doesn't yet know _where_ to sync data from. +The final piece of the puzzle is to configure our connector with details about our PostgreSQL instance. +When setting up a connector in the Elastic Cloud or Serverless UIs, you're prompted to add these details in the user interface. + +But because this tutorial is all about working with connectors _programmatically_, we'll use the {ref}/update-connector-configuration-api.html[Update connector configuration API] to add our configuration details. + +[TIP] +==== +Before configuring the connector, ensure that the configuration schema is registered by the service. +For Elastic managed connectors, this occurs shortly after creation via the API. +For self-managed connectors, the schema registers on service startup (once the `config.yml` is populated). + +Configuration updates via the API are possible only _after schema registration_. +Verify this by checking the configuration property returned by the `GET _connector/my-connector-id` request. +It should be non-empty. 
+====
+
+Run the following API call to configure the connector with our PostgreSQL configuration details:
+
+[source, console]
+----
+PUT _connector/my-connector-id/_configuration
+{
+  "values": {
+    "host": "127.0.0.1",
+    "port": 5432,
+    "username": "myuser",
+    "password": "mypassword",
+    "database": "chinook",
+    "schema": "public",
+    "tables": "album,artist"
+  }
+}
+----
+// TEST[skip:TODO]
+
+[NOTE]
+====
+Configuration details are specific to the connector type.
+The keys and values will differ depending on which third-party data source you're connecting to.
+Refer to the individual connector references for these configuration details.
+====
+
+[discrete#es-connectors-tutorial-api-sync]
+==== Sync data
+
+[NOTE]
+====
+We're using a self-managed connector in this tutorial.
+To use these APIs with an Elastic managed connector, there's some extra setup for API keys.
+Refer to the Elastic managed connectors documentation on managing API keys for details.
+====
+
+We're now ready to sync our PostgreSQL data to {es}.
+Run the following API call to start a full sync job:
+
+[source, console]
+----
+POST _connector/_sync_job
+{
+  "id": "my-connector-id",
+  "job_type": "full"
+}
+----
+// TEST[skip:TODO]
+
+To store data in {es}, the connector needs to create an index.
+When we created the connector, we specified the `music` index.
+The connector will create and configure this {es} index before launching the sync job.
+
+[TIP]
+====
+In the approach we've used here, the connector will use {ref}/mapping.html#mapping-dynamic[dynamic mappings] to automatically infer the data types of your fields.
+In a real-world scenario you would use the {es} {ref}/indices-create-index.html[Create index API] to first create the index with the desired field mappings and index settings.
+Defining your own mappings upfront gives you more control over how your data is indexed.
+====
+
+[discrete#es-connectors-tutorial-api-check-sync-status]
+===== Check sync status
+
+Use the {ref}/get-connector-sync-job-api.html[Get sync job API] to track the status and progress of the sync job.
+By default, the most recent job statuses are returned first.
+Run the following API call to check the status of the sync job:
+
+[source, console]
+----
+GET _connector/_sync_job?connector_id=my-connector-id&size=1
+----
+// TEST[skip:TODO]
+
+The job document will be updated as the sync progresses; you can poll it as often as you'd like for updates.
+
+Once the job completes, the status should be `completed` and `indexed_document_count` should be *622*.
+
+Verify that data is present in the `music` index with the following API call:
+
+[source, console]
+----
+GET music/_count
+----
+// TEST[skip:TODO]
+
+{es} stores data in documents, which are JSON objects.
+List the individual documents with the following API call:
+
+[source, console]
+----
+GET music/_search
+----
+// TEST[skip:TODO]
+
+[discrete#es-connectors-tutorial-api-troubleshooting]
+=== Troubleshooting
+
+Use the following command to inspect the latest sync job's status:
+
+[source, console]
+----
+GET _connector/_sync_job?connector_id=my-connector-id&size=1
+----
+// TEST[skip:TODO]
+
+If the connector encountered any errors during the sync, you'll find these in the `error` field.
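+
+If you're working outside the Dev Tools Console, you can run the same check with `curl`.
+The following is a sketch only: it assumes your endpoint and API key are exported as `ES_URL` and `ES_API_KEY`, that `jq` is installed, and that the response lists jobs under a `results` array.
+
+[source,sh]
+----
+# Fetch the most recent sync job for the connector and print its status,
+# error (if any), and indexed document count.
+curl -s -H "Authorization: ApiKey $ES_API_KEY" \
+  "$ES_URL/_connector/_sync_job?connector_id=my-connector-id&size=1" \
+  | jq '.results[0] | {status, error, indexed_document_count}'
+----
+// NOTCONSOLE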
+
+[discrete#es-connectors-tutorial-api-cleanup]
+==== Cleaning up
+
+To delete the connector and its associated sync jobs, run this command:
+
+[source, console]
+----
+DELETE _connector/my-connector-id?delete_sync_jobs=true
+----
+// TEST[skip:TODO]
+
+This won't delete the Elasticsearch index that was created by the connector to store the data.
+Delete the `music` index by running the following command:
+
+[source, console]
+----
+DELETE music
+----
+// TEST[skip:TODO]
+
+To remove the PostgreSQL container, run the following commands:
+
+[source,sh]
+----
+docker stop postgres
+docker rm postgres
+----
+
+To remove the connector service, find its container ID with `docker ps`, then run the following commands, replacing `<container_id>` with that value.
+(If you started the container with the `--rm` flag, it is removed automatically once stopped and you can skip the `docker rm` step.)
+
+[source,sh]
+----
+docker stop <container_id>
+docker rm <container_id>
+----
+
+[discrete#es-connectors-tutorial-api-next-steps]
+==== Next steps
+
+Congratulations! You've successfully set up a self-managed connector using the Connector APIs.
+
+Here are some next steps to explore:
+
+* Learn more about the {ref}/connector-apis.html[Connector APIs].
+* Learn how to deploy {es}, {kib}, and the connectors service using Docker Compose in our https://github.com/elastic/connectors/tree/main/scripts/stack#readme[quickstart guide].
diff --git a/docs/reference/connector/docs/connectors-APIs.asciidoc b/docs/reference/connector/docs/connectors-APIs.asciidoc
new file mode 100644
index 0000000000000..ad6ad78353fc0
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-APIs.asciidoc
@@ -0,0 +1,22 @@
+[#es-connectors-apis]
+== Connector APIs
+
+In 8.12 we introduced a set of APIs to create and manage Elastic connectors and sync jobs.
+Learn more in the {es} REST API documentation:
+
+* {ref}/connector-apis.html[Connector APIs]
+* {ref}/connector-apis.html#sync-job-apis[Sync job APIs]
+
+.Connector API tutorial
+[sidebar]
+--
+Check out this concrete <> to get started with the Connector APIs.
+--
+
+[discrete#es-connectors-apis-cli]
+=== Command-line interface
+
+We also provide a command-line interface.
+Learn more in the https://github.com/elastic/connectors/blob/main/docs/CLI.md[`elastic/connectors` repo].
+
+Use these tools if you'd like to work with connectors and sync jobs programmatically, without using the UI.
\ No newline at end of file
diff --git a/docs/reference/connector/docs/connectors-architecture.asciidoc b/docs/reference/connector/docs/connectors-architecture.asciidoc
new file mode 100644
index 0000000000000..3269f2badb308
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-architecture.asciidoc
@@ -0,0 +1,45 @@
+[#es-connectors-overview-architecture]
+=== Internal knowledge search architecture
+++++
+Internal knowledge search
+++++
+
+The following section provides a high-level overview of common architecture approaches for the internal knowledge search use case (AKA workplace search).
+
+[discrete#es-connectors-overview-architecture-hybrid]
+==== Hybrid architecture
+
+Data is synced to an Elastic Cloud deployment through managed connectors and/or self-managed connectors.
+A self-managed search application exposes the relevant data that your end users are authorized to see in a search experience.
+ +Summary: + +* The best combination in terms of flexibility and out-of-the box functionality +* Integrates with Elastic Cloud hosted managed connectors to bring data to Elasticsearch with minimal operational overhead +* Self-managed connectors allow enterprises to adhere to strict access policies when using firewalls that don't allow incoming connections to data sources, while outgoing traffic is easier to control +* Provides additional functionality available for self-managed connectors such as the <> +* Basic functionality available for Standard licenses, advanced features for Platinum licenses + +The following diagram provides a high-level overview of the hybrid internal knowledge search architecture. + +[.screenshot] +image::images/hybrid-architecture.png[align="center",width="50%"] + +[discrete#es-connectors-overview-architecture-self-managed] +==== Self-managed architecture + +Data is synced to an Elastic deployment through self-managed connectors. +A self-managed search application exposes the relevant data that your end users are authorized to see in a search experience. + +Summary: + +* Gives flexibility to build custom solutions tailored to specific business requirements and internal processes +* Allows enterprises to adhere to strict access policies when using firewalls that don't allow incoming connections to data sources, while outgoing traffic is easier to control +* Provides additional functionality available for self-managed connectors such as the <> +* Feasible for air-gapped environments +* Requires Platinum license for full spectrum of features and self-managed connectors + +The following diagram provides a high-level overview of the self-managed internal knowledge search architecture. + +[.screenshot] +image::images/self-managed-architecture.png[align="center",width="50%"] \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-azure-blob.asciidoc b/docs/reference/connector/docs/connectors-azure-blob.asciidoc new file mode 100644 index 0000000000000..0e0978a423e3c --- /dev/null +++ b/docs/reference/connector/docs/connectors-azure-blob.asciidoc @@ -0,0 +1,291 @@ +[#es-connectors-azure-blob] +=== Elastic Azure Blob Storage connector reference +++++ +Azure Blob Storage +++++ +// Attributes used in this file +:service-name: Azure Blob Storage +:service-name-stub: azure_blob_storage + +The _Elastic Azure Blob Storage connector_ is a <> for https://azure.microsoft.com/en-us/services/storage/blobs/[Azure Blob Storage^]. + +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. 
+******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-azure-blob-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-azure-blob-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-azure-blob-compatability] +===== Compatibility + +This connector has not been tested with Azure Government. +Therefore we cannot guarantee that it will work with Azure Government endpoints. +For more information on Azure Government compared to Global Azure, refer to the + https://learn.microsoft.com/en-us/azure/azure-government/compare-azure-government-global-azure[official Microsoft documentation^]. + +[discrete#es-connectors-{service-name-stub}-create-native-connector] +===== Create {service-name} connector + +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-azure-blob-usage] +===== Usage + +To use this connector as a *managed connector*, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-azure-blob-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +Account name:: +Name of Azure Blob Storage account. + +Account key:: +https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal[Account key^] for the Azure Blob Storage account. + +Blob endpoint:: +Endpoint for the Blob Service. + +Containers:: +List of containers to index. +`*` will index all containers. + +[discrete#es-connectors-azure-blob-documents-syncs] +===== Documents and syncs + +The connector will fetch all data available in the container. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-azure-blob-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-azure-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently filtering is controlled via ingest pipelines. + +[discrete#es-connectors-azure-blob-content-extraction] +===== Content extraction + +See <>. + +[discrete#es-connectors-azure-blob-known-issues] +===== Known issues + +This connector has the following known issues: + +* *`lease data` and `tier` fields are not updated in Elasticsearch indices* ++ +This is because the blob timestamp is not updated. +Refer to https://github.com/elastic/connectors-python/issues/289[Github issue^]. + +[discrete#es-connectors-azure-blob-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-azure-blob-security] +===== Security + +See <>. 
+ +View the {connectors-python}/connectors/sources/azure_blob_storage.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_) + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-azure-blob-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-azure-blob-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +This self-managed connector is compatible with Elastic versions *8.6.0+*. +To use this connector, satisfy all <>. + +[discrete#es-connectors-azure-blob-client-compatability] +===== Compatibility + +This connector has not been tested with Azure Government. +Therefore we cannot guarantee that it will work with Azure Government endpoints. +For more information on Azure Government compared to Global Azure, refer to the + https://learn.microsoft.com/en-us/azure/azure-government/compare-azure-government-global-azure[official Microsoft documentation^]. + +[discrete#es-connectors-{service-name-stub}-create-connector-client] +===== Create {service-name} connector + +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-azure-blob-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <> +For additional usage operations, see <>. + +[discrete#es-connectors-azure-blob-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/azure_blob_storage.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`account_name`:: +Name of Azure Blob Storage account. + +`account_key`:: +https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal[Account key^] for the Azure Blob Storage account. + +`blob_endpoint`:: +Endpoint for the Blob Service. + +`containers`:: +List of containers to index. +`*` will index all containers. + +`retry_count`:: +Number of retry attempts after a failed call. +Default value is `3`. + +`concurrent_downloads`:: +Number of concurrent downloads for fetching content. +Default value is `100`. + +`use_text_extraction_service`:: +Requires a separate deployment of the <>. Requires that ingest pipeline settings disable text extraction. +Default value is `False`. + +[discrete#es-connectors-azure-blob-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-azure-blob-client-documents-syncs] +===== Documents and syncs + +The connector will fetch all data available in the container. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. 
+====
+
+[discrete#es-connectors-azure-blob-client-sync-types]
+====== Sync types
+
+<> are supported by default for all connectors.
+
+This connector also supports <>.
+
+[discrete#es-connectors-azure-blob-client-sync-rules]
+===== Sync rules
+
+<> are identical for all connectors and are available by default.
+
+Advanced sync rules are not available for this connector in the present version.
+Currently filtering is controlled via ingest pipelines.
+
+[discrete#es-connectors-azure-blob-client-content-extraction]
+===== Content extraction
+
+See <>.
+
+[discrete#es-connectors-azure-blob-client-testing]
+===== End-to-end testing
+
+The connector framework enables operators to run functional tests against a real data source.
+Refer to <> for more details.
+
+To perform E2E testing for the Azure Blob Storage connector, run the following command:
+
+[source,shell]
+----
+$ make ftest NAME=azure_blob_storage
+----
+
+For faster tests, add the `DATA_SIZE=small` flag:
+
+[source,shell]
+----
+make ftest NAME=azure_blob_storage DATA_SIZE=small
+----
+
+[discrete#es-connectors-azure-blob-client-known-issues]
+===== Known issues
+
+This connector has the following known issues:
+
+* *`lease data` and `tier` fields are not updated in Elasticsearch indices*
++
+This is because the blob timestamp is not updated.
+Refer to https://github.com/elastic/connectors/issues/289[Github issue^].
+
+[discrete#es-connectors-azure-blob-client-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-azure-blob-client-security]
+===== Security
+
+See <>.
+
+// Closing the collapsible section
+===============
diff --git a/docs/reference/connector/docs/connectors-box.asciidoc b/docs/reference/connector/docs/connectors-box.asciidoc
new file mode 100644
index 0000000000000..07e4308d67c20
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-box.asciidoc
@@ -0,0 +1,379 @@
+[#es-connectors-box]
+=== Elastic Box connector reference
+++++
+Box
+++++
+// Attributes used in this file
+:service-name: Box
+:service-name-stub: box
+
+The Box connector is written in Python using the {connectors-python}[Elastic connector framework^].
+
+View the {connectors-python}/connectors/sources/box.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_).
+
+// //////// //// //// //// //// //// //// ////////
+// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) ///////
+// //////// //// //// //// //// //// //// ////////
+
+[discrete#es-connectors-box-native-connector-reference]
+==== *Elastic managed connector reference*
+
+.View *Elastic managed connector* reference
+[%collapsible]
+===============
+
+[discrete#es-connectors-box-availability-and-prerequisites]
+===== Availability and prerequisites
+
+This connector is available as a *managed connector* as of Elastic version *8.14.0*.
+
+To use this connector natively in Elastic Cloud, satisfy all <>.
+
+[NOTE]
+====
+This connector is in *technical preview* and is subject to change.
+The design and code is less mature than official GA features and is being provided as-is with no warranties.
+Technical preview features are not subject to the support SLA of official GA features.
+==== + +[discrete#es-connectors-box-create-connector-native] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-box-usage] +===== Usage + +To use this connector as a *self-managed connector*, use the *Box* tile from the connectors list OR *Customized connector* workflow. + +For additional operations, see <>. + +[discrete#es-connectors-box-api-authorization] +===== Box API Authorization + +[discrete#es-connectors-box-free-account] +====== Box Free Account + +[discrete#es-connectors-box-create-oauth-custom-app] +======= Create Box User Authentication (OAuth 2.0) Custom App + +You'll need to create an OAuth app in the Box developer console by following these steps: + +1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select User authentication (OAuth 2.0). +2. Add the URL of the web page in *Redirect URIs*, which is accessible by you. +3. Check "Write all files and folders stored in Box" in Application Scopes. +4. Once the app is created, *Client ID* and *Client secret* values are available in the configuration tab. Keep these handy. + +[discrete#es-connectors-box-connector-generate-a-refresh-token] +======= Generate a refresh Token + +To generate a refresh token, follow these steps: + +1. Go to the following URL, replacing `` with the *Client ID* value saved earlier. +For example: ++ +[source,bash] +---- +https://account.box.com/api/oauth2/authorize?response_type=code&client_id= +---- ++ +2. Grant access to your application. +3. You will now be redirected to the web page that you configured in *Redirect URIs*, and the HTTP response should contain an *authorization code* that you'll use to generate a refresh token. +*Note:* Authorization codes to generate refresh tokens can only be used once and are only valid for 30 seconds. +4. In your terminal, run the following `curl` command, replacing ``, ` and ` with the values you saved earlier: ++ +[source,bash] +---- +curl -i -X POST "https://api.box.com/oauth2/token" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "client_id=" \ + -d "client_secret=" \ + -d "code=" \ + -d "grant_type=authorization_code" +---- ++ +Save the refresh token from the response. You'll need this for the connector configuration. + +[discrete#es-connectors-box-enterprise-account] +====== Box Enterprise Account + +[discrete#es-connectors-box-connector-create-box-server-authentication-client-credentials-grant-custom-app] +======= Create Box Server Authentication (Client Credentials Grant) Custom App + +1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select Server Authentication (Client Credentials Grant). +2. Check following permissions: ++ +* "Write all files and folders stored in Box" in Application Scopes +* "Make API calls using the as-user header" in Advanced Features +3. Select `App + Enterprise Access` in App Access Level. +4. Authorize your application from the admin console. +Save the *Client Credentials* and *Enterprise ID*. You'll need these to configure the connector. + +[discrete#es-connectors-box-configuration] +===== Configuration + +`Box Account` (required):: +Dropdown to determine Box Account type: `Box Free Account` or `Box Enterprise Account`. Default value is `Box Free Account`. + +`Client ID` (required):: +The Client ID to authenticate with Box instance. + +`Client Secret` (required):: +The Client Secret to authenticate with Box instance. 
+ +`Refresh Token` (required if Box Account is Box Free):: +The Refresh Token to generate Access Token. +*NOTE:* If the process terminates, you'll need to generate a new refresh token. + +`Enterprise ID` (required if Box Account is Box Enterprise):: +The Enterprise ID to authenticate with Box instance. + +[discrete#es-connectors-box-content-extraction] +===== Content Extraction + +Refer to <>. + +[discrete#es-connectors-box-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Files* +* *Folders* + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-box-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-box-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-box-advanced-sync-rules] +===== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-box-known-issues] +===== Known issues + +There are no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-box-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-box-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-box-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-box-client-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. To use this connector, satisfy all <>. + +[NOTE] +==== +This connector is in *technical preview* and is subject to change. +The design and code is less mature than official GA features and is being provided as-is with no warranties. +Technical preview features are not subject to the support SLA of official GA features. +==== + +[discrete#es-connectors-box-client-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-box-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, use the *Box* tile from the connectors list OR *Customized connector* workflow. + +For additional operations, see <>. + +[discrete#es-connectors-box-client-api-authorization] +===== Box API Authorization + +[discrete#es-connectors-box-client-free-account] +====== Box Free Account + +[discrete#es-connectors-box-client-create-oauth-custom-app] +======= Create Box User Authentication (OAuth 2.0) Custom App + +You'll need to create an OAuth app in the Box developer console by following these steps: + +1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select User authentication (OAuth 2.0). +2. Add the URL of the web page in *Redirect URIs*, which is accessible by you. +3. Check "Write all files and folders stored in Box" in Application Scopes. +4. 
Once the app is created, *Client ID* and *Client secret* values are available in the configuration tab. Keep these handy. + +[discrete#es-connectors-box-client-connector-generate-a-refresh-token] +======= Generate a refresh Token + +To generate a refresh token, follow these steps: + +1. Go to the following URL, replacing `` with the *Client ID* value saved earlier. +For example: ++ +[source,bash] +---- +https://account.box.com/api/oauth2/authorize?response_type=code&client_id= +---- ++ +2. Grant access to your application. +3. You will now be redirected to the web page that you configured in *Redirect URIs*, and the HTTP response should contain an *authorization code* that you'll use to generate a refresh token. +*Note:* Authorization codes to generate refresh tokens can only be used once and are only valid for 30 seconds. +4. In your terminal, run the following `curl` command, replacing ``, ` and ` with the values you saved earlier: ++ +[source,bash] +---- +curl -i -X POST "https://api.box.com/oauth2/token" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "client_id=" \ + -d "client_secret=" \ + -d "code=" \ + -d "grant_type=authorization_code" +---- ++ +Save the refresh token from the response. You'll need this for the connector configuration. + +[discrete#es-connectors-box-client-enterprise-account] +====== Box Enterprise Account + +[discrete#es-connectors-box-client-connector-create-box-server-authentication-client-credentials-grant-custom-app] +======= Create Box Server Authentication (Client Credentials Grant) Custom App + +1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select Server Authentication (Client Credentials Grant). +2. Check following permissions: ++ +* "Write all files and folders stored in Box" in Application Scopes +* "Make API calls using the as-user header" in Advanced Features +3. Select `App + Enterprise Access` in App Access Level. +4. Authorize your application from the admin console. +Save the *Client Credentials* and *Enterprise ID*. You'll need these to configure the connector. + +[discrete#es-connectors-box-client-configuration] +===== Configuration + +`Box Account` (required):: +Dropdown to determine Box Account type: `Box Free Account` or `Box Enterprise Account`. Default value is `Box Free Account`. + +`Client ID` (required):: +The Client ID to authenticate with Box instance. + +`Client Secret` (required):: +The Client Secret to authenticate with Box instance. + +`Refresh Token` (required if Box Account is Box Free):: +The Refresh Token to generate Access Token. +*NOTE:* If the process terminates, you'll need to generate a new refresh token. + +`Enterprise ID` (required if Box Account is Box Enterprise):: +The Enterprise ID to authenticate with Box instance. + +[discrete#es-connectors-box-client-client-docker] +====== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-box-client-content-extraction] +===== Content Extraction + +Refer to <>. + +[discrete#es-connectors-box-client-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Files* +* *Folders* + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. 
+==== + +[discrete#es-connectors-box-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-box-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-box-client-advanced-sync-rules] +===== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-box-client-end-to-end-testing] +===== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the Box connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=box +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=box DATA_SIZE=small +---- + +[discrete#es-connectors-box-client-known-issues] +===== Known issues + +There are no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-box-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-box-client-security] +===== Security + +See <>. + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-confluence.asciidoc b/docs/reference/connector/docs/connectors-confluence.asciidoc new file mode 100644 index 0000000000000..61946745195fc --- /dev/null +++ b/docs/reference/connector/docs/connectors-confluence.asciidoc @@ -0,0 +1,541 @@ +[#es-connectors-confluence] +=== Elastic Confluence connector reference +++++ +Confluence +++++ +// Attributes used in this file +:service-name: Confluence +:service-name-stub: confluence + +The _Elastic Confluence connector_ is a <> for https://www.atlassian.com/software/confluence[Atlassian Confluence^]. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-confluence-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-confluence-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. + +[NOTE] +==== +Confluence Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. +==== + +To use this connector natively in Elastic Cloud, satisfy all <>. 
+ +[discrete#es-connectors-confluence-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-confluence-usage] +===== Usage + +To use this connector as a *managed connector*, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-confluence-compatability] +===== Compatibility + +* Confluence Cloud or Confluence Server/Data Center *versions 7 or later*. + +[discrete#es-connectors-confluence-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +Confluence data source:: +Dropdown to determine the Confluence platform type: `Confluence Cloud`, `Confluence Server`, or `Confluence Data Center`. Default value is `Confluence Server`. + +Confluence Data Center username:: +The username of the account for Confluence Data Center. + +Confluence Data Center password:: +The password of the account to be used for the Confluence Data Center. + +Confluence Server username:: +The username of the account for Confluence server. + +Confluence Server password:: +The password of the account to be used for Confluence Server. + +Confluence Cloud account email:: +The account email for Confluence Cloud. + +Confluence Cloud API token:: +The API Token to authenticate with Confluence cloud. + +Confluence URL label:: +The domain where the Confluence is hosted. Examples: + +* `https://192.158.1.38:8080/` +* `https://test_user.atlassian.net/` + +Confluence space keys:: +Comma-separated list of https://confluence.atlassian.com/doc/space-keys-829076188.html[Space Keys] to fetch data from Confluence server or cloud. If the value is `*`, the connector will fetch data from all spaces present in the configured `spaces`. Default value is `*`. Examples: ++ +* `EC`, `TP` +* `*` + +Enable indexing labels:: +Toggle to enable syncing of labels from pages. +NOTE: This will increase the amount of network calls to the source, and may decrease performance. + +Enable SSL:: +Whether SSL verification will be enabled. Default value is `False`. + +SSL certificate:: +Content of SSL certificate. Note: If `ssl_enabled` is `False`, the value in this field is ignored. Example certificate: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +Enable document level security:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs will fetch users' access control lists and store them in a separate index. +[NOTE] +==== +To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. +This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. +==== + +[discrete#es-connectors-confluence-documents-syncs] +===== Documents and syncs + +The connector syncs the following Confluence object types: + +* Pages +* Spaces +* Blog Posts +* Attachments + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced by default. +You must first enable <>. +Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. 
+==== + +[discrete#es-connectors-confluence-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-confluence-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +This connector supports <> for remote filtering. +These rules cover complex query-and-filter scenarios that cannot be expressed with = now('-5w')" + }, + { + "query": "lastmodified < startOfYear()" + } +] +---- +// NOTCONSOLE + +*Example 3*: Query for indexing only given types in a *Space* with key 'SD'. + +[source,js] +---- +[ + { + "query": "type in ('page', 'attachment') AND space.key = 'SD'" + } +] +---- +// NOTCONSOLE + +[NOTE] +==== +Syncing recently created/updated items in Confluence may be delayed when using advanced sync rules, because the search endpoint used for CQL queries returns stale results in the response. +For more details refer to the following issue in the https://jira.atlassian.com/browse/CONFCLOUD-73997[Confluence documentation^]. +==== + +[discrete#es-connectors-confluence-document-level-security] +===== Document level security + +[NOTE] +==== +DLS is automatically available for Atlassian Confluence Cloud since 8.9.0. +DLS is available since 8.14.0 for Confluence Server and Confluence Data Center, but requires installing https://marketplace.atlassian.com/apps/1217507/extender-for-confluence?tab=overview&hosting=datacenter[Extender for Confluence]. +==== + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + + +[WARNING] +==== +When the `data_source` is set to Confluence Data Center or Server, the connector will only fetch 1000 users for access control syncs, due a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^]. +==== + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-confluence-content-extraction] +===== Content Extraction + +See <>. + +[discrete#es-connectors-confluence-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-confluence-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-confluence-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-confluence-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-confluence-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *self-managed connector* using the *Elastic connector framework*. +This self-managed connector is compatible with Elastic versions *8.7.0+*. + +[NOTE] +==== +Confluence Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. 
Technical preview features are not subject to the support SLA of official GA features. +==== + +To use this connector, satisfy all <>. + +[discrete#es-connectors-confluence-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-confluence-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <> +For additional usage operations, see <>. + +[discrete#es-connectors-confluence-client-compatability] +===== Compatibility + +* Confluence Cloud or Confluence Server/Data Center *versions 7 or later* + +[discrete#es-connectors-confluence-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/confluence.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`data_source`:: +Dropdown to determine the Confluence platform type: `Confluence Cloud`, `Confluence Server`, or `Confluence Data Center`. Default value is `Confluence Server`. + +`data_center_username`:: +The username of the account for Confluence Data Center. + +`data_center_password`:: +The password of the account to be used for the Confluence Data Center. + +`username`:: +The username of the account for Confluence Server. + +`password`:: +The password of the account to be used for the Confluence server. + +`account_email`:: +The account email for the Confluence Cloud. + +`api_token`:: +The API Token to authenticate with Confluence Cloud. + +`confluence_url`:: +The domain where the Confluence instance is hosted. Examples: + +* `https://192.158.1.38:8080/` +* `https://test_user.atlassian.net/` + +`spaces`:: +Comma-separated list of https://confluence.atlassian.com/doc/space-keys-829076188.html[Space Keys] to fetch data from Confluence. If the value is `*`, the connector will fetch data from all spaces present in the configured `spaces`. Default value is `*`. Examples: ++ +* `EC`, `TP` +* `*` + +`index_labels`:: +Toggle to enable syncing of labels from pages. +NOTE: This will increase the amount of network calls to the source, and may decrease performance. + +`ssl_enabled`:: +Whether SSL verification will be enabled. Default value is `False`. + +`ssl_ca`:: +Content of SSL certificate. Note: If `ssl_enabled` is `False`, the value in this field is ignored. Example certificate: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +`retry_count`:: +The number of retry attempts after failed request to Confluence. Default value is `3`. + +`concurrent_downloads`:: +The number of concurrent downloads for fetching the attachment content. This speeds up the content extraction of attachments. Defaults to `50`. + +`use_document_level_security`:: +Toggle to enable <>. ++ +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs will fetch users' access control lists and store them in a separate index. 
++ +[NOTE] +==== +To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. +This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. +==== + +`use_text_extraction_service`:: +Toggle to enable the local text extraction service. Default value is `False`. +Requires a separate deployment of the Elastic Text Extraction Service. +Requires that ingest pipeline settings disable text extraction. + + +[discrete#es-connectors-confluence-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-confluence-client-documents-syncs] +===== Documents and syncs + +The connector syncs the following Confluence object types: + +* Pages +* Spaces +* Blog Posts +* Attachments + +[NOTE] +==== +* Content of files bigger than 10 MB won't be extracted. +* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-confluence-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-confluence-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +This connector supports <> for remote filtering. +These rules cover complex query-and-filter scenarios that cannot be expressed with = now('-5w')" + }, + { + "query": "lastmodified < startOfYear()" + } +] +---- +// NOTCONSOLE + +*Example 3*: Query for indexing only given types in a *Space* with key 'SD'. + +[source,js] +---- +[ + { + "query": "type in ('page', 'attachment') AND space.key = 'SD'" + } +] +---- +// NOTCONSOLE + +[NOTE] +==== +Syncing recently created/updated items in Confluence may be delayed when using advanced sync rules, because the search endpoint used for CQL queries returns stale results in the response. +For more details refer to the following issue in the https://jira.atlassian.com/browse/CONFCLOUD-73997[Confluence documentation^]. +==== + +[discrete#es-connectors-confluence-client-document-level-security] +===== Document level security + +[NOTE] +==== +DLS is automatically available for Atlassian Confluence Cloud since 8.9.0. +DLS is available since 8.14.0 for Confluence Server and Confluence Data Center, but requires installing https://marketplace.atlassian.com/apps/1217507/extender-for-confluence?tab=overview&hosting=datacenter[Extender for Confluence]. +==== + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[WARNING] +==== +When the `data_source` is set to Confluence Data Center or Server, the connector will only fetch 1000 users for access control syncs, due a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^]. +==== + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-confluence-client-content-extraction] +===== Content Extraction + +See <>. 
+ +[discrete#es-connectors-confluence-client-connector-client-operations] +===== Self-managed connector operations + +[discrete#es-connectors-confluence-client-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the Confluence connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=confluence +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=confluence DATA_SIZE=small +---- + +[discrete#es-connectors-confluence-client-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-confluence-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-confluence-client-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-content-extraction.asciidoc b/docs/reference/connector/docs/connectors-content-extraction.asciidoc new file mode 100644 index 0000000000000..cc456e76fc65e --- /dev/null +++ b/docs/reference/connector/docs/connectors-content-extraction.asciidoc @@ -0,0 +1,360 @@ +[#es-connectors-content-extraction] +=== Content extraction + +Connectors use the {ref}/attachment.html[Elastic ingest attachment processor^] to extract file contents. +The processor extracts files using the https://tika.apache.org[Apache Tika^] text extraction library. +The logic for content extraction is defined in {connectors-python}/connectors/utils.py[utils.py^]. + +While intended primarily for PDF and Microsoft Office formats, you can use any of the <>. + +Enterprise Search uses an {ref}/ingest.html[Elasticsearch ingest pipeline^] to power the web crawler's binary content extraction. +The default pipeline, `ent-search-generic-ingestion`, is automatically created when Enterprise Search first starts. + +You can {ref}/ingest.html#create-manage-ingest-pipelines[view^] this pipeline in Kibana. +Customizing your pipeline usage is also an option. +See {ref}/ingest-pipeline-search.html[Ingest pipelines for Search indices]. + +For advanced use cases, the <> can be used to extract content from files larger than 10MB. + +[discrete#es-connectors-content-extraction-supported-file-types] +==== Supported file types + +The following file types are supported: + +* `.txt` +* `.py` +* `.rst` +* `.html` +* `.markdown` +* `.json` +* `.xml` +* `.csv` +* `.md` +* `.ppt` +* `.rtf` +* `.docx` +* `.odt` +* `.xls` +* `.xlsx` +* `.rb` +* `.paper` +* `.sh` +* `.pptx` +* `.pdf` +* `.doc` + +[NOTE] +==== +The ingest attachment processor does not support compressed files, e.g., an archive file containing a set of PDFs. +Expand the archive file and make individual uncompressed files available for the connector to process. +==== + +[discrete#es-connectors-content-extraction-local] +==== Extraction Service + +[NOTE] +==== +Currently, content extraction from large files via the Extraction Service is available for a subset of our **self-managed connectors**. +It is not available for Elastic managed connectors running on Elastic Cloud. +This feature is in *beta*. +==== + +Standard content extraction is done via the Attachment Processor, through Elasticsearch Ingest Pipelines. 
+The self-managed connector limits file sizes for pipeline extraction to 10MB per file (Elasticsearch also has a hard limit of 100MB per file). + +For use cases that require extracting content from files larger than these limits, the *self-managed extraction service* can be used for self-managed connectors. +Instead of sending the file as an `attachment` to Elasticsearch, the file's content is extracted at the edge by the extraction service before ingestion. +The extracted text is then included as the `body` field of a document when it is ingested. + +To use this feature, you will need to do the following: + +* <> +* <> +* Set the value of the configurable field `use_text_extraction_service` to `true` + +[TIP] +==== +The data extraction service code is now available in this public repository: https://github.com/elastic/data-extraction-service. +==== + +[discrete#es-connectors-content-extraction-available-connectors] +===== Available connectors + +Local content extraction is available for the following self-managed connectors: + +include::_connectors-list-local-content-extraction.asciidoc[] + +[discrete#es-connectors-content-extraction-data-extraction-service] +===== Running the extraction service + +Self-hosted content extraction is handled by a *separate* extraction service. + +The versions for the extraction service do not align with the Elastic stack. +For version `8.11.x`, you should use extraction service version `0.3.x`. + +You can run the service with the following command: + +[source,bash] +---- +$ docker run \ + -p 8090:8090 \ + -it \ + --name extraction-service \ + docker.elastic.co/integrations/data-extraction-service:$EXTRACTION_SERVICE_VERSION +---- + +[discrete#es-connectors-extraction-service-configuration] +===== Configuring the extraction service + +You can enable your self-managed connector to use the self-hosted extraction service by adding the required configuration. +The self-managed connector determines if the extraction service is enabled by the presence of these fields in the configuration file. + +1. Open the `config.yml` configuration file in your text editor of choice. +2. Add the following fields. They can be added anywhere in the file, so long as they begin at the root level. + +[source,yaml] +---- +# data-extraction-service settings +extraction_service: + host: http://localhost:8090 +---- + +[NOTE] +==== +There is no password protection between the self-managed connector and the extraction service. +Self-hosted extraction should only be used if the two services are running on the same network and behind the same firewall. +==== + +[options="header"] +|======= +|Field|Description +|`host`|The endpoint for the extraction service. `http://localhost:8090` can be used if it is running on the same server as your self-managed connector. +|======= + +The self-managed connector will perform a preflight check against the configured `host` value at startup. +The following line will be output to the log if the data extraction service was found and is running normally. + +[source,bash] +---- +Data extraction service found at . +---- + +If you don't see this log at startup, refer to <>. + +[discrete#es-connectors-content-extraction-advanced-configuration] +====== Advanced configuration + +The following fields can be included in the configuration file. +They are optional and will fallback on default values if they are not specified. 
+ +[source,yaml] +---- +# data-extraction-service settings +extraction_service: + host: http://localhost:8090 + timeout: 30 + use_file_pointers: false + stream_chunk_size: 65536 + shared_volume_dir: '/app/files' +---- + +[options="header"] +|======= +|Advanced Field|Description +|`timeout`|Timeout limit in seconds for content extraction. Defaults to `30` if not set. Increase this if you have very large files that timeout during content extraction. In the event of a timeout, the indexed document's `body` field will be an empty string. +|`use_file_pointers`|Whether or not to use file pointers instead of sending files to the extraction service. Defaults to `false`. Refer to <> for more details about this setting. +|`stream_chunk_size`|The size that files are chunked to facilitate streaming to extraction service, in bytes. Defaults to 65536 (64 KB). Only applicable if `use_file_pointers` is `false`. Increasing this value may speed up the connector, but will also increase memory usage. +|`shared_volume_dir`|The shared volume from which the data extraction service will extract files. Defaults to `/app/files`. Only applicable if `use_file_pointers` is `true`. +|======= + +[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers] +===== Using file pointers + +The self-hosted extraction service can be set up to use file pointers instead of sending files via HTTP requests. +File pointers are faster than sending files and consume less memory, but require the connector framework and the extraction service to be able to share a file system. +This can be set up with both a dockerized and non-dockerized self-managed connector. + +[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration] +====== Configuration for non-dockerized self-managed connectors + +If you are running a non-dockerized version of the self-managed connector, you need to determine the local directory where you'll download files for extraction. +This can be anywhere on your file system that you are comfortable using. +Be aware that the self-managed connector will download files with randomized filenames to this directory, so there is a chance that any files already present will be overwritten. +For that reason, we recommend using a dedicated directory for self-hosted extraction. + +[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration-example] +======= Example + +1. For this example, we will be using `/app/files` as both our local directory and our container directory. +When you run the extraction service docker container, you can mount the directory as a volume using the command-line option `-v /app/files:/app/files`. ++ +[source,bash] +---- +$ docker run \ + -p 8090:8090 \ + -it \ + -v /app/files:/app/files \ + --name extraction-service \ + docker.elastic.co/integrations/data-extraction-service:$EXTRACTION_SERVICE_VERSION +---- ++ +[NOTE] +==== +Due to how this feature works in the codebase for non-dockerized setups, **the local filepath and the docker container's filepath must be identical**. +For example, if using `/app/files`, you must mount the directory as `-v /app/files:/app/files`. +If either directory is different, the self-managed connector will be unable to provide an accurate file pointer for the extraction service. This is not a factor when using a dockerized self-managed connector. +==== ++ +2. Next, before running the self-managed connector, be sure to update the config file with the correct information. 
++ +[source,yaml] +---- +# data-extraction-service settings +extraction_service: + host: http://localhost:8090 + use_file_pointers: true + shared_volume_dir: '/app/files' +---- ++ +3. Then all that's left is to start the self-managed connector and run a sync. +If you encounter any unexpected errors, refer to <>. + +[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration-dockerized] +====== Configuration for dockerized self-managed connectors + +When using self-hosted extraction from a dockerized self-managed connector, there are a few extra steps required on top of {connectors-python}/docs/DOCKER.md[running the self-managed connector in docker^]. + +* The self-hosted extraction service will need to share the same network that the self-managed connector and Elasticsearch are sharing. +* The self-managed connector and the extraction service will also need to share a volume. You can decide what directory inside these docker containers the volume will be mounted onto, but the directory must be the same for both docker containers. + +[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration-dockerized-example] +======= Example + +1. First, set up a volume for the two docker containers to share. +This will be where files are downloaded into and then extracted from. ++ +[source,bash] +---- +$ docker volume create --name extraction-service-volume +---- ++ +2. If you haven't set up a network yet, you can create it now. ++ +[source,bash] +---- +$ docker network create elastic +---- ++ +3. Include the docker volume name and the network as arguments when running the extraction service. +For this example, we will be using `/app/files` as our container directory. ++ +[source,bash] +---- +$ docker run \ + -p 8090:8090 \ + -it \ + -v extraction-service-volume:/app/files \ + --network "elastic" \ + --name extraction-service \ + docker.elastic.co/integrations/data-extraction-service:$EXTRACTION_SERVICE_VERSION +---- ++ +4. Next, you can follow the instructions for {connectors-python}/docs/DOCKER.md[running the self-managed connector in docker^] until step `4. Update the configuration file for your self-managed connector`. +When setting up your configuration, be sure to add the following settings for the self-hosted content extraction service. +Note that the `host` will now refer to an internal docker endpoint instead of localhost. ++ +[source,yaml] +---- +# data-extraction-service settings +extraction_service: + host: http://host.docker.internal:8090 + use_file_pointers: true + shared_volume_dir: '/app/files' +---- ++ +5. Next, during step `5. Run the Docker image`, we only need to add our new shared volume in the run command using `-v extraction-service-volume:/app/files`. ++ +[source,bash] +---- +$ docker run \ + -v ~/connectors-config:/config \ + -v extraction-service-volume:/app/files \ + --network "elastic" \ + --tty \ + --rm \ + docker.elastic.co/enterprise-search/elastic-connectors:$CONNECTOR_CLIENT_VERSION \ + /app/bin/elastic-ingest \ + -c /config/config.yml +---- ++ +6. Now the self-managed connector and extraction service docker containers should be set up to share files. +Run a test sync to make sure everything is configured correctly. +If you encounter any unexpected errors, refer to <>. + +[discrete#es-connectors-content-extraction-local-logs] +===== Self-hosted extraction service logs + +The extraction service produces two different log files that may be informative when troubleshooting. 
+These are saved at the following file locations internally in the docker container: + +* `/var/log/openresty.log` for request traffic logs +* `/var/log/tika.log` for tikaserver jar logs + +Logs can be viewed from outside of docker by combining `docker exec` with the `tail` command. + +[source,bash] +---- +$ docker exec extraction-service /bin/sh -c "tail /var/log/openresty.log" +$ docker exec extraction-service /bin/sh -c "tail /var/log/tika.log" +---- + +[discrete#es-connectors-content-extraction-troubleshooting] +===== Troubleshooting the self-hosted extraction service + +The following warning logs may appear while using self-hosted extraction service. +Each log in this section is followed by a description of what may have happened, and suggested fixes. + +[source,bash] +---- +Extraction service is not configured, skipping its preflight check. +---- + +The configuration file is missing the `extraction_service.host` field. +If you want to use this service, check that the configuration is formatted correctly and that the required field is present. + +[source,bash] +---- +Data extraction service found at , but health-check returned . +---- + +The `/ping` endpoint returned a non-`200` response. +This could mean that the extraction service is unhealthy and may need to be restarted, or that the configured `extraction_service.host` is incorrect. +You can find more information about what happened in the <>. + +[source,bash] +---- +Expected to find a running instance of data extraction service at but failed. . +---- + +The health check returned either a timeout or client connection error. + +* A timeout may be caused by the extraction service server not running, or not being accessible from the configured `host` in the configuration file. +* A server connection error is an internal error on the extraction service. You will need to investigate the <>. + +[source,bash] +---- +Extraction service has been initialised but no extraction service configuration was found. No text will be extracted for this sync. +---- + +You have enabled self-hosted extraction service for the connector, but the configuration file is missing the `extraction_service.host` field. +Check that the configuration is formatted correctly and that the required field is present. + +[source,bash] +---- +Extraction service could not parse . Status: ; : . +---- + +This warning will appear every time a file is not extractable. +Generally the `` will provide an explanation for why extraction failed. +Contact support if the message is unclear. +When a file fails extraction, it will be indexed with an empty string in the `body` field. diff --git a/docs/reference/connector/docs/connectors-content-syncs.asciidoc b/docs/reference/connector/docs/connectors-content-syncs.asciidoc new file mode 100644 index 0000000000000..f1745382677a2 --- /dev/null +++ b/docs/reference/connector/docs/connectors-content-syncs.asciidoc @@ -0,0 +1,64 @@ +[#es-connectors-sync-types] +== Content syncs + +Elastic connectors have two types of content syncs: + +* <> +* <> + +[discrete#es-connectors-sync-types-full] +=== Full syncs + +[NOTE] +==== +We recommend running a full sync whenever <> are modified +==== + +A full sync syncs all documents in the third-party data source into {es}. + +It also deletes any documents in {es}, which no longer exist in the third-party data source. + +A full sync, by definition, takes longer than an incremental sync but it ensures full data consistency. + +A full sync is available for all connectors. + +You can <> or <> a full sync job. 
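+
+For illustration, one way to queue an on-demand full sync is through the {es} connector sync job API.
+The following is a minimal sketch: the connector ID, endpoint, and credentials are hypothetical placeholders, and the availability of this API depends on your {es} version.
+
+[source,shell]
+----
+# Hypothetical example: queue an on-demand full sync for the connector with
+# ID "my-connector-id". The endpoint and credentials are placeholders.
+curl -X POST "http://localhost:9200/_connector/_sync_job" \
+  -H 'Content-Type: application/json' \
+  -u elastic:changeme \
+  -d '{"id": "my-connector-id", "job_type": "full"}'
+----
+// NOTCONSOLE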
+
+[discrete#es-connectors-sync-types-incremental]
+=== Incremental syncs
+
+An incremental sync only syncs data changes since the last full or incremental sync.
+
+Incremental syncs are only available after an initial full sync has successfully completed.
+Otherwise, the incremental sync will fail.
+
+You can <> or <> an incremental sync job.
+
+[discrete#es-connectors-sync-types-incremental-performance]
+==== Incremental sync performance
+
+During an incremental sync, your connector will still _fetch_ all data from the third-party data source.
+If the data contains timestamps, the connector framework compares document IDs and timestamps.
+If a document already exists in {es} with the same timestamp, then this document does not need updating and will not be sent to {es}.
+
+The determining factor in incremental sync performance is the raw volume of data ingested.
+For small volumes of data, the performance improvement using incremental syncs will be negligible.
+For large volumes of data, the performance improvement can be substantial.
+Additionally, an incremental sync is less likely to be throttled by {es}, making it more performant than a full sync when {es} is under heavy load.
+
+A third-party data source that enforces throttling and has low throughput, but stores relatively little data in Elasticsearch (such as GitHub, Jira, or Confluence), won't see a significant performance improvement from incremental syncs.
+
+However, a fast, accessible third-party data source that stores large amounts of data in {es} (such as Azure Blob Storage, Google Drive, or S3) can see a significant performance improvement from incremental syncs.
+
+[NOTE]
+====
+Incremental syncs for the SharePoint Online connector use connector-specific logic.
+All other connectors use the same shared connector framework logic for incremental syncs.
+====
+
+[discrete#es-connectors-sync-types-incremental-supported]
+==== Incremental sync availability
+
+Incremental syncs are available for the following connectors:
+
+include::_connectors-list-incremental.asciidoc[]
diff --git a/docs/reference/connector/docs/connectors-docker-compose-quickstart.asciidoc b/docs/reference/connector/docs/connectors-docker-compose-quickstart.asciidoc
new file mode 100644
index 0000000000000..52a8921c90ec1
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-docker-compose-quickstart.asciidoc
@@ -0,0 +1,10 @@
+[#es-connectors-docker-compose-quickstart]
+=== Docker Compose quickstart
+
+Use our Docker Compose quickstart to easily set up a full self-managed stack and try out Elastic Connectors.
+
+We've provided a script to start up and run Elasticsearch, Kibana, and Connectors instances using Docker Compose.
+The script prompts you to configure your Connectors before starting.
+Additionally, you can use the same set of scripts to manually configure your Connectors and run the stack.
+
+Refer to the https://github.com/elastic/connectors/tree/main/scripts/stack#readme[README^] in the https://github.com/elastic/connectors[elastic/connectors^] source repository for more information.
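+
+If you'd like to try it locally, a typical starting point is to clone the repository and change into the stack scripts directory.
+The commands below are a sketch only; the entry-point scripts and their options are documented in the README linked above.
+
+[source,shell]
+----
+# Sketch only: fetch the connectors repository and switch to the stack scripts.
+# See the README in scripts/stack for the supported commands and options.
+git clone https://github.com/elastic/connectors.git
+cd connectors/scripts/stack
+----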
diff --git a/docs/reference/connector/docs/connectors-dropbox.asciidoc b/docs/reference/connector/docs/connectors-dropbox.asciidoc new file mode 100644 index 0000000000000..1f80a0ab4e952 --- /dev/null +++ b/docs/reference/connector/docs/connectors-dropbox.asciidoc @@ -0,0 +1,580 @@ +[#es-connectors-dropbox] +=== Elastic Dropbox connector reference +++++ +Dropbox +++++ +// Attributes used in this file +:service-name: Dropbox +:service-name-stub: dropbox + +The _Elastic Dropbox connector_ is a <> for https://www.dropbox.com[Dropbox^]. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-dropbox-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-dropbox-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* in Elastic versions *8.10.0 and later*. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-dropbox-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-dropbox-usage] +===== Usage + +To use this connector as a *managed connector*, see <>. + +For additional operations, see <>. + +Before you can configure your connector, you'll need to: + +* <> +* <> + +[discrete#es-connectors-dropbox-dropbox-api-authorization] +===== Dropbox API Authorization + +[discrete#es-connectors-dropbox-create-dropbox-oauth-app] +====== Create Dropbox OAuth App + +You'll need to create an OAuth app in the Dropbox platform by following these steps: + +1. Register a new app in the https://www.dropbox.com/developers/apps[Dropbox App Console^]. +Select *Full Dropbox API app* and choose the following required permissions: +* `files.content.read` +* `sharing.read` ++ +To use document level security, you'll also need the following permissions: +* `team_info.read` +* `team_data.member` +* `team_data.content.read` +* `members.read` +2. Once the app is created, make note of the *app key* and *app secret* values which you'll need to configure the Dropbox connector on your Elastic deployment. + +[discrete#es-connectors-dropbox-refresh-token] +====== Generate a refresh Token + +To generate a refresh token, follow these steps: + +1. Go to the following URL, replacing `` with the *app key* value saved earlier: +`https://www.dropbox.com/oauth2/authorize?client_id=&response_type=code&token_access_type=offline` ++ +The HTTP response should contain an *authorization code* that you'll use to generate a refresh token. +An authorization code *can only be used once* to create a refresh token. ++ +2. 
In your terminal, run the following `cURL` command, replacing ``, `:` with the values you saved earlier: ++ +[source,shell] +---- +curl -X POST "https://api.dropboxapi.com/oauth2/token?code=&grant_type=authorization_code" -u ":" +---- +// NOTCONSOLE +Store the refresh token from the response to be used in the connector configuration. ++ +Make sure the response has a list of the following scopes: ++ +* `account_info.read` +* `files.content.read` +* `files.metadata.read` +* `sharing.read` +* `team_info.read` (if using document level security) +* `team_data.member` (if using document level security) +* `team_data.content.read` (if using document level security) +* `members.read` (if using document level security) + +[discrete#es-connectors-dropbox-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +Path to fetch files/folders:: +The folder path to fetch files/folders from Dropbox. Default value is `/`. ++ +[Note] +==== +This field can be bypassed by advanced sync rules. +==== + +App key:: +The App Key to authenticate your Dropbox application. + +App secret:: +The App Secret to authenticate your Dropbox application. + +Refresh token:: +The refresh token to authenticate your Dropbox application. + +Enable document level security:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs will fetch users' access control lists and store them in a separate index. + +Include groups and inherited users:: +Appears when document level security is enabled. +Include groups and inherited users when indexing permissions. + +[WARNING] +==== +Enabling `Include groups and inherited users` will cause a signficant performance degradation. +==== + +[discrete#es-connectors-dropbox-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Files* +** Includes metadata such as file name, path, size, content, etc. +* *Folders* + +[NOTE] +==== +Due to a Dropbox issue, metadata updates to Paper files from Dropbox Paper are not immediately reflected in the Dropbox UI. +This delays the availability of updated results for the connector. +Once the metadata changes are visible in the Dropbox UI, the updates are available. +==== + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Currently, the connector doesn't retrieve files from shared Team folders. +* Permissions are not synced by default. If <> is not enabled *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-dropbox-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-dropbox-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-dropbox-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes <> for this connector. + +Advanced sync rules for Dropbox allow you to sync Dropbox files based on a query that matches strings in the filename. +You can optionally filter the results of the query by `file_extensions` or `file_categories`. 
+When both are provided, priority is given to `file_categories`. +We have some examples below for illustration. + +[discrete#es-connectors-dropbox-sync-rules-advanced-example-1] +======= Example: Query only + +[source,js] +---- +[ + { + "query": "confidential" + }, + { + "query": "dropbox" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-dropbox-sync-rules-advanced-example-2] +======= Example: Query with file extension filter + +[source,js] +---- +[ + { + "query": "dropbox", + "options": { + "file_extensions": [ + "txt", + "pdf" + ] + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-dropbox-sync-rules-advanced-example-3] +======= Example: Query with file category filter + +[source,js] +---- +[ + { + "query": "test", + "options": { + "file_categories": [ + { + ".tag": "paper" + }, + { + ".tag": "png" + } + ] + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-dropbox-sync-rules-advanced-limitations] +======= Limitations + +* Content extraction is not supported for Dropbox *Paper* files when advanced sync rules are enabled. + +[discrete#es-connectors-dropbox-known-issues] +===== Known issues + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-dropbox-troubleshooting] +===== Troubleshooting + +See <> for a list of troubleshooting tips for all connectors. + +[discrete#es-connectors-dropbox-security] +===== Security + +See <> for a list of security tips for all connectors. + +[discrete#es-connectors-dropbox-content-extraction] +===== Content extraction + +See <>. +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-dropbox-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-dropbox-client-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. + +This self-managed connector is compatible with Elastic versions *8.9.0*+. + +To use this connector, satisfy all <>. + +[discrete#es-connectors-dropbox-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-dropbox-client-usage] +===== Usage +Before you can configure your connector, you'll need to: + +* <> +* <> + + +To use this connector as a *self-managed connector*, see <> +Once set up, for additional usage operations, see <>. + +[discrete#es-connectors-dropbox-client-dropbox-api-authorization] +===== Dropbox API Authorization + +[discrete#es-connectors-dropbox-client-create-dropbox-oauth-app] +====== Create Dropbox OAuth App + +You'll need to create an OAuth app in the Dropbox platform by following these steps: + +1. Register a new app in the https://www.dropbox.com/developers/apps[Dropbox App Console^]. +Select *Full Dropbox API app* and choose the following required permissions: +* `files.content.read` +* `sharing.read` ++ +To use document level security, you'll also need the following permissions: +* `team_info.read` +* `team_data.member` +* `team_data.content.read` +* `members.read` +2. Once the app is created, make note of the *app key* and *app secret* values which you'll need to configure the Dropbox connector on your Elastic deployment. 
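+
+The *app key* and *app secret* are also used as HTTP basic auth credentials when you exchange an authorization code for a refresh token, as described in the next section.
+As an optional convenience, you can keep them in environment variables for that request.
+The variable names and values in the following sketch are hypothetical placeholders, not connector configuration fields.
+
+[source,shell]
+----
+# Optional convenience: placeholder values for the token request described in
+# the next section. Replace them with the values from your Dropbox app and the
+# authorization code you obtain there.
+export DROPBOX_APP_KEY="your-app-key"
+export DROPBOX_APP_SECRET="your-app-secret"
+export DROPBOX_AUTH_CODE="your-authorization-code"
+
+# The token endpoint takes the authorization code as a query parameter and the
+# app key and app secret as basic auth credentials.
+curl -X POST "https://api.dropboxapi.com/oauth2/token?code=${DROPBOX_AUTH_CODE}&grant_type=authorization_code" \
+  -u "${DROPBOX_APP_KEY}:${DROPBOX_APP_SECRET}"
+----
+// NOTCONSOLE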
+ +[discrete#es-connectors-dropbox-client-refresh-token] +====== Generate a refresh Token + +To generate a refresh token, follow these steps: + +1. Go to the following URL, replacing `` with the *app key* value saved earlier: +`https://www.dropbox.com/oauth2/authorize?client_id=&response_type=code&token_access_type=offline` ++ +The HTTP response should contain an *authorization code* that you'll use to generate a refresh token. +An authorization code *can only be used once* to create a refresh token. ++ +2. In your terminal, run the following `cURL` command, replacing ``, `:` with the values you saved earlier: ++ +[source,shell] +---- +curl -X POST "https://api.dropboxapi.com/oauth2/token?code=&grant_type=authorization_code" -u ":" +---- +// NOTCONSOLE +Store the refresh token from the response to be used in the connector configuration. ++ +Make sure the response has a list of the following scopes: ++ +* `account_info.read` +* `files.content.read` +* `files.metadata.read` +* `sharing.read` +* `team_info.read` (if using document level security) +* `team_data.member` (if using document level security) +* `team_data.content.read` (if using document level security) +* `members.read` (if using document level security) + +[discrete#es-connectors-dropbox-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/confluence.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`path`:: +The folder path to fetch files/folders from Dropbox. Default value is `/`. + +`app_key` (required):: +The App Key to authenticate your Dropbox application. + +`app_secret` (required):: +The App Secret to authenticate your Dropbox application. + +`refresh_token` (required):: +The refresh token to authenticate your Dropbox application. + +use_document_level_security:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs will fetch users' access control lists and store them in a separate index. + +`retry_count`:: +The number of retry attempts after a failed request to Dropbox. Default value is `3`. + +`concurrent_downloads`:: +The number of concurrent downloads for fetching attachment content. +This can help speed up content extraction of attachments. Defaults to `100`. + +`use_text_extraction_service`:: +Requires a separate deployment of the <>. +Requires that pipeline settings disable text extraction. +Default value is `False`. + +`use_document_level_security`:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. Access control syncs will fetch users' access control lists and store them in a separate index. + +`include_inherited_users_and_groups`:: +Depends on document level security being enabled. +Include groups and inherited users when indexing permissions. + +[WARNING] +==== +Enabling `Include groups and inherited users` will cause a signficant performance degradation. 
+==== + +[discrete#es-connectors-dropbox-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-dropbox-client-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Files* +** Includes metadata such as file name, path, size, content, etc. +* *Folders* + +[NOTE] +==== +Due to a Dropbox issue, metadata updates to Paper files from Dropbox Paper are not immediately reflected in the Dropbox UI. +This delays the availability of updated results for the connector. +Once the metadata changes are visible in the Dropbox UI, the updates are available. +==== + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. +* Currently, the connector doesn't retrieve files from shared Team folders. +* Permissions are not synced by default. If <> is not enabled *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-dropbox-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-dropbox-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-dropbox-client-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes <> for this connector. + +Advanced sync rules for Dropbox allow you to sync Dropbox files based on a query that matches strings in the filename. +You can optionally filter the results of the query by `file_extensions` or `file_categories`. +When both are provided, priority is given to `file_categories`. +We have some examples below for illustration. + +[discrete#es-connectors-dropbox-client-sync-rules-advanced-example-1] +======= Example: Query only + +[source,js] +---- +[ + { + "query": "confidential" + }, + { + "query": "dropbox" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-dropbox-client-sync-rules-advanced-example-2] +======= Example: Query with file extension filter + +[source,js] +---- +[ + { + "query": "dropbox", + "options": { + "file_extensions": [ + "txt", + "pdf" + ] + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-dropbox-client-sync-rules-advanced-example-3] +======= Example: Query with file category filter + +[source,js] +---- +[ + { + "query": "test", + "options": { + "file_categories": [ + { + ".tag": "paper" + }, + { + ".tag": "png" + } + ] + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-dropbox-client-sync-rules-advanced-limitations] +======= Limitations + +* Content extraction is not supported for Dropbox *Paper* files when advanced sync rules are enabled. + +[discrete#es-connectors-dropbox-client-end-to-end-testing] +===== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the Dropbox connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=dropbox +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=dropbox DATA_SIZE=small +---- + +[discrete#es-connectors-dropbox-client-known-issues] +===== Known issues + +Refer to <> for a list of known issues for all connectors. 
+ +[discrete#es-connectors-dropbox-client-troubleshooting] +===== Troubleshooting + +See <> for a list of troubleshooting tips for all connectors. + +[discrete#es-connectors-dropbox-client-security] +===== Security + +See <> for a list of security tips for all connectors. + +[discrete#es-connectors-dropbox-client-content-extraction] +===== Content extraction + +See <>. + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-filter-extract-transform.asciidoc b/docs/reference/connector/docs/connectors-filter-extract-transform.asciidoc new file mode 100644 index 0000000000000..278478c908bf0 --- /dev/null +++ b/docs/reference/connector/docs/connectors-filter-extract-transform.asciidoc @@ -0,0 +1,53 @@ +[#es-connectors-filter-extract-transform] +== Extract, filter, and transform content +++++ +Extract and transform +++++ + +Elastic connectors offer a number of tools for extracting, filtering, and transforming content from your third-party data sources. +Each connector has its own default logic, specific to the data source, and every Elastic Search deployment uses a default ingest pipeline to extract and transform data. +Several tools are also available for more advanced use cases. + +The following diagram provides an overview of how content extraction, sync rules, and ingest pipelines can be orchestrated in your connector's data pipeline. + +[.screenshot] +image::images/pipelines-extraction-sync-rules.png[Architecture diagram of data pipeline with content extraction, sync rules, and ingest pipelines] + +By default, only the connector specific logic (2) and the default `ent-search-generic-ingestion` pipeline (6) extract and transform your data, as configured in your deployment. + +The following tools are available for more advanced use cases: + +* *Advanced sync rules* (1). Remote filtering at the data source level, before data reaches the connector. +* *Basic sync rules* (4) or *extraction service* (3). Integration filtering controlled by the connector. +* *Ingest pipelines* (6). Customized pipeline filtering where {es} filters data _before_ indexing. + +Learn more in the following documentation links. + +[discrete#es-connectors-filter-extract-transform-content-extraction] +=== Content extraction + +Connectors have a default content extraction service, plus the <> for advanced use cases. + +Refer to <> for details. + +[discrete#es-connectors-filter-extract-transform-sync-rules] +=== Sync rules + +Use sync rules to help control which documents are synced between the third-party data source and Elasticsearch. +Sync rules enable you to filter data early in the data pipeline, which is more efficient and secure. + +* *Basic* sync rules are identical for all connectors. +* *Advanced sync rules* are data source-specific. +They cover complex query-and-filter scenarios, defined in a DSL JSON snippet. + +Refer to <> for details. + +[discrete#es-connectors-filter-extract-transform-ingest-pipelines] +=== Ingest pipelines + +Ingest pipelines are a user-defined sequence of processors that modify documents before they are indexed into Elasticsearch. +Use ingest pipelines for data enrichment, normalization, and more. + +Elastic connectors use a default ingest pipeline, which you can copy and customize to meet your needs. + +Refer to {ref}/ingest-pipeline-search.html[ingest pipelines in Search] in the {es} documentation. 
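+
+As a minimal illustration of the mechanism, the sketch below creates a trivial custom pipeline using the {es} ingest pipeline API.
+The pipeline name, the `ingested_at` field, and the endpoint and credentials are hypothetical examples; they are not part of the default connector pipeline.
+
+[source,shell]
+----
+# Hypothetical example: a custom ingest pipeline that stamps each document with
+# the time it was ingested. Endpoint and credentials are placeholders.
+curl -X PUT "http://localhost:9200/_ingest/pipeline/my-connector-pipeline" \
+  -H 'Content-Type: application/json' \
+  -u elastic:changeme \
+  -d '
+{
+  "description": "Example custom pipeline for connector documents",
+  "processors": [
+    { "set": { "field": "ingested_at", "value": "{{{_ingest.timestamp}}}" } }
+  ]
+}'
+----
+// NOTCONSOLE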
\ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-framework.asciidoc b/docs/reference/connector/docs/connectors-framework.asciidoc new file mode 100644 index 0000000000000..b0a037d9ef468 --- /dev/null +++ b/docs/reference/connector/docs/connectors-framework.asciidoc @@ -0,0 +1,27 @@ +[#es-connectors-framework] +== Elastic connector framework: build and customize connectors +++++ +Build and customize connectors +++++ + +The Elastic connector framework enables developers to build Elastic-supported self-managed connectors which sync third-party data sources to Elasticsearch. +The framework implements common functionalities out of the box, so developers can focus on the logic specific to integrating their chosen data source. + +The framework ensures compatibility, makes it easier for our team to review PRs, and help out in the development process. +When you build using our framework, we provide a pathway for the connector to be officially supported by Elastic. + +[discrete#es-connectors-framework-use-cases] +=== Use cases + +The framework serves two distinct, but related use cases: + +* Customizing an existing Elastic <> +* Building a new self-managed connector + +[discrete#es-connectors-framework-learn-more] +=== Learn more + +To learn how to contribute connectors using the framework, refer to our https://github.com/elastic/connectors/blob/main/docs/CONTRIBUTING.md[contributing guide] in the `connectors` repository. +This guide explains how to get started and includes a contribution checklist and pull request guidelines. + +This repo contains all the source code for existing Elastic connectors. \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-github.asciidoc b/docs/reference/connector/docs/connectors-github.asciidoc new file mode 100644 index 0000000000000..aa683e4bb0829 --- /dev/null +++ b/docs/reference/connector/docs/connectors-github.asciidoc @@ -0,0 +1,697 @@ +[#es-connectors-github] +=== Elastic GitHub connector reference +++++ +GitHub +++++ +// Attributes used in this file +:service-name: GitHub +:service-name-stub: github + +The _Elastic GitHub connector_ is a <> for https://www.github.com[GitHub^]. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-github-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-github-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* as of Elastic version *8.11.0*. + +To use this connector natively in Elastic Cloud, satisfy all <>. 
+ +[discrete#es-connectors-github-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-github-usage] +===== Usage + +To use this connector as a *managed connector*, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-github-personal-access-token] +====== GitHub personal access token + +Configure a GitHub personal access token to fetch data from GitHub. + +Follow these steps to generate a GitHub personal access token: + +* Go to *GitHub Settings → Developer settings → Personal access tokens → Tokens(classic)*. +* Select `Generate new token`. +* Add a note and select the following scopes: +** `repo` +** `user` +** `read:org` +* Select `Generate token` and copy the token. + +[discrete#es-connectors-github-github-app] +====== GitHub App + +Configure a GitHub App to fetch data from GitHub. + +Follow these steps to create a GitHub App: + +* Go to *GitHub Settings → Developer settings → GitHub Apps*. +* Select `New GitHub App`. +* Add a name and Homepage URL, deselect `Active` under `Webhook`. +* Under `Permissions`, select `Read-only` for `Commit statuses`, `Contents`, `Issues`, `Metadata` and `Pull requests` under `Repository permissions`, select `Read-only` for `Members` under `Organization permissions`. +* Select `Any account` for `Where can this GitHub App be installed?`. +* Click `Create GitHub App`. +* Scroll down to the section `Private keys`, and click `Generate a private key`. +* Click `Install App` in the upper-left corner, select the organizations/personal accounts you want to install the GitHub App on, click `Install`. +* You can choose to install it on all repositories or selected repositories, and click `Install`. + +[discrete#es-connectors-github-compatability] +===== Compatibility + +Both GitHub and GitHub Enterprise are supported. + +[discrete#es-connectors-github-configuration] +===== Configuration + +The following configuration fields are required: + +Data source:: +Toggle between GitHub Cloud or GitHub Server. + +Server URL:: +URL of the GitHub Server instance. (GitHub Server only) + +Authentication method:: +The method to authenticate the GitHub instance. Toggle between `Personal access token` and `GitHub App`. + +Token:: +GitHub personal access token to authenticate the GitHub instance. This field is only available for `Personal access token` authentication method. + +Repository Type:: +Toggle between `Organization` and `Other`. +Note that document level security (DLS) is only available for `Organization` repositories. + +Organization Name:: +Name of the organization to fetch data from. This field is only available when `Authentication method` is set to `Personal access token` and `Repository Type` is set to `Organization`. + +App ID:: +App ID of the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. + +App private key:: +Private key generated for the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. + +List of repositories:: +Comma-separated list of repositories to fetch data from GitHub instance. If the value is `*` the connector will fetch data from all repositories present in the configured user's account. ++ +Default value is `*`. ++ +Examples: ++ +* `elasticsearch`,`elastic/kibana` +* `*` +[TIP] +==== +*Repository ownership* + +If the "OWNER/" portion of the "OWNER/REPO" repository argument is omitted, it defaults to the name of the authenticating user. 
+ +In the examples provided here: + +* the `elasticsearch` repo synced will be the `/elasticsearch` repo +* the `kibana` repo synced will be the Elastic owned repo + +The "OWNER/" portion of the "OWNER/REPO" repository argument must be provided when `GitHub App` is selected as the `Authentication method`. +==== +[NOTE] +==== +This field can be bypassed by advanced sync rules. +==== + +Enable SSL:: +Enable SSL for the GitHub instance. + +SSL certificate:: +SSL certificate for the GitHub instance. Example: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +Enable document level security:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +DLS is only available when `Repository Type` is set to `Organization`. + +[discrete#es-connectors-github-documents-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* **Repositories** +* **Pull Requests** +* **Issues** +* **Files & Folder** + +Only the following file extensions are ingested: + +* `.markdown` +* `.md` +* `.rst` + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elasticsearch Index. +==== + +[discrete#es-connectors-github-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-github-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-github-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +The following sections provide examples of advanced sync rules for this connector. 
+ +[discrete#es-connectors-github-sync-rules-advanced-branch] +======= Indexing document and files based on branch name configured via branch key + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "branch": "sync-rules-feature" + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-github-sync-rules-advanced-issue-key] +======= Indexing document based on issue query related to bugs via issue key + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "issue": "is:bug" + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-github-sync-rules-advanced-pr-key] +======= Indexing document based on PR query related to open PR's via PR key + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "pr": "is:open" + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-github-sync-rules-advanced-issue-query-branch-name] +======= Indexing document and files based on queries and branch name + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "issue": "is:bug", + "pr": "is:open", + "branch": "sync-rules-feature" + } + } +] +---- +// NOTCONSOLE + +[NOTE] +==== +All documents pulled by a given rule are indexed regardless of whether the document has already been indexed by a previous rule. +This can lead to document duplication, but the indexed documents count will differ in the logs. +Check the Elasticsearch index for the actual document count. +==== + +[discrete#es-connectors-github-sync-rules-advanced-overlapping] +======= Advanced rules for overlapping + +[source,js] +---- +[ + { + "filter": { + "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-20" + }, + "repository": "repo_name" + }, + { + "filter": { + "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-15" + }, + "repository": "repo_name" + } +] +---- +// NOTCONSOLE + +[NOTE] +==== +If `GitHub App` is selected as the authentication method, the "OWNER/" portion of the "OWNER/REPO" repository argument must be provided. +==== + +[discrete#es-connectors-github-content-extraction] +===== Content Extraction + +See <>. + +[discrete#es-connectors-github-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-github-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-github-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-github-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-github-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. + +This self-managed connector is compatible with Elastic versions *8.10.0+*. + +To use this connector, satisfy all <>. + +[discrete#es-connectors-github-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-github-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <> +For additional usage operations, see <>. 
+ +[discrete#es-connectors-github-client-personal-access-token] +====== GitHub personal access token + +Configure a GitHub personal access token to fetch data from GitHub. + +Follow these steps to generate a GitHub access token: + +* Go to *GitHub Settings → Developer settings → Personal access tokens → Tokens(classic)*. +* Select `Generate new token`. +* Add a note and select the following scopes: +** `repo` +** `user` +** `read:org` +* Select `Generate token` and copy the token. + +[discrete#es-connectors-github-client-github-app] +====== GitHub App + +Configure a GitHub App to fetch data from GitHub. + +Follow these steps to create a GitHub App: + +* Go to *GitHub Settings → Developer settings → GitHub Apps*. +* Select `New GitHub App`. +* Add a name and Homepage URL, deselect `Active` under `Webhook`. +* Under `Permissions`, select `Read-only` for `Commit statuses`, `Contents`, `Issues`, `Metadata` and `Pull requests` under `Repository permissions`, select `Read-only` for `Members` under `Organization permissions`. +* Select `Any account` for `Where can this GitHub App be installed?`. +* Click `Create GitHub App`. +* Scroll down to the section `Private keys`, and click `Generate a private key`. +* Click `Install App` in the upper-left corner, select the organizations/personal accounts you want to install the GitHub App on, click `Install`. +* You can choose to install it on all repositories or selected repositories, and click `Install`. + + +[discrete#es-connectors-github-client-compatability] +===== Compatibility + +Both GitHub and GitHub Enterprise are supported. + +[discrete#es-connectors-github-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/github.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required: + +`data_source`:: +GitHub Cloud or GitHub Server. + +`host`:: +URL of the GitHub Server instance. (GitHub Server only) + +`auth_method`:: +The method to authenticate the GitHub instance. Toggle between `Personal access token` and `GitHub App`. + +`token`:: +GitHub personal access token to authenticate the GitHub instance. This field is only available for `Personal access token` authentication method. + +`repo_type`:: +Toggle between `Organization` and `Other`. +Note that document level security (DLS) is only available for `Organization` repositories. + +`org_name`:: +Name of the organization to fetch data from. This field is only available when `Authentication method` is set to `Personal access token` and `Repository Type` is set to `Organization`. + +`app_id`:: +App ID of the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. + +`private_key`:: +Private key generated for the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. + +`repositories`:: +Comma-separated list of repositories to fetch data from GitHub instance. If the value is `*` the connector will fetch data from all repositories present in the configured user's account. ++ +Default value is `*`. 
++ +Examples: ++ +* `elasticsearch`,`elastic/kibana` +* `*` +[TIP] +==== +*Repository ownership* + +If the "OWNER/" portion of the "OWNER/REPO" repository argument is omitted, it defaults to the name of the authenticating user. + +In the examples provided here: + +* the `elasticsearch` repo synced will be the `/elasticsearch` +* the `kibana` repo synced will be the Elastic owned repo + +The "OWNER/" portion of the "OWNER/REPO" repository argument must be provided when `GitHub App` is selected as the `Authentication method`. +==== +[NOTE] +==== +This field can be bypassed by advanced sync rules. +==== + +`ssl_enabled`:: +Whether SSL verification will be enabled. Default value is `False`. + +`ssl_ca`:: +Content of SSL certificate. Note: If `ssl_enabled` is `False`, the value in this field is ignored. Example certificate: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +`use_document_level_security`:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +DLS is only available when `Repository Type` is set to `Organization`. + +`retry_count`:: +The number of retry attempts after failed request to GitHub. Default value is `3`. + +`use_text_extraction_service`:: +Requires a separate deployment of the <>. Requires that pipeline settings disable text extraction. +Default value is `False`. + +[discrete#es-connectors-github-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-github-client-documents-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* **Repositories** +* **Pull Requests** +* **Issues** +* **Files & Folder** + +Only the following file extensions are ingested: + +* `.markdown` +* `.md` +* `.rst` + +[NOTE] +==== +* Content of files bigger than 10 MB won't be extracted. +* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elasticsearch Index. +==== + +[discrete#es-connectors-github-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-github-client-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-github-client-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +The following sections provide examples of advanced sync rules for this connector. 
+ +[discrete#es-connectors-github-client-sync-rules-advanced-branch] +======= Indexing document and files based on branch name configured via branch key + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "branch": "sync-rules-feature" + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-github-client-sync-rules-advanced-issue-key] +======= Indexing document based on issue query related to bugs via issue key + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "issue": "is:bug" + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-github-client-sync-rules-advanced-pr-key] +======= Indexing document based on PR query related to open PR's via PR key + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "pr": "is:open" + } + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-github-client-sync-rules-advanced-issue-query-branch-name] +======= Indexing document and files based on queries and branch name + +[source,js] +---- +[ + { + "repository": "repo_name", + "filter": { + "issue": "is:bug", + "pr": "is:open", + "branch": "sync-rules-feature" + } + } +] +---- +// NOTCONSOLE + +[NOTE] +==== +All documents pulled by a given rule are indexed regardless of whether the document has already been indexed by a previous rule. +This can lead to document duplication, but the indexed documents count will differ in the logs. +Check the Elasticsearch index for the actual document count. +==== + +[discrete#es-connectors-github-client-sync-rules-advanced-overlapping] +======= Advanced rules for overlapping + +[source,js] +---- +[ + { + "filter": { + "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-20" + }, + "repository": "repo_name" + }, + { + "filter": { + "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-15" + }, + "repository": "repo_name" + } +] +---- +// NOTCONSOLE + +[NOTE] +==== +If `GitHub App` is selected as the authentication method, the "OWNER/" portion of the "OWNER/REPO" repository argument must be provided. +==== + +[discrete#es-connectors-github-client-content-extraction] +===== Content Extraction + +See <>. + +[discrete#es-connectors-github-client-connector-client-operations] +===== Self-managed connector operations + +[discrete#es-connectors-github-client-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the GitHub connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=github +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=github DATA_SIZE=small +---- + +[discrete#es-connectors-github-client-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-github-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-github-client-security] +===== Security + +See <>. 
+ +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-gmail.asciidoc b/docs/reference/connector/docs/connectors-gmail.asciidoc new file mode 100644 index 0000000000000..594df7b9e681a --- /dev/null +++ b/docs/reference/connector/docs/connectors-gmail.asciidoc @@ -0,0 +1,366 @@ +[#es-connectors-gmail] +=== Elastic Gmail connector reference +++++ +Gmail +++++ +// Attributes used in this file +:service-name: Gmail +:service-name-stub: gmail + +The _Elastic GMail connector_ is a <> for GMail. + + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-gmail-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-gmail-availability] +===== Availability and prerequisites + +This connector is available as a *managed connector* (managed service) in Elastic Cloud. + +This connector is compatible with Elastic versions *8.13.0+*. + +To use this connector, satisfy all <>. + +[discrete#es-connectors-gmail-create-native-connector] +==== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-gmail-usage] +===== Usage + +To use this connector as a managed connector in Elastic Cloud, use the *Connectors* workflow in the Kibana UI. + +To create a new {service-name} connector: + +. Navigate to *Search -> Connectors* page in the Kibana UI. +. Select the *New Native Connector* button. +. Select the *{service-name}* connector. + +For additional operations, see <>. + +[discrete#es-connectors-gmail-connector-authentication-prerequisites] +===== Connector authentication prerequisites + +Before syncing any data from GMail, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to the GMail and the Google Directory API, which is part of the Google Admin SDK API. +You also need to enable domain-wide delegation to impersonate the users you're fetching messages from. + +To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. + +. *Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. +. *Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable *GMail API* and the *Google Admin SDK API*. +. *Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. ++ +Your service account needs to have access to at least the following scope: ++ +* `https://www.googleapis.com/auth/gmail.readonly` +. *Create a Key File*. + * In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. + * Click the email address of the service account that you want to create a key for. + * Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. + * Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. + +. 
*Google Workspace domain-wide delegation of authority*. ++ +To access user data like messages on a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. ++ +You need to grant the following *OAuth Scopes* to your service account: ++ +-- +* `https://www.googleapis.com/auth/admin.directory.user.readonly` +-- ++ +This step allows the connector to access user data and their group memberships in your Google Workspace organization. + +[discrete#es-connectors-gmail-configuration] +===== Configuration + +The following configuration fields are required: + +GMail service account JSON:: +The service account credentials generated from Google Cloud Platform (JSON string). +Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. + +Google Workspace admin email:: +Google Workspace admin email. +Required to enable document level security (DLS). +A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. +Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. + +Google customer ID:: +Google customer id. +Required to fetch messages and to enable document level security (DLS). +Go to `Google Workspace Admin Console` -> `Account` and copy the value under `Customer Id`. + +Include spam and trash emails:: +Toggle to fetch spam and trash emails. +Also works with document level security (DLS). + +Enable document level security:: +Toggle to enable <>. +DLS is supported for the GMail connector. +When enabled: ++ +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. + +[discrete#es-connectors-gmail-documents-and-syncs] +===== Documents and syncs + +The connector will fetch all messages of all users the service account has access to. + +[discrete#es-connectors-gmail-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-gmail-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are available for this connector. +The connector supports the https://support.google.com/mail/answer/7190[GMail advanced search syntax] under the `messages` field. + +For example: + +[source,js] +---- +{ + "messages": [ + "before:2021/10/10", + "from:amy" + ] +} +---- +// NOTCONSOLE + +[discrete#es-connectors-gmail-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-gmail-known-issues] +===== Known issues + +There are currently no known issues for this connector. 
+ +[discrete#es-connectors-gmail-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-gmail-security] +===== Security + +See <>. + +[discrete#es-connectors-gmail-framework-and-source] +===== Framework and source + +This connector is built in Python with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/gmail.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-gmail-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-gmail-client-availability] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector* from the *Elastic connector framework*. + +This self-managed connector is compatible with Elastic versions *8.10.0+*. + +To use this connector, satisfy all <>. + +[discrete#es-connectors-gmail-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-gmail-client-usage] +===== Usage + +To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. + +For additional operations, see <>. + +[discrete#es-connectors-gmail-client-connector-authentication-prerequisites] +===== Connector authentication prerequisites + +Before syncing any data from GMail, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to the GMail and the Google Directory API, which is part of the Google Admin SDK API. +You also need to enable domain-wide delegation to impersonate the users you're fetching messages from. + +To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. + +. *Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. +. *Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable *GMail API* and the *Google Admin SDK API*. +. *Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. ++ +Your service account needs to have access to at least the following scope: ++ +* `https://www.googleapis.com/auth/gmail.readonly` +. *Create a Key File*. + * In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. + * Click the email address of the service account that you want to create a key for. + * Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. + * Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. + +. *Google Workspace domain-wide delegation of authority*. 
++ +To access user data like messages on a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. ++ +You need to grant the following *OAuth Scopes* to your service account: ++ +-- +* `https://www.googleapis.com/auth/admin.directory.user.readonly` +-- ++ +This step allows the connector to access user data and their group memberships in your Google Workspace organization. + +[discrete#es-connectors-gmail-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/gmail.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required: + +`GMail service account JSON`:: +The service account credentials generated from Google Cloud Platform (JSON string). +Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. + +`Google Workspace admin email`:: +Google Workspace admin email. +Required to enable document level security (DLS). +A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. +Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. + +`Google customer id`:: +Google customer id. +Required to fetch messages and to enable document level security (DLS). +Go to `Google Workspace Admin Console` -> `Account` and copy the value under `Customer Id`. + +`Include spam and trash emails`:: +Toggle to fetch spam and trash emails. +Also works with DLS. + +`Enable document level security`:: +Toggle to enable <>. +DLS is supported for the GMail connector. +When enabled: ++ +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. + +[discrete#es-connectors-gmail-client-deployment-using-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-gmail-client-documents-and-syncs] +===== Documents and syncs + +The connector will fetch all messages of all users the service account has access to. + +[discrete#es-connectors-gmail-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-gmail-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are available for this connector. +The connector supports the https://support.google.com/mail/answer/7190[GMail advanced search syntax] under the `messages` field. 
+ +For example: + +[source,js] +---- +{ + "messages": [ + "before:2021/10/10", + "from:amy" + ] +} +---- +// NOTCONSOLE + +[discrete#es-connectors-gmail-client-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-gmail-client-known-issues] +===== Known issues + +There are currently no known issues for this connector. + +[discrete#es-connectors-gmail-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-gmail-client-security] +===== Security + +See <>. + +[discrete#es-connectors-gmail-client-framework-and-source] +===== Framework and source + +This connector is built in Python with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/gmail.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-google-cloud.asciidoc b/docs/reference/connector/docs/connectors-google-cloud.asciidoc new file mode 100644 index 0000000000000..64fcb82b19ab7 --- /dev/null +++ b/docs/reference/connector/docs/connectors-google-cloud.asciidoc @@ -0,0 +1,266 @@ +[#es-connectors-google-cloud] +=== Google Cloud Storage Connector +++++ +Google Cloud Storage +++++ + +// Attributes used in this file +:service-name: Google Cloud Storage +:service-name-stub: google_cloud_storage + +The _Elastic Google Cloud Storage connector_ is a <> for https://cloud.google.com/storage[Google Cloud Storage^] data sources. + + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-google-cloud-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-google-cloud-availability-prerequisites] +===== Availability and prerequisites + +This connector is available natively in Elastic Cloud since *8.12.0.* +To use this connector in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-google-cloud-usage] +===== Usage + +The Google Cloud Storage service account must have (at least) the following scopes and roles: + +* `resourcemanager.projects.get` +* `serviceusage.services.use` +* `storage.buckets.list` +* `storage.objects.list` +* `storage.objects.get` + +Google Cloud Storage service account credentials are stored in a JSON file. + +[discrete#es-connectors-google-cloud-configuration] +===== Configuration + +The following configuration field is required to set up the connector: + +Buckets:: +List of buckets to index. +`*` will index all buckets. + +Google Cloud service account JSON:: +The service account credentials generated from Google Cloud Storage (JSON string). +Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. 
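+
+For orientation, the key file you download when creating a service account key in the Google Cloud console is a JSON document roughly like the following (abridged; all values below are placeholders, and your file will contain a few additional fields). The entire JSON string is pasted into this configuration field:
+
+[source,js]
+----
+// Abridged example with placeholder values; your key file will contain additional fields.
+{
+  "type": "service_account",
+  "project_id": "my-gcp-project",
+  "private_key_id": "0123456789abcdef",
+  "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
+  "client_email": "my-connector-sa@my-gcp-project.iam.gserviceaccount.com",
+  "client_id": "123456789012345678901",
+  "token_uri": "https://oauth2.googleapis.com/token"
+}
+----
+// NOTCONSOLE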
+ +[discrete#es-connectors-google-cloud-documents-syncs] +===== Documents and syncs + +The connector will fetch all buckets and paths the service account has access to. + +The `Owner` field is not fetched as `read_only` scope doesn’t allow the connector to fetch IAM information. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permission are not synced. All documents indexed to an Elastic deployment will be visible to all users with access to that Elastic Deployment. +==== + +[discrete#es-connectors-google-cloud-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-google-cloud-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently filtering is controlled by ingest pipelines. + +[discrete#es-connectors-google-cloud-content-extraction] +===== Content extraction + +See <>. + +[source,shell] +---- +$ make ftest NAME=google_cloud_storage +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=google_cloud_storage DATA_SIZE=small +---- + +[discrete#es-connectors-google-cloud-known-issues] +===== Known issues + +There are currently no known issues for this connector. + +[discrete#es-connectors-google-cloud-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-google-cloud-security] +===== Security + +See <>. + +[discrete#es-connectors-google-cloud-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/google_cloud_storage.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-google-cloud-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-google-cloud-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +This self-managed connector is compatible with Elastic versions *8.6.0+*. +To use this connector, satisfy all <>. + +[discrete#es-connectors-google-cloud-client-usage] +===== Usage + +The Google Cloud Storage service account must have (at least) the following scopes and roles: + +* `resourcemanager.projects.get` +* `serviceusage.services.use` +* `storage.buckets.list` +* `storage.objects.list` +* `storage.objects.get` + +Google Cloud Storage service account credentials are stored in a JSON file. + +[discrete#es-connectors-google-cloud-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/google_cloud_storage.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. 
+Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`buckets`:: +List of buckets to index. +`*` will index all buckets. + +`service_account_credentials`:: +The service account credentials generated from Google Cloud Storage (JSON string). +Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. + +`retry_count`:: +The number of retry attempts after a failed call to Google Cloud Storage. +Default value is `3`. + +[discrete#es-connectors-google-cloud-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-google-cloud-client-documents-syncs] +===== Documents and syncs + +The connector will fetch all buckets and paths the service account has access to. + +The `Owner` field is not fetched as `read_only` scope doesn’t allow the connector to fetch IAM information. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. +* Permission are not synced. All documents indexed to an Elastic deployment will be visible to all users with access to that Elastic Deployment. +==== + +[discrete#es-connectors-google-cloud-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-google-cloud-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently filtering is controlled by ingest pipelines. + +[discrete#es-connectors-google-cloud-client-content-extraction] +===== Content extraction + +See <>. + +[discrete#es-connectors-google-cloud-client-client-operations-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the Google Cloud Storage connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=google_cloud_storage +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=google_cloud_storage DATA_SIZE=small +---- + +[discrete#es-connectors-google-cloud-client-known-issues] +===== Known issues + +There are currently no known issues for this connector. + +[discrete#es-connectors-google-cloud-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-google-cloud-client-security] +===== Security + +See <>. + +[discrete#es-connectors-google-cloud-client-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/google_cloud_storage.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). 
+ +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-google-drive.asciidoc b/docs/reference/connector/docs/connectors-google-drive.asciidoc new file mode 100644 index 0000000000000..d3c4a0886efc3 --- /dev/null +++ b/docs/reference/connector/docs/connectors-google-drive.asciidoc @@ -0,0 +1,409 @@ +[#es-connectors-google-drive] +=== Elastic Google Drive connector reference +++++ +Google Drive +++++ +// Attributes used in this file +:service-name: Google Drive +:service-name-stub: google_drive + +The _Elastic Google Drive connector_ is a <> for https://www.google.com/drive[Google Drive^]. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-google-drive-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-google-drive-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* as of Elastic version *8.11.0*. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-google-drive-usage] +===== Usage + +To use this connector natively in Elastic Cloud, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-google-drive-connector-authentication-prerequisites] +===== Connector authentication prerequisites + +Before syncing any data from Google Drive, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to Google Drive API. + +To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. + +. *Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. + +. *Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Drive API*. + +. *Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. Your service account needs to have access to at least the following scope: +- `https://www.googleapis.com/auth/drive.readonly` + +. *Create a Key File*. + - In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. + - Click the email address of the service account that you want to create a key for. + - Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. + - Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. + +. 
*[Optional] Share Google Drive Folders.* If you use domain-wide delegation for syncing data you can skip this step. Go to your Google Drive. Right-click the folder or shared drive, choose `Share` and add the email address of the service account you created in step 3. as a viewer to this folder. + +[NOTE] +==== +When you grant a service account access to a specific folder or shared drive in Google Drive, it's important to note that the permissions extend to all the children within that folder or drive. +This means that any folders or files contained within the granted folder or drive inherit the same access privileges as the parent. +==== + +[discrete#es-connectors-google-drive-additional-prerequisites-for-domain-wide-delegation] +====== Additional authentication prerequisites for domain-wide delegation + +This step is *required* when *Use domain-wide delegation for data sync* or *Enable document level security* configuration option is enabled. + +. *Enable Google APIs*. ++ +Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Admin SDK API* and *Drive API*. + +. *Google Workspace domain-wide delegation of authority*. ++ +To access drive and user data in a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. ++ +You need to grant the following *OAuth Scopes* to your service account: ++ +-- +* `https://www.googleapis.com/auth/admin.directory.group.readonly` +* `https://www.googleapis.com/auth/admin.directory.user.readonly` +* `https://www.googleapis.com/auth/drive.readonly` +* `https://www.googleapis.com/auth/drive.metadata.readonly` +-- ++ +This step allows the connector to: + +* access user data and their group memberships in a Google Workspace organization +* access Google Drive data in drives associated to Google Workspace members + +[discrete#es-connectors-google-drive-configuration] +===== Configuration + +The following configuration fields are required: + +Google Drive service account JSON:: +The service account credentials generated from Google Cloud Platform (JSON string). +Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. + +Enable document level security:: +Toggle to enable <>. +DLS is supported for the Google Drive connector. +When enabled: ++ +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. + +Google Workspace admin email:: +Google Workspace admin email. +Required to enable document level security (DLS) or domain-wide delegation for data sync. +A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. +Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. + +[discrete#es-connectors-google-drive-documents-and-syncs] +===== Documents and syncs + +The connector will fetch all files and folders the service account has access to. 
+ +It will attempt to extract the content from Google Suite documents (Google Docs, Google Sheets and Google Slides) and regular files. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced by default. +You must first enable <>. +Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-google-drive-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-google-drive-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently filtering is controlled via ingest pipelines. + +[discrete#es-connectors-google-drive-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-google-drive-content-extraction] +===== Content extraction + +See <> for more information. + +[discrete#es-connectors-google-drive-known-issues] +===== Known issues + +There are currently no known issues for this connector. + +[discrete#es-connectors-google-drive-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-google-drive-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-google-drive-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-google-drive-client-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. To use this connector, satisfy all <>. + +[discrete#es-connectors-google-drive-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <> +For additional usage operations, see <>. + +[discrete#es-connectors-google-drive-client-connector-authentication-prerequisites] +===== Connector authentication prerequisites + +Before syncing any data from Google Drive, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to Google Drive API. + +To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. + +. *Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. + +. *Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Drive API*. + +. *Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. 
This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. Your service account needs to have access to at least the following scope: +- `https://www.googleapis.com/auth/drive.readonly` + +. *Create a Key File*. + - In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. + - Click the email address of the service account that you want to create a key for. + - Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. + - Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. + +. *[Optional] Share Google Drive Folders.* If you use domain-wide delegation for syncing data you can skip this step. Go to your Google Drive. Right-click the folder or shared drive, choose `Share` and add the email address of the service account you created in step 3. as a viewer to this folder. + +[NOTE] +==== +When you grant a service account access to a specific folder or shared drive in Google Drive, it's important to note that the permissions extend to all the children within that folder or drive. +This means that any folders or files contained within the granted folder or drive inherit the same access privileges as the parent. +==== + +[discrete#es-connectors-google-drive-client-additional-prerequisites-for-domain-wide-delegation] +====== Additional authentication prerequisites for domain-wide delegation + +This step is *required* when *Use domain-wide delegation for data sync* or *Enable document level security* configuration option is enabled. + +. *Enable Google APIs*. ++ +Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Admin SDK API* and *Drive API*. + +. *Google Workspace domain-wide delegation of authority*. ++ +To access drive and user data in a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. ++ +You need to grant the following *OAuth Scopes* to your service account: ++ +-- +* `https://www.googleapis.com/auth/admin.directory.group.readonly` +* `https://www.googleapis.com/auth/admin.directory.user.readonly` +* `https://www.googleapis.com/auth/drive.readonly` +* `https://www.googleapis.com/auth/drive.metadata.readonly` +-- ++ +This step allows the connector to: + +* access user data and their group memberships in a Google Workspace organization +* access Google Drive data in drives associated to Google Workspace members + +[discrete#es-connectors-google-drive-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/google_drive.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required: + +`service_account_credentials`:: +The service account credentials generated from Google Cloud Platform (JSON string). 
+Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. + +`use_domain_wide_delegation_for_sync`:: +Use https://developers.google.com/cloud-search/docs/guides/delegation[domain-wide delegation] to automatically sync content from all shared and personal drives in the Google workspace. +This eliminates the need to manually share Google Drive data with your service account, though it may increase the sync time. +If disabled, only items and folders manually shared with the service account will be synced. + +`google_workspace_admin_email_for_data_sync`:: +Required when domain-wide delegation for data sync is enabled. +This email is used for discovery and syncing of shared drives. Only the shared drives this user has access to are synced. + +`google_workspace_email_for_shared_drives_sync`:: +Required when domain-wide delegation for data sync is enabled. +Provide the Google Workspace user email for discovery and syncing of shared drives. Only the shared drives this user has access to will be synced. + +`use_document_level_security`:: +Toggle to enable <>. +DLS is supported for the Google Drive connector. +When enabled: ++ +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. + +`google_workspace_admin_email`:: +Google Workspace admin email. +Required to enable document level security (DLS) or domain-wide delegation for data sync. +A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. +Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. + +`max_concurrency`:: +The maximum number of concurrent HTTP requests to the Google Drive API. +Increasing this value can improve data retrieval speed, but it may also place higher demands on system resources and network bandwidth. + +`use_text_extraction_service`:: +Requires a separate deployment of the <>. +Requires that pipeline settings disable text extraction. +Default value is `False`. + +[discrete#es-connectors-google-drive-client-deployment-using-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-google-drive-client-documents-and-syncs] +===== Documents and syncs + +The connector will fetch all files and folders the service account has access to. + +It will attempt to extract the content from Google Suite documents (Google Docs, Google Sheets and Google Slides) and regular files. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted +* Permissions are not synced by default. +You must first enable <>. +Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-google-drive-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-google-drive-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently filtering is controlled via ingest pipelines. 
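+
+As a rough illustration of pipeline-based filtering, the sketch below is the body of a `PUT _ingest/pipeline/<pipeline-name>` request that uses a `drop` processor to skip unwanted documents before they are indexed. The `title` field and the draft-detection condition are assumptions made for this example; substitute the fields your synced documents actually contain, and attach the pipeline to your connector's index.
+
+[source,js]
+----
+{
+  "description": "Example: drop synced documents whose title marks them as drafts",
+  "processors": [
+    {
+      "drop": {
+        // Hypothetical condition; adjust the field and predicate to your data.
+        "if": "ctx.title != null && ctx.title.toLowerCase().startsWith('draft')"
+      }
+    }
+  ]
+}
+----
+// NOTCONSOLE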
+ +[discrete#es-connectors-google-drive-client-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-google-drive-client-content-extraction] +===== Content extraction + +See <> for more information. + +[discrete#es-connectors-google-drive-client-end-to-end-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. Refer to <> for more details. + +To perform E2E testing for the Google Drive connector, run the following command: + +[source,shell] +---- +make ftest NAME=google_drive +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=google_drive DATA_SIZE=small +---- + +[discrete#es-connectors-google-drive-client-known-issues] +===== Known issues + +There are currently no known issues for this connector. + +[discrete#es-connectors-google-drive-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-google-drive-client-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-graphql.asciidoc b/docs/reference/connector/docs/connectors-graphql.asciidoc new file mode 100644 index 0000000000000..bc7083d482e79 --- /dev/null +++ b/docs/reference/connector/docs/connectors-graphql.asciidoc @@ -0,0 +1,248 @@ +[#es-connectors-graphql] +=== Elastic GraphQL connector reference +++++ +GraphQL +++++ + +// Attributes used in this file +:service-name: GraphQL +:service-name-stub: graphql + +The Elastic GraphQL connector is written in Python using the https://github.com/elastic/connectors/tree/main[Elastic connector framework]. View the https://github.com/elastic/connectors/blob/main/connectors/sources/graphql.py[source code for this connector]. + +[discrete#es-connectors-graphql-connector-availability-and-prerequisites] +==== Availability and prerequisites + +This connector was introduced in Elastic *8.14.0*, available as a *self-managed* self-managed connector. + +To use this connector, satisfy all <>. +Importantly, you must deploy the connectors service on your own infrastructure. +You have two deployment options: + +* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally. +* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform. + +[NOTE] +==== +This connector is in *technical preview* and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. +==== + +[discrete#es-connectors-graphql-connector-usage] +==== Usage + +To set up this connector in the UI, select the *GraphQL* tile when creating a new connector under *Search -> Connectors*. + +If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. + +For additional operations, see <>. 
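+
+If you prefer the API route, the connector record can be created with a single request. The sketch below shows a minimal body for `PUT _connector/<connector-id>`; the connector id and index name are placeholders, and it assumes the service type string for this connector is `graphql`. Configuration and scheduling still happen afterwards, via Kibana or the Connector APIs:
+
+[source,js]
+----
+// Placeholder names; adjust the index name and connector name to your setup.
+{
+  "index_name": "search-graphql-content",
+  "name": "My GraphQL connector",
+  "service_type": "graphql"
+}
+----
+// NOTCONSOLE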
+ +[discrete#es-connectors-graphql-connector-docker] +==== Deploy with Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-graphql-connector-configuration] +==== Configuration + +[discrete#es-connectors-graphql-connector-configure-graphql-connector] +===== Configure GraphQL connector + +Note the following configuration fields: + +`http_endpoint` (required):: +Base URL of the GraphQL endpoint. +*Example*: `https://api.xyz.com/graphql` + +`http_method` (required):: +`GET` or `POST`. + +`authentication_method`(required):: +Select from `No Auth`, `Basic Auth`, and `Bearer Token`. + +`username`:: +Required when using basic authentication. + +`password`:: +Required when using basic authentication. + +`token`:: +Required when using bearer token authentication. + +`graphql_query` (required):: +Query used to fetch data from the source. +Can contain variables provided in the `graphql_variables` field. +The connector will substitute the variables in the query with values from `graphql_variables` and make a GraphQL query to the source. ++ +*Example*: ++ +[source,js] +---- +query getUser($id: ID!) { + user(id: $id) { + name + email + } +} +---- +// NOTCONSOLE + +`graphql_variables`:: +A JSON object of key/value pairs containing variables used in the GraphQL query. +The connector will substitute the variables in the query with the values provided here and make a GraphQL query to the source. ++ +*Example*: ++ +For the GraphQL query `query getUser($id: ID!) { user(id: $id) { name } }` ++ +* Where the value of `graphql_variables` is `{"id": "123"}` +* The connector will execute `query getUser { user(id: "123") { name } }` to fetch data from the source + +`graphql_object_to_id_map` (required):: +A JSON mapping between GraphQL response objects to index and their ID fields. +The connector will fetch data for each object (JSON key) and use the provided ID field (JSON value) to index the object into Elasticsearch. +The connector will index all fields for each object specified in the mapping. +Use dot `(.)` notation to specify the full path from the root of the GraphQL response to the desired object. ++ +*Example*: ++ +The GraphQL query `query getUser { organization { users{ user_id name email} } }` fetches all available users from the source. +To index every user as a separate document configure this field as below. ++ +[source,js] +---- +{ + "organization.users": "user_id" +} +---- +// NOTCONSOLE ++ +In this example `user_id` is unique in every user document. Therefore, we set `user_id` as the value for `organization.users`. ++ +[NOTE] +==== +The path provided in this field should only contain JSON objects and not lists. +==== + +`headers`:: +JSON object containing custom headers to be sent with each GraphQL request: ++ +[source,js] +---- +{ + "content-type": "Application/json" +} +---- +// NOTCONSOLE + +`pagination_model` (required):: +This field specifies the pagination model to be used by the connector. +The connector supports `No pagination` and `Cursor-based pagination` pagination models. ++ +For cursor-based pagination, add `pageInfo {endCursor hasNextPage}` and an `after` argument variable in your query at the desired node (`Pagination key`). +Use the `after` query argument with a variable to iterate through pages. +The default value for this field is `No pagination`. Example: ++ +For `Cursor-based pagination`, the query should look like this example: ++ +[source,js] +---- +query getUsers($cursor: String!) 
{ + sampleData { + users(after: $cursor) { + pageInfo { + endCursor + hasNextPage + } + nodes { + first_name + last_name + address + } + } + } +} +---- +// NOTCONSOLE ++ +The value of `pagination_key` is `sampleData.users` so it must contain: ++ +* `pageInfo {endCursor hasNextPage}` +* the `after` argument with a variable when using cursor-based pagination + +`pagination_key` (required):: +Specifies which GraphQL object is used for pagination. +Use `.` to provide the full path of the object from the root of the response. ++ +*Example*: ++ +* `organization.users` + +`connection_timeout`:: +Specifies the maximum time in seconds to wait for a response from the GraphQL source. +Default value is *30 seconds*. + +[discrete#es-connectors-graphql-connector-documents-and-syncs] +==== Documents and syncs + +The connector syncs the objects and entities based on GraphQL Query and GraphQL Object List. + +[discrete#es-connectors-graphql-connector-sync-types] +==== Sync types + +<> are supported by default for all connectors. + +This connector currently does not support <>. + +[discrete#es-connectors-graphql-connector-sync-rules] +==== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-graphql-connector-advanced-sync-rules] +==== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-graphql-connector-connector-client-operations] +==== Connector Client operations + +[discrete#es-connectors-graphql-connector-end-to-end-testing] +===== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source, using Docker Compose. +You don't need a running Elasticsearch instance or GraphQL source to run this test. + +Refer to <> for more details. + +To perform E2E testing for the GraphQL connector, run the following command: + +```shell +$ make ftest NAME=graphql +``` +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=graphql DATA_SIZE=small +---- + +By default, `DATA_SIZE=MEDIUM`. + +[discrete#es-connectors-graphql-connector-known-issues] +==== Known issues + +* Every document will be updated in every sync. +* If the same field name exists with different types across different objects, the connector might raise a mapping parser exception. + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-graphql-connector-troubleshooting] +==== Troubleshooting + +See <>. + +[discrete#es-connectors-graphql-connector-security] +==== Security + +See <>. + diff --git a/docs/reference/connector/docs/connectors-hosted-tutorial-mongo.asciidoc b/docs/reference/connector/docs/connectors-hosted-tutorial-mongo.asciidoc new file mode 100644 index 0000000000000..a1f7048705555 --- /dev/null +++ b/docs/reference/connector/docs/connectors-hosted-tutorial-mongo.asciidoc @@ -0,0 +1,193 @@ +[#es-mongodb-start] +=== MongoDB managed connector tutorial +++++ +Managed connector tutorial (MongoDB) +++++ + +// Learn how to use the <> to sync data from https://www.mongodb.com/docs/atlas/[MongoDB Atlas^] to an Elastic Cloud deployment. + +This tutorial explains how to set up automatic, ongoing syncs from a MongoDB database to a search-optimized index on Elastic Cloud. +We'll use an Atlas deployment, together with built-in sample datasets in this tutorial, but you can also use your own data. 
+ +This tutorial is an example of: + +* How to use the <>, which is compatible with MongoDB Atlas and on premises MongoDB servers. +See <>. +* How to use any connector available as a <>. +The workflow in this tutorial applies to all Elastic managed connectors. + +This tutorial has three main sections: + +* <>: First you'll need to get *MongoDB Atlas* up and running. +(*Skip this step* if you already have a MongoDB instance you'd like to use.) +** You'll create a free account, set up a free Atlas cluster, and load some sample data. +* <> Once that's done, you'll need to gather some details about your Atlas cluster, so you can connect it to an Elastic Cloud deployment. +* <>: Next, you'll need to get *Elastic Cloud* up and running. +** Then you'll need to create an Elasticsearch index and configure the Elastic connector to interface with your Atlas cluster. +We'll do all this in the Kibana UI. +** Once configured, you'll set a syncing schedule to start indexing your MongoDB data into Elasticsearch and ensure it stays up to date. + +[discrete#es-mongodb-start-atlas-setup] +== Set up MongoDB Atlas + +Follow the steps in the MongoDB documentation to https://www.mongodb.com/docs/atlas/getting-started[create a free Atlas account^]: + +* Create and deploy a free Atlas cluster. +* Under *Security > Network Access*, add IP access to `0.0.0.0/0`. +This CIDR-notation allows connections from any IP address. +This enables the Elastic connector, running on Elastic Cloud, to access Atlas. +See https://www.mongodb.com/docs/atlas/security/add-ip-address-to-list/[the Atlas documentation^] for complete instructions. +* Create a *database user*, with a username and password combination. +Select *Add new database user* in *Security > Database access*. +Keep these details handy, as you'll need them to configure the connector later. + +[discrete#es-mongodb-start-load-sample-data] +=== Load sample data into Atlas + +In this example we'll use the sample data available to MongoDB Atlas. +You can do this in the MongoDB Atlas UI. + +Use the *Load Sample Dataset* button in the Atlas UI, under *Database Deployments*. +Find this by selecting the *"..."* button next to your cluster name. + +.Loading sample data in Atlas UI +image::images/mongodb-load-sample-data.png[Load sample data in Atlas UI] + +Detailed instructions are available in the https://www.mongodb.com/docs/atlas/sample-data[Atlas documentation^]. + +[discrete#es-mongodb-start-view-sample-data] +=== Browse sample data in Atlas UI + +Once loaded, you can view your sample data in the Atlas UI, by selecting your database deployment's *Browse Collections* button. +Confirm that the sample databases have been added to your database deployment. + +In this example, we'll use the https://www.mongodb.com/docs/atlas/sample-data/sample-mflix/[`sample_mflix`^] dataset, which contains data on movies and movie theaters. +The database contains collections for certain metadata, including users and comments on specific movies. +We'll focus on the `comments` collection in this example. +Each document contains a comment, and information such as the commenter's name and email address. + +.A sample MongoDB document from the `comments` collection +image::images/mongodb-sample-document.png[Sample document from the comments collection] + +Later, once the connector transforms MongoDB documents into Elasticsearch documents, you can compare their structure. + +We've added data to our MongoDB Atlas cluster, and now we need to configure the Elastic MongoDB connector. 
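+
+Before moving on, it helps to know roughly what one of those `comments` documents looks like in JSON form, so you can later compare it with the Elasticsearch document the connector produces. The field names below match the sample dataset; the values are purely illustrative:
+
+[source,js]
+----
+// Illustrative values only; `_id` and `movie_id` are ObjectIds in MongoDB.
+{
+  "_id": "5a9427648b0beebeb69579e7",
+  "name": "Jane Doe",
+  "email": "jane.doe@example.com",
+  "movie_id": "573a1390f29313caabcd4323",
+  "text": "A thoughtful comment about the movie.",
+  "date": "2002-08-18T04:56:07.000Z"
+}
+----
+// NOTCONSOLE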
+
+[discrete#es-mongodb-start-gather-details]
+== Gather details about your MongoDB instance
+
+Before we switch over to working in Elastic Cloud, we need to gather some details about our MongoDB Atlas cluster.
+We'll need these details to configure the Elastic MongoDB connector.
+You can find these details in the Atlas UI.
+
+Find the following details:
+
+* *Host*: The URI of your MongoDB Atlas cluster.
+This should look like `mongodb+srv://.hjksqfc.mongodb.net`.
+Find this by https://www.mongodb.com/docs/atlas/tutorial/connect-to-your-cluster/#connect-to-your-atlas-cluster[connecting to your cluster^] in the MongoDB Atlas UI.
+** One way to find this URI is to select *Connect with MongoDB Shell* and copy the connection string from the CLI instructions.
+* *Database*: The name of the database you want to sync.
+In this example, we'll use the `sample_mflix` database.
+* *Collection*: The name of the collection you want to sync.
+In this example, we'll use the `comments` collection of the `sample_mflix` database.
+* *Username*: The username you created earlier, in the setup phase.
+* *Password*: The password you created earlier.
+
+Keep these details handy!
+
+[discrete#es-mongodb-start-elastic-cloud]
+== Set up Elastic Cloud
+
+Everything is set up in MongoDB Atlas and we have the details we need to configure the Elastic MongoDB connector.
+First we'll need to get an Elastic Cloud deployment up and running.
+
+[discrete#es-mongodb-start-create-deployment]
+=== Create an {ecloud} deployment
+
+[NOTE]
+====
+This step is for users who are new to Elastic Cloud.
+Skip this step if your team already has an Elastic Cloud deployment.
+====
+
+Log in to https://cloud.elastic.co/[Elastic Cloud^], and use the UI to create a deployment.
+You'll need to run version *8.5.0* or later.
+
+Read <> for full details.
+
+Once your deployment is created, navigate to *Search*.
+
+[discrete#es-mongodb-start-create-index]
+=== Create an Elasticsearch index
+
+The Elastic connector will sync your MongoDB data into a search-optimized Elasticsearch index.
+The first step is to create your index in the Kibana UI.
+
+In the main menu navigate to *Search > Content > Indices*.
+
+Follow these steps to create your index:
+
+* Select *Create an Elasticsearch index*.
+* Choose *Connector* as your ingestion method.
+* Select the *MongoDB* connector type.
+* Name your new index, for example `search-mongo-sample`, then save.
+This takes you to the *Configuration* tab of your index overview page.
+
+Next we need to input our Atlas details to configure the connector.
+
+[discrete#es-mongodb-start-configure-connector]
+=== Configure the MongoDB connector
+
+Using the <>, configure the MongoDB connector.
+Enter the details under the *Configuration* step.
+
+Set the *Direct connection* option to `false` for this example.
+You must enable SSL/TLS for MongoDB Atlas, so toggle on the *SSL/TLS Connection* option.
+
+.Example configuration for the MongoDB connector
+image::images/mongodb-connector-config.png[Example configuration for the MongoDB connector, width=350]
+
+Once you've entered these details, select *Save configuration*.
+
+[discrete#es-mongodb-start-launch-sync]
+=== Begin syncing
+
+Once you've configured your MongoDB connector, it's time to schedule a sync.
+
+The UI will take you to the *Scheduling* tab of your index overview page.
+We'll schedule a recurring sync for this example, which will run every day at midnight.
+
+In the *Scheduling* tab:
+
+* Toggle *Enable recurring syncs with the following schedule*.
+* Select *Frequency*, "Every" `day`. +* Select *Time*, "At" `00:00`. +* *Save* this sync schedule. + +Once you save your sync schedule, the connector will start syncing your MongoDB Atlas data into Elasticsearch. + +[discrete#es-mongodb-start-verify-documents] +=== Verify documents + +[TIP] +==== +Our <> explains how documents in your MongoDB database and collection are extracted and transformed into documents in your Elasticsearch index. +==== + +If all the configuration details are correct, the sync will begin and documents will start to appear in your Elasticsearch index. + +As soon as your first documents are synced, you can view the documents and inspect the mapping for the index: + +* In Kibana, navigate to *Search* > *Content* > *Indices*. +* Select your index, for example `search-mongo-sample`. +* Choose the *Documents* tab to view the synced documents. +Expand a document to view its fields. + +[discrete#es-mongodb-start-learn-more] +== Learn more + +* Refer to the <> for detailed information about the connector, including how *sync rules* work. +* For an overview of all Elastic managed connectors, see <>. +* Learn about <> for Elastic managed connectors. +* Learn about {ref}/ingest-pipeline-search.html[ingest pipelines for Search indices] +* Refer to the official https://www.mongodb.com/docs/atlas/[MongoDB Atlas documentation^] for MongoDB-specific questions. diff --git a/docs/reference/connector/docs/connectors-jira.asciidoc b/docs/reference/connector/docs/connectors-jira.asciidoc new file mode 100644 index 0000000000000..b11070b1a581a --- /dev/null +++ b/docs/reference/connector/docs/connectors-jira.asciidoc @@ -0,0 +1,544 @@ +[#es-connectors-jira] +=== Elastic Jira connector reference +++++ +Jira +++++ +// Attributes used in this file +:service-name: Jira +:service-name-stub: jira + +The _Elastic Jira connector_ is a <> for https://www.atlassian.com/software/jira[Atlassian Jira^]. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-jira-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-jira-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. + +[NOTE] +==== +Jira Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. +==== + +To use this connector natively in Elastic Cloud, satisfy all <>. 
+ +[discrete#es-connectors-jira-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-jira-usage] +===== Usage + +To use this connector natively in Elastic Cloud, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-jira-compatability] +===== Compatibility + +* Jira Cloud, Jira Server, and Jira Data Center *versions 7 or later*. + +[discrete#es-connectors-jira-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +Jira data source:: +Dropdown to determine the Jira platform type: `Jira Cloud`, `Jira Server`, or `Jira Data Center`. Default value is `Jira Cloud`. + +Jira Data Center username:: +The username of the account for Jira Data Center. + +Jira Data Center password:: +The password of the account to be used for Jira Data Center. + +Jira Cloud service account id:: +Email address to authenticate with Jira Cloud. Example: jane.doe@example.com + +Jira Cloud API token:: +The API Token to authenticate with Jira Cloud. + +Jira Server username:: +The username of the account for Jira Server. + +Jira Server password:: +The password of the account to be used for Jira Server. + +Jira Cloud service account id:: +The account email for Jira Cloud. + +Jira Cloud API token:: +The API Token to authenticate with Jira Cloud. + +Jira host url:: +The domain where Jira is hosted. Examples: + +* https://192.158.1.38:8080/ +* https://test_user.atlassian.net/ + +Jira project keys:: +Comma-separated list of https://support.atlassian.com/jira-software-cloud/docs/what-is-an-issue/#Workingwithissues-Projectkeys[Project Keys^] to fetch data from Jira server or cloud. If the value is `*` the connector will fetch data from all projects present in the configured projects. Default value is `*`. Examples: + +* `EC`, `TP` +* `*` + +Enable SSL:: +Whether SSL verification will be enabled. Default value is `False`. + +SSL certificate:: +Content of SSL certificate. Note: In case of `ssl_enabled` is `False`, the `ssl_ca` value will be ignored. Example certificate: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +Enable document level security:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs fetch users' access control lists and store them in a separate index. ++ +[NOTE] +==== +To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. +This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. +==== + +[discrete#es-connectors-jira-documents-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* **Projects** +** Includes metadata such as description, project key, project type, lead name, etc. +* **Issues** +** All types of issues including Task, Bug, Sub-task, Enhancement, Story, etc. +** Includes metadata such as issue type, parent issue details, fix versions, affected versions, resolution, attachments, comments, sub-task details, priority, custom fields, etc. +* **Attachments** + +**Note:** Archived projects and issues are not indexed. 
+
+[NOTE]
+====
+* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.)
+* Permissions are not synced by default.
+You must first enable <>.
+Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment.
+====
+
+[discrete#es-connectors-jira-sync-types]
+====== Sync types
+
+<> are supported by default for all connectors.
+
+This connector also supports <>.
+
+[discrete#es-connectors-jira-sync-rules]
+===== Sync rules
+
+<> are identical for all connectors and are available by default.
+
+This connector supports <> for remote filtering.
+These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules.
+Advanced sync rules are defined through a source-specific DSL JSON snippet.
+
+[discrete#es-connectors-jira-sync-rules-examples]
+====== Advanced sync rules example
+
+*Example 1*: Queries to index content based on status of Jira issues.
+
+[source,js]
+----
+[
+  {
+    "query": "project = Collaboration AND status = 'In Progress'"
+  },
+  {
+    "query": "status IN ('To Do', 'In Progress', 'Closed')"
+  }
+]
+----
+// NOTCONSOLE
+
+*Example 2*: Query to index data based on priority of issues for given projects.
+
+[source,js]
+----
+[
+  {
+    "query": "priority in (Blocker, Critical) AND project in (ProjA, ProjB, ProjC)"
+  }
+]
+----
+// NOTCONSOLE
+
+*Example 3*: Query to index data based on assignee and created time.
+
+[source,js]
+----
+[
+  {
+    "query": "assignee is EMPTY and created < -1d"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-jira-document-level-security]
+===== Document level security
+
+Document level security (DLS) enables you to restrict access to documents based on a user’s permissions.
+Refer to <> on this page for how to enable DLS for this connector.
+
+[WARNING]
+====
+Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved.
+====
+
+[WARNING]
+====
+When the `data_source` is set to Jira Data Center or Server, the connector will only fetch 1000 users for access control syncs, due to a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^].
+====
+
+[NOTE]
+====
+Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application.
+The example uses SharePoint Online as the data source, but the same steps apply to every connector.
+====
+
+[discrete#es-connectors-jira-content-extraction]
+===== Content Extraction
+
+See <>.
+
+[discrete#es-connectors-jira-known-issues]
+===== Known issues
+
+* *Enabling document-level security impacts performance.*
++
+Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved.
+
+Refer to <> for a list of known issues for all connectors.
+
+[discrete#es-connectors-jira-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-jira-security]
+===== Security
+
+See <>.
+
+// Closing the collapsible section
+===============
+
+
+// //////// //// //// //// //// //// //// ////////
+// ////////   CONNECTOR CLIENT REFERENCE   ///////
+// //////// //// //// //// //// //// //// ////////
+
+[discrete#es-connectors-jira-connector-client-reference]
+==== *Self-managed connector*
+
+.View *self-managed connector* reference
+
+[%collapsible]
+===============
+
+[discrete#es-connectors-jira-client-availability-prerequisites]
+===== Availability and prerequisites
+
+This connector is available as a *self-managed connector*.
+This self-managed connector is compatible with Elastic versions *8.7.0+*.
+
+[NOTE]
+====
+Jira Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features.
+====
+
+To use this connector, satisfy all <>.
+
+[discrete#es-connectors-jira-create-connector-client]
+===== Create a {service-name} connector
+include::_connectors-create-client.asciidoc[]
+
+[discrete#es-connectors-jira-client-usage]
+===== Usage
+
+To use this connector as a *self-managed connector*, see <>.
+For additional usage operations, see <>.
+
+[discrete#es-connectors-jira-client-compatability]
+===== Compatibility
+
+* Jira Cloud, Jira Server, and Jira Data Center *versions 7 or later*.
+
+[discrete#es-connectors-jira-client-configuration]
+===== Configuration
+
+[TIP]
+====
+When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/jira.py[connector source code^].
+These are set in the `get_default_configuration` function definition.
+
+These configurable fields will be rendered with their respective *labels* in the Kibana UI.
+Once connected, you'll be able to update these values in Kibana.
+====
+
+The following configuration fields are required to set up the connector:
+
+`data_source`::
+Dropdown to determine the Jira platform type: `Jira Cloud`, `Jira Server`, or `Jira Data Center`. Default value is `Jira Cloud`.
+
+`data_center_username`::
+The username of the account for Jira Data Center.
+
+`data_center_password`::
+The password of the account to be used for Jira Data Center.
+
+`username`::
+The username of the account for Jira Server.
+
+`password`::
+The password of the account to be used for Jira Server.
+
+`account_email`::
+Email address to authenticate with Jira Cloud. Example: jane.doe@example.com
+
+`api_token`::
+The API Token to authenticate with Jira Cloud.
+
+`jira_url`::
+The domain where Jira is hosted. Examples:
+
+* https://192.158.1.38:8080/
+* https://test_user.atlassian.net/
+
+`projects`::
+Comma-separated list of https://support.atlassian.com/jira-software-cloud/docs/what-is-an-issue/#Workingwithissues-Projectkeys[Project Keys^] to fetch data from Jira Server or Cloud. If the value is `*`, the connector will fetch data from all available projects. Default value is `*`. Examples:
++
+* `EC`, `TP`
+* `*`
++
+[WARNING]
+====
+This field can be bypassed by advanced sync rules.
+====
+
+`ssl_enabled`::
+Whether SSL verification will be enabled. Default value is `False`.
+
+`ssl_ca`::
+Content of SSL certificate. Note: If `ssl_enabled` is set to `False`, the `ssl_ca` value will be ignored. 
Example certificate: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +`retry_count`:: +The number of retry attempts after failed request to Jira. Default value is 3. + +`concurrent_downloads`:: +The number of concurrent downloads for fetching the attachment content. This speeds up the content extraction of attachments. Defaults to 100. + +`use_document_level_security`:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs fetch users' access control lists and store them in a separate index. ++ +[NOTE] +==== +To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. +This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. +==== + +`use_text_extraction_service`:: +Requires a separate deployment of the <>. +Requires that ingest pipeline settings disable text extraction. +Default value is `False`. + +[discrete#es-connectors-jira-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-jira-client-documents-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* **Projects** +** Includes metadata such as description, project key, project type, lead name, etc. +* **Issues** +** All types of issues including Task, Bug, Sub-task, Enhancement, Story, etc. +** Includes metadata such as issue type, parent issue details, fix versions, affected versions, resolution, attachments, comments, sub-task details, priority, custom fields, etc. +* **Attachments** + +**Note:** Archived projects and issues are not indexed. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted +* Permissions are not synced by default. +You must first enable <>. +Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-jira-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-jira-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +This connector supports <> for remote filtering. +These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +[discrete#es-connectors-jira-client-sync-rules-examples] +====== Advanced sync rules example + +*Example 1*: Queries to index content based on status of Jira issues. + +[source,js] +---- +[ + { + "query": "project = Collaboration AND status = 'In Progress'" + }, + { + "query": "status IN ('To Do', 'In Progress', 'Closed')" + } +] +---- +// NOTCONSOLE + +*Example 2*: Query to index data based on priority of issues for given projects. + +[source,js] +---- +[ + { + "query": "priority in (Blocker, Critical) AND project in (ProjA, ProjB, ProjC)" + } +] +---- +// NOTCONSOLE + +*Example 3*: Query to index data based on assignee and created time. 
+ +[source,js] +---- +[ + { + "query": "assignee is EMPTY and created < -1d" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-jira-client-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[WARNING] +==== +Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved. +==== + +[WARNING] +==== +When the `data_source` is set to Confluence Data Center or Server, the connector will only fetch 1000 users for access control syncs, due a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^]. +==== + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-jira-client-content-extraction] +===== Content Extraction + +See <>. + +[discrete#es-connectors-jira-client-connector-client-operations] +===== Self-managed connector operations + +[discrete#es-connectors-jira-client-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the Jira connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=jira +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=jira DATA_SIZE=small +---- + +[discrete#es-connectors-jira-client-known-issues] +===== Known issues + +* *Enabling document-level security impacts performance.* ++ +Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved. + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-jira-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-jira-client-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-known-issues.asciidoc b/docs/reference/connector/docs/connectors-known-issues.asciidoc new file mode 100644 index 0000000000000..e8074df9526c6 --- /dev/null +++ b/docs/reference/connector/docs/connectors-known-issues.asciidoc @@ -0,0 +1,166 @@ +[#es-connectors-known-issues] +== Connector known issues +++++ +Known issues +++++ + +[discrete#es-connectors-known-issues-enterprie-search-service] +=== Enterprise Search service: self-managed connectors + +As of *8.10.0* self-managed connectors no longer require the Enterprise Search service to be running on your Elastic deployment. +However, if you are upgrading connectors from versions _earlier than 8.9_, you'll need to run Enterprise Search once to migrate your connectors to the new format. + +Some points to note about this migration: + +* This involves updating system indices that store configuration and sync history for your connectors. +* This is an in-place operation, meaning no temporary or backup indices will be created. 
+* Therefore, it is important to take a snapshot of the Elasticsearch cluster before upgrading, so you can recover in the unlikely event of an index migration failure.
+
+If you have trouble with this migration, please contact support.
+
+[WARNING]
+====
+To run self-managed connectors, your self-deployed connector service version must match your Elasticsearch version.
+For example, if you're running Elasticsearch 8.10.1, your connector service should be version 8.10.1.x.
+Elastic does not support deployments running mismatched versions (except during upgrades).
+====
+
+[discrete#es-connectors-known-issues-connector-service]
+=== Connector service
+
+The connector service has the following known issues:
+
+* *OOM errors when syncing large database tables*
++
+Syncs after the initial sync can cause out-of-memory (OOM) errors when syncing large database tables.
+This occurs because database connectors load and store IDs in memory.
+For tables with millions of records, this can lead to memory exhaustion if the connector service has insufficient RAM.
++
+To mitigate this issue, you can:
++
+** *Increase RAM allocation*:
+*** *Elastic Cloud*: Upgrade the Enterprise Search instance to a larger size. Note that for Elastic managed connectors running on Elastic Cloud, the connector service runs on the Enterprise Search node. It only has access to up to 40% of the node’s RAM allocation.
+*** *Self-managed*: Increase RAM allocation for the machine/container running the connector service.
++
+.RAM *sizing guidelines*
+[%collapsible]
+==============
+The following table shows the estimated RAM usage for loading IDs into memory.
+|===
+| *Number of IDs* | *Memory Usage in MB (2X buffer)*
+| 1,000,000
+| ≈ 45.78 MB
+| 10,000,000
+| ≈ 457.76 MB
+| 50,000,000
+| ≈ 2288.82 MB (≈ 2.29 GB)
+| 100,000,000
+| ≈ 4577.64 MB (≈ 4.58 GB)
+|===
+==============
++
+** *Optimize* <>:
+*** Review and optimize sync rules to filter and reduce data retrieved from the source before syncing.
++
+** *Use a self-managed connector* instead of a managed connector:
+*** Because self-managed connectors run on your infrastructure, they are not subject to the same RAM limitations as the Enterprise Search node.
+
+* *Upgrades from deployments running on versions earlier than 8.9.0 can cause sync job failures*
++
+Due to a bug, the `job_type` field mapping will be missing after upgrading from deployments running on versions earlier than 8.9.0.
+Sync jobs won't be displayed in the Kibana UI (job history) and the connector service won't be able to start new sync jobs.
+*This will only occur if you have previously scheduled sync jobs.*
++
+To resolve this issue, you can manually add the missing field with the following command and trigger a sync job:
++
+[source,console]
+----
+PUT .elastic-connectors-sync-jobs-v1/_mapping
+{
+  "properties": {
+    "job_type": {
+      "type": "keyword"
+    }
+  }
+}
+----
+// TEST[skip:TODO]
+
+* *The connector service will fail to sync when the connector tries to fetch more than 2,147,483,647 (_2^31-1_) documents from a data source*
++
+A workaround is to manually partition the data to be synced using multiple search indices.
++
+* *Custom scheduling might break when upgrading from version 8.6 or earlier.*
++
+If you encounter the error `'custom_schedule_triggered': undefined method 'each' for nil:NilClass (NoMethodError)`, it means the custom scheduling feature migration failed.
+You can use the following manual workaround: ++ +[source,console] +---- +POST /.elastic-connectors/_update/connector-id +{ + "doc": { + "custom_scheduling": {} + } +} +---- +// TEST[skip:TODO] ++ +This error can appear on Connectors or Crawlers that aren't the cause of the issue. +If the error continues, try running the above command for every document in the `.elastic-connectors` index. ++ +* *Connectors upgrading from 8.7 or earlier can be missing configuration fields* ++ +A connector that was created prior to 8.8 can sometimes be missing configuration fields. +This is a known issue for the MySQL connector but could also affect other connectors. ++ +If the self-managed connector raises the error `Connector for has missing configuration fields: , ...`, you can resolve the error by manually adding the missing configuration fields via the Dev Tools. +Only the following two field properties are required, as the rest will be autopopulated by the self-managed connector: ++ +** `type`: one of `str`, `int`, `bool`, or `list` +** `value`: any value, as long as it is of the correct `type` (`list` type values should be saved as comma-separated strings) ++ +[source,console] +---- +POST /.elastic-connectors/_update/connector_id +{ + "doc" : { + "configuration": { + "field_a": { + "type": "str", + "value": "" + }, + "field_b": { + "type": "bool", + "value": false + }, + "field_c": { + "type": "int", + "value": 1 + }, + "field_d": { + "type": "list", + "value": "a,b" + } + } + } +} +---- +// TEST[skip:TODO] ++ +* *Python connectors that upgraded from 8.7.1 will report document volumes in gigabytes (GB) instead of megabytes (MB)* ++ +As a result, true document volume will be under-reported by a factor of 1024. ++ +* *The following Elastic managed connectors will not run correctly on Elastic Cloud in 8.9.0.* +They are still available as self-managed connectors. +** Azure Blob Storage +** Confluence Cloud & Server +** Jira Cloud & Server +** Network drives + +[discrete#es-connectors-known-issues-specific] +=== Individual connector known issues + +Individual connectors may have additional known issues. +Refer to <> for connector-specific known issues. diff --git a/docs/reference/connector/docs/connectors-logs.asciidoc b/docs/reference/connector/docs/connectors-logs.asciidoc new file mode 100644 index 0000000000000..9e0000d442120 --- /dev/null +++ b/docs/reference/connector/docs/connectors-logs.asciidoc @@ -0,0 +1,63 @@ +[#es-connectors-logs] +=== Connector logs +++++ +Logs +++++ + +This document describes logs for <> and <>. + +[discrete#es-connectors-logs-enable] +==== Enable logs + +[discrete#es-connectors-logs-enable-cloud] +===== Elastic Cloud + +Elastic Cloud users need to {cloud}/ec-enable-logging-and-monitoring.html#ec-enable-logging-and-monitoring-steps[enable logging^] to view connector logs. +Go to *Cloud > Deployment > _your-deployment_ > Logs and metrics* to enable logs. + +Once enabled, <>. + +Configure the `log_level` user setting for Enterprise Search. See {cloud}/ec-manage-enterprise-search-settings.html[Add Enterprise Search user settings^] in the Elastic Cloud documentation. + +[discrete#es-connectors-logs-enable-self-managed] +===== self-managed connectors + +Note that self-managed deployments and self-managed connector logs are written to `STDOUT`. + +Self-managed connectors have the following logging options: + +* Use the `service.log_level` setting in your connector service configuration file to specify the log level for the service. 
+** Enable `elasticsearch.bulk.enable_operations_logging` to log the result of sending documents to Elasticsearch from connectors, for auditing and debugging. This setting depends on the `service.log_level` and will be logged at `DEBUG` level . +* Use the `elasticsearch.log_level` setting to specify the log level for the Elasticsearch _client_ used by the connector service. + +[discrete#es-connectors-logs-view] +==== View connector logs + +You can view logs in Kibana. + +You can filter by `service.type`: + +- `enterprise-search` +- `connectors` + +[discrete#es-connectors-logs-reference] +==== Logs reference + +Logs use Elastic Common Schema (ECS), without extensions. +See {ecs-ref}[the ECS Reference^] for more information. + +The fields logged are: + +* `@timestamp` +* `log.level` +* `ecs.version` +* `labels.index_date` +* `tags` +* `log.logger` +* `service.type` +* `service.version` +* `process.name` +* `process.pid` +* `process.thread.id` + +See {ref}/logging.html[Logging^] in the Elasticsearch documentation for more information. diff --git a/docs/reference/connector/docs/connectors-managed-service.asciidoc b/docs/reference/connector/docs/connectors-managed-service.asciidoc new file mode 100644 index 0000000000000..df76a5ce9093f --- /dev/null +++ b/docs/reference/connector/docs/connectors-managed-service.asciidoc @@ -0,0 +1,207 @@ +[#es-native-connectors] +== Elastic managed connectors + +.Naming history +**** +Elastic managed connectors were initially known as "native connectors". +You might find this term in older documentation. +**** + +Managed <> are available directly within your Elastic Cloud deployment. +No additional infrastructure is required. + +Managed connectors sync data sources directly to Elasticsearch indices. +Create these indices using the *Connector* workflow within {kib}. + +The following connectors are available as Elastic managed connectors. +Refer to each connector reference for additional information specific to each connector. + +include::_connectors-list-native.asciidoc[] + +[discrete#es-native-connectors-prerequisites] +=== Availability and prerequisites + +Managed connectors were introduced in Elastic version *8.5.0*. + +Your Elastic Cloud deployment must include the following Elastic services: + +* *Elasticsearch* +* *{kib}* +* {enterprise-search-ref}/server.html[*Enterprise Search*] + +Refer to _Native Integrations_ on the https://www.elastic.co/subscriptions/cloud[Elastic subscriptions page], in the *Elastic Search* section for managed connector licensing requirements. + +.Elastic Cloud IPs +[sidebar] +-- +Using our Elastic managed connectors involves outbound data transfer (egress) from your Elastic Cloud deployment. +If you have IP/firewall rules on your third party service, you'll need to add the Elastic Cloud egress static IP ranges to your service’s configuration. + +Refer to {cloud}/ec-static-ips.html[static IP ranges] in the Elastic Cloud documentation for the latest list of IP addresses. +-- + +[discrete#es-native-connectors-usage] +=== Usage in {kib} UI + +Follow the *Connector* workflow in {kib} to select the *Connector* ingestion method. +Choose a data source, create an Elasticsearch index, and configure a managed connector to manage the index. + +[.screenshot] +image::images/use-a-connector-workflow.png[] + +[discrete#es-native-connectors-select-connector] +==== Select a connector + +Choose the data source to sync from the available options and select *Continue*. 
+ +[discrete#es-native-connectors-index] +==== Create index + +Create a new index to be managed by the connector: + +. Name your index and optionally change the language analyzer to match the human language of your data source. +(The index name will be automatically prefixed with `search-`.) +. Select *Create index*. + +The index is created and ready to <>. + +This operation requires: + +. Access to {kib} +. Permission to create or manage the index +. `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index +. `manage_api_key` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be created +. `write_connector_secrets` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be stored as a secret + +[discrete#es-native-connectors-configuration] +==== Configure connector + +Create a new index to be managed by the connector. + +Continue from above, or navigate to the following location within the {kib} UI: + +*Search > Content > Elasticsearch indices* + +Choose the index to configure, and then choose the *Configuration* tab. + +Configure the connector: + +. Edit the name and description for the connector. +Your team can use this information to differentiate this index from other connector indices. +(These fields describe the _connector_ and are independent of the Elasticsearch index name.) +. Save your changes. +. Edit the data source configuration. +The fields here vary by connector. +Refer to the documentation for each connector for details (refer to list of Elastic managed connectors, above). +Refer to <> for security considerations. +. Save your changes. + +Optionally choose *Edit sync schedule* to begin <>. + +This operation requires: + +. Access to {kib} +. Permission to create or manage the index +. `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index +. `manage_api_key` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be created +. `write_connector_secrets` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be stored as a secret + +[discrete#es-native-connectors-management] +==== Manage connector + +To manage documents, syncs, sync rules, ingest pipelines, and other connector features, refer to <>. + +[discrete#es-native-connectors-manage-API-keys] +=== Manage API keys + +[NOTE] +==== +API keys for Elastic managed connectors were introduced in Elastic version *8.13.0*. +Managed connectors created in earlier versions will not automatically use API keys upon upgrading to *8.13.0*. +Refer to <> for more information. +==== + +Managed connectors communicate with Elasticsearch using API keys. +When managing a managed connector through the Connectors page in the {kib} UI, API key management is handled automatically. +API keys for Elastic managed connectors have the `manage` permission for three indices: + +. The attached index +. The access control (ACL) index used for document level security +. The internal `.elastic-connectors` index. + +Changing the attached index through {kib} will automatically invalidate the existing API key and generate a new one. +If you want to rotate an existing API key, navigate to the *Configuration* tab. +Scroll down to *Manage API key* and select *Generate API key*. +This action will invalidate the previous API key, create a new API key, and update the connector secret. + +API keys for Elastic managed connectors are stored on an internal system index called `.connector-secrets`. 
+This index can only be written to through API requests by users with the `write_connector-secrets` cluster privilege. +Only the Enterprise Search instance has permission to read from this index. + +Users managing Elastic managed connectors will need the `write_connector_secrets` cluster privilege assigned to their role. +Refer to <> for security considerations. + +[discrete#es-native-connectors-manage-API-keys-programmatically] +==== Manage API keys programmatically + +You can also create and store API keys programmatically. + +.*Expand* the following section for details. +[%collapsible] +=================================== +include::_connectors-create-native-api-key.asciidoc[] +=================================== + +[discrete#es-native-connectors-enabling-API-keys-for-upgraded-connectors] +==== Enabling API keys for upgraded connectors + +Managed connectors created before *8.13.0* do not initially have API keys upon upgrading. +The attached indices of these connectors cannot be changed until the connector has been converted to use an API key. + +.*Expand* the following section for steps on enabling API keys for upgraded Elastic managed connectors. +[%collapsible] +=================================== +. Run the following command in *Dev Tools* to enable API keys for the connector, replacing values where indicated. ++ +[source, console,subs="+attributes"] +---- +POST .elastic-connectors/_update/connector_id +{ + "doc": { + "features": { + "native_connector_api_keys": { + "enabled": true + } + } + } +} +---- +// TEST[skip:requires connector_id] ++ +. Go back to the Connectors page and navigate to the *Configuration* tab. +. Scroll down to *Manage API key* and select *Generate API key*. + +Your managed connector is now using API keys to authorize ingestion into Elasticsearch. +=================================== + +[discrete#es-native-connectors-usage-api] +=== Usage via API + +In 8.12 we introduced a set of {ref}/connector-apis.html[Connector APIs] to create and manage Elastic connectors and sync jobs, along with a https://github.com/elastic/connectors/blob/main/docs/CLI.md[CLI tool]. +Use these tools if you'd like to work with connectors and sync jobs programmatically. + +[discrete#es-native-connectors-example] +=== End-to-end example + +The following example demonstrates how to use a managed connector on Elastic Cloud: <>. + +[discrete#es-native-connectors-convert] +=== Convert a managed connector + +You can convert a managed connector to a self-managed connector to be run on your own infrastructure. +You'll find instructions in the UI on the connector index's overview page. + +[WARNING] +==== +Converting a managed connector to a self-managed connector is an irreversible operation! 
+==== diff --git a/docs/reference/connector/docs/connectors-management.asciidoc b/docs/reference/connector/docs/connectors-management.asciidoc new file mode 100644 index 0000000000000..77f8b32cb0b05 --- /dev/null +++ b/docs/reference/connector/docs/connectors-management.asciidoc @@ -0,0 +1,9 @@ +[#es-connectors-management] +== Management topics + +Refer to the following sections: + +* <> +* <> +* <> +* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-mongodb.asciidoc b/docs/reference/connector/docs/connectors-mongodb.asciidoc new file mode 100644 index 0000000000000..8e62437507009 --- /dev/null +++ b/docs/reference/connector/docs/connectors-mongodb.asciidoc @@ -0,0 +1,777 @@ +[#es-connectors-mongodb] +=== Elastic MongoDB connector reference +++++ +MongoDB +++++ +// Attributes used in this file +:service-name: MongoDB +:service-name-stub: mongodb + +The _Elastic MongoDB connector_ is a <> for https://www.mongodb.com[MongoDB^] data sources. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-mongodb-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-mongodb-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* in Elastic versions *8.5.0 and later*. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-mongodb-compatibility] +===== Compatibility + +This connector is compatible with *MongoDB Atlas* and *MongoDB 3.6 and later*. + +The data source and your Elastic deployment must be able to communicate with each other over a network. + +[discrete#es-connectors-mongodb-configuration] +===== Configuration + +Each time you create an index to be managed by this connector, you will create a new connector configuration. +You will need some or all of the following information about the data source. + +Server hostname:: +The URI of the MongoDB host. +Examples: ++ +* `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority` +* `mongodb://127.0.0.1:27017` + +Username:: +The MongoDB username the connector will use. ++ +The user must have access to the configured database and collection. +You may want to create a dedicated, read-only user for each connector. + +Password:: +The MongoDB password the connector will use. + +Database:: +The MongoDB database to sync. +The database must be accessible using the configured username and password. + +Collection:: +The MongoDB collection to sync. +The collection must exist within the configured database. +The collection must be accessible using the configured username and password. 
+ +Direct connection:: +Toggle to use the https://www.mongodb.com/docs/ruby-driver/current/reference/create-client/#direct-connection[direct connection option for the MongoDB client^]. +Disabled by default. + +SSL/TLS Connection:: +Toggle to establish a secure connection to the MongoDB server using SSL/TLS encryption. +Ensure that your MongoDB deployment supports SSL/TLS connections. +*Enable* if your MongoDB cluster uses DNS SRV records (namely MongoDB Atlas users). ++ +Disabled by default. + +Certificate Authority (.pem):: +Specifies the root certificate from the Certificate Authority. +The value of the certificate is used to validate the certificate presented by the MongoDB instance. +[TIP] +==== +Atlas users can leave this blank because https://www.mongodb.com/docs/atlas/reference/faq/security/#which-certificate-authority-signs-mongodb-atlas-tls-certificates-[Atlas uses a widely trusted root CA]. +==== + +Skip certificate verification:: +Skips various certificate validations (if SSL is enabled). +Disabled by default. +[NOTE] +==== +We strongly recommend leaving this option disabled in production environments. +==== + +[discrete#es-connectors-mongodb-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-mongodb-usage] +===== Usage + +To use this connector as a *managed connector*, use the *Connector* workflow. +See <>. + +For additional operations, see <>. + +[discrete#es-connectors-mongodb-example] +===== Example + +An example is available for this connector. +See <>. + +[discrete#es-connectors-mongodb-known-issues] +===== Known issues + +[discrete#es-connectors-mongodb-known-issues-ssl-tls-812] +====== SSL must be enabled for MongoDB Atlas + +* A bug introduced in *8.12.0* causes the connector to fail to sync Mongo *Atlas* urls (`mongo+srv`) unless SSL/TLS is enabled. +// https://github.com/elastic/sdh-enterprise-search/issues/1283#issuecomment-1919731668 + +[discrete#es-connectors-mongodb-known-issues-expressions-and-variables-in-aggregation-pipelines] +====== Expressions and variables in aggregation pipelines + +It's not possible to use expressions like `new Date()` inside an aggregation pipeline. +These expressions won't be evaluated by the underlying MongoDB client, but will be passed as a string to the MongoDB instance. +A possible workaround is to use https://www.mongodb.com/docs/manual/reference/aggregation-variables/[aggregation variables]. + +Incorrect (`new Date()` will be interpreted as string): +[source,js] +---- +{ + "aggregate": { + "pipeline": [ + { + "$match": { + "expiresAt": { + "$gte": "new Date()" + } + } + } + ] + } +} +---- +// NOTCONSOLE + +Correct (usage of https://www.mongodb.com/docs/manual/reference/aggregation-variables/#mongodb-variable-variable.NOW[$$NOW]): +[source,js] +---- +{ + "aggregate": { + "pipeline": [ + { + "$addFields": { + "current_date": { + "$toDate": "$$NOW" + } + } + }, + { + "$match": { + "$expr": { + "$gte": [ + "$expiresAt", + "$current_date" + ] + } + } + } + ] + } +} +---- +// NOTCONSOLE + +[discrete#es-connectors-mongodb-known-issues-tls-with-invalid-cert] +====== Connecting with self-signed or custom CA TLS Cert + +Currently, the MongoDB connector does not support working with self-signed or custom CA certs when connecting to your self-managed MongoDB host. + +[WARNING] +==== +The following workaround should not be used in production. 
+==== + +This can be worked around in development environments, by appending certain query parameters to the configured host. + +For example, if your host is `mongodb+srv://my.mongo.host.com`, appending `?tls=true&tlsAllowInvalidCertificates=true` will allow disabling TLS certificate verification. + +The full host in this example will look like this: + +`mongodb+srv://my.mongo.host.com/?tls=true&tlsAllowInvalidCertificates=true` + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-mongodb-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-mongodb-security] +===== Security + +See <>. + +[discrete#es-connectors-mongodb-syncs] +===== Documents and syncs + +The following describes the default syncing behavior for this connector. +Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices. + +All documents in the configured MongoDB database and collection are extracted and transformed into documents in your Elasticsearch index. + +* The connector creates one *Elasticsearch document* for each MongoDB document in the configured database and collection. +* For each document, the connector transforms each MongoDB field into an *Elasticsearch field*. +* For each field, Elasticsearch {ref}/dynamic-mapping.html[dynamically determines the *data type*^]. + +This results in Elasticsearch documents that closely match the original MongoDB documents. + +The Elasticsearch mapping is created when the first document is created. + +Each sync is a "full" sync. +For each MongoDB document discovered: + +* If it does not exist, the document is created in Elasticsearch. +* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented. +* If an existing Elasticsearch document no longer exists in the MongoDB collection, it is deleted from Elasticsearch. +* Embedded documents are stored as an `object` field in the parent document. + +This is recursive, because embedded documents can themselves contain embedded documents. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted +* Permissions are not synced. All documents indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-mongodb-sync-rules] +===== Sync rules + +The following sections describe <> for this connector. + +<> are identical for all connectors and are available by default. + +<> for MongoDB can be used to express either `find` queries or aggregation pipelines. +They can also be used to tune options available when issuing these queries/pipelines. + +[discrete#es-connectors-mongodb-sync-rules-find] +====== `find` queries + +[NOTE] +==== +You must create a https://www.mongodb.com/docs/current/core/indexes/index-types/index-text/[text index^] on the MongoDB collection in order to perform text searches. 
+==== + +For `find` queries, the structure of this JSON DSL should look like: + +[source,js] +---- +{ + "find":{ + "filter": { + // find query goes here + }, + "options":{ + // query options go here + } + } +} + +---- +// NOTCONSOLE + +For example: + +[source,js] +---- +{ + "find": { + "filter": { + "$text": { + "$search": "garden", + "$caseSensitive": false + } + }, + "skip": 10, + "limit": 1000 + } +} +---- +// NOTCONSOLE + +`find` queries also support additional options, for example the `projection` object: + +[source,js] +---- +{ + "find": { + "filter": { + "languages": [ + "English" + ], + "runtime": { + "$gt":90 + } + }, + "projection":{ + "tomatoes": 1 + } + } +} +---- +// NOTCONSOLE +Where the available options are: + +* `allow_disk_use` (true, false) — When set to true, the server can write temporary data to disk while executing the find operation. This option is only available on MongoDB server versions 4.4 and newer. +* `allow_partial_results` (true, false) — Allows the query to get partial results if some shards are down. +* `batch_size` (Integer) — The number of documents returned in each batch of results from MongoDB. +* `filter` (Object) — The filter criteria for the query. +* `limit` (Integer) — The max number of docs to return from the query. +* `max_time_ms` (Integer) — The maximum amount of time to allow the query to run, in milliseconds. +* `no_cursor_timeout` (true, false) — The server normally times out idle cursors after an inactivity period (10 minutes) to prevent excess memory use. Set this option to prevent that. +* `projection` (Array, Object) — The fields to include or exclude from each doc in the result set. If an array, it should have at least one item. +* `return_key` (true, false) — Return index keys rather than the documents. +* `show_record_id` (true, false) — Return the `$recordId` for each doc in the result set. +* `skip` (Integer) — The number of docs to skip before returning results. + +[discrete#es-connectors-mongodb-sync-rules-aggregation] +====== Aggregation pipelines + +Similarly, for aggregation pipelines, the structure of the JSON DSL should look like: + +[source,js] +---- +{ + "aggregate":{ + "pipeline": [ + // pipeline elements go here + ], + "options": { + // pipeline options go here + } + } +} +---- +// NOTCONSOLE + +Where the available options are: + +* `allowDiskUse` (true, false) — Set to true if disk usage is allowed during the aggregation. +* `batchSize` (Integer) — The number of documents to return per batch. +* `bypassDocumentValidation` (true, false) — Whether or not to skip document level validation. +* `collation` (Object) — The collation to use. +* `comment` (String) — A user-provided comment to attach to this command. +* `hint` (String) — The index to use for the aggregation. +* `let` (Object) — Mapping of variables to use in the pipeline. See the server documentation for details. +* `maxTimeMs` (Integer) — The maximum amount of time in milliseconds to allow the aggregation to run. + +[discrete#es-connectors-mongodb-migration-from-ruby] +===== Migrating from the Ruby connector framework + +As part of the 8.8.0 release the MongoDB connector was moved from the {connectors-python}[Ruby connectors framework^] to the {connectors-python}[Elastic connector framework^]. + +This change introduces minor formatting modifications to data ingested from MongoDB: + +1. Nested object id field name has changed from "_id" to "id". For example, if you had a field "customer._id", this will now be named "customer.id". +2. 
Date format has changed from `YYYY-MM-DD'T'HH:mm:ss.fff'Z'` to `YYYY-MM-DD'T'HH:mm:ss` + +If your MongoDB connector stopped working after migrating from 8.7.x to 8.8.x, read the workaround outlined in <>. +If that does not work, we recommend deleting the search index attached to this connector and re-creating a MongoDB connector from scratch. + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-mongodb-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-mongodb-client-prerequisites] +===== Availability and prerequisites + +This connector is also available as a *self-managed connector* from the *Elastic connector framework*. +To use this connector as a self-managed connector, satisfy all <>. + +[discrete#es-connectors-mongodb-client-compatibility] +===== Compatibility + +This connector is compatible with *MongoDB Atlas* and *MongoDB 3.6 and later*. + +The data source and your Elastic deployment must be able to communicate with each other over a network. + +[discrete#es-connectors-mongodb-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/jira.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`host`:: +The URI of the MongoDB host. +Examples: ++ +* `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority` +* `mongodb://127.0.0.1:27017` + + +`user`:: +The MongoDB username the connector will use. ++ +The user must have access to the configured database and collection. +You may want to create a dedicated, read-only user for each connector. + +`password`:: +The MongoDB password the connector will use. + +[NOTE] +==== +Anonymous authentication is supported for _testing purposes only_, but should not be used in production. +Omit the username and password, to use default values. +==== + +`database`:: +The MongoDB database to sync. +The database must be accessible using the configured username and password. + +`collection`:: +The MongoDB collection to sync. +The collection must exist within the configured database. +The collection must be accessible using the configured username and password. + +`direct_connection`:: +Whether to use the https://www.mongodb.com/docs/ruby-driver/current/reference/create-client/#direct-connection[direct connection option for the MongoDB client^]. +Default value is `False`. + +`ssl_enabled`:: +Whether to establish a secure connection to the MongoDB server using SSL/TLS encryption. +Ensure that your MongoDB deployment supports SSL/TLS connections. +*Enable* if your MongoDB cluster uses DNS SRV records (namely MongoDB Atlas users). ++ +Default value is `False`. + +`ssl_ca`:: +Specifies the root certificate from the Certificate Authority. +The value of the certificate is used to validate the certificate presented by the MongoDB instance. 
+[TIP] +==== +Atlas users can leave this blank because https://www.mongodb.com/docs/atlas/reference/faq/security/#which-certificate-authority-signs-mongodb-atlas-tls-certificates-[Atlas uses a widely trusted root CA]. +==== + +`tls_insecure`:: +Skips various certificate validations (if SSL is enabled). +Default value is `False`. +[NOTE] +==== +We strongly recommend leaving this option disabled in production environments. +==== + +[discrete#es-connectors-mongodb-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-mongodb-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <> +For additional usage operations, see <>. + +[discrete#es-connectors-mongodb-client-example] +===== Example + +An example is available for this connector. +See <>. + +[discrete#es-connectors-mongodb-client-known-issues] +===== Known issues + +[discrete#es-connectors-mongodb-client-known-issues-ssl-tls-812] +====== SSL must be enabled for MongoDB Atlas + +* A bug introduced in *8.12.0* causes the connector to fail to sync Mongo *Atlas* urls (`mongo+srv`) unless SSL/TLS is enabled. +// https://github.com/elastic/sdh-enterprise-search/issues/1283#issuecomment-1919731668 + + +[discrete#es-connectors-mongodb-client-known-issues-expressions-and-variables-in-aggregation-pipelines] +====== Expressions and variables in aggregation pipelines + +It's not possible to use expressions like `new Date()` inside an aggregation pipeline. +These expressions won't be evaluated by the underlying MongoDB client, but will be passed as a string to the MongoDB instance. +A possible workaround is to use https://www.mongodb.com/docs/manual/reference/aggregation-variables/[aggregation variables]. + +Incorrect (`new Date()` will be interpreted as string): +[source,js] +---- +{ + "aggregate": { + "pipeline": [ + { + "$match": { + "expiresAt": { + "$gte": "new Date()" + } + } + } + ] + } +} +---- +// NOTCONSOLE + +Correct (usage of https://www.mongodb.com/docs/manual/reference/aggregation-variables/#mongodb-variable-variable.NOW[$$NOW]): +[source,js] +---- +{ + "aggregate": { + "pipeline": [ + { + "$addFields": { + "current_date": { + "$toDate": "$$NOW" + } + } + }, + { + "$match": { + "$expr": { + "$gte": [ + "$expiresAt", + "$current_date" + ] + } + } + } + ] + } +} +---- +// NOTCONSOLE + +[discrete#es-connectors-mongodb-client-known-issues-tls-with-invalid-cert] +====== Connecting with self-signed or custom CA TLS Cert + +Currently, the MongoDB connector does not support working with self-signed or custom CA certs when connecting to your self-managed MongoDB host. + +[WARNING] +==== +The following workaround should not be used in production. +==== + +This can be worked around in development environments, by appending certain query parameters to the configured host. + +For example, if your host is `mongodb+srv://my.mongo.host.com`, appending `?tls=true&tlsAllowInvalidCertificates=true` will allow disabling TLS certificate verification. + +The full host in this example will look like this: + +`mongodb+srv://my.mongo.host.com/?tls=true&tlsAllowInvalidCertificates=true` + +[discrete#es-connectors-mongodb-known-issues-docker-image-fails] +====== Docker image errors out for versions 8.12.0 and 8.12.1 + +A bug introduced in *8.12.0* causes the Connectors docker image to error out if run using MongoDB as its source. +The command line will output the error `cannot import name 'coroutine' from 'asyncio'`. 
+** This issue is fixed in versions *8.12.2* and *8.13.0*. +** This bug does not affect Elastic managed connectors. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-mongodb-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-mongodb-client-security] +===== Security + +See <>. + +[discrete#es-connectors-mongodb-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-mongodb-client-syncs] +===== Documents and syncs + +The following describes the default syncing behavior for this connector. +Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices. + +All documents in the configured MongoDB database and collection are extracted and transformed into documents in your Elasticsearch index. + +* The connector creates one *Elasticsearch document* for each MongoDB document in the configured database and collection. +* For each document, the connector transforms each MongoDB field into an *Elasticsearch field*. +* For each field, Elasticsearch {ref}/dynamic-mapping.html[dynamically determines the *data type*^]. + +This results in Elasticsearch documents that closely match the original MongoDB documents. + +The Elasticsearch mapping is created when the first document is created. + +Each sync is a "full" sync. +For each MongoDB document discovered: + +* If it does not exist, the document is created in Elasticsearch. +* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented. +* If an existing Elasticsearch document no longer exists in the MongoDB collection, it is deleted from Elasticsearch. +* Embedded documents are stored as an `object` field in the parent document. + +This is recursive, because embedded documents can themselves contain embedded documents. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted +* Permissions are not synced. All documents indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-mongodb-client-sync-rules] +===== Sync rules + +The following sections describe <> for this connector. + +<> are identical for all connectors and are available by default. + +<> for MongoDB can be used to express either `find` queries or aggregation pipelines. +They can also be used to tune options available when issuing these queries/pipelines. + +[discrete#es-connectors-mongodb-client-sync-rules-find] +====== `find` queries + +[NOTE] +==== +You must create a https://www.mongodb.com/docs/current/core/indexes/index-types/index-text/[text index^] on the MongoDB collection in order to perform text searches. 
+==== + +For `find` queries, the structure of this JSON DSL should look like: + +[source,js] +---- +{ + "find":{ + "filter": { + // find query goes here + }, + "options":{ + // query options go here + } + } +} + +---- +// NOTCONSOLE + +For example: + +[source,js] +---- +{ + "find": { + "filter": { + "$text": { + "$search": "garden", + "$caseSensitive": false + } + }, + "skip": 10, + "limit": 1000 + } +} +---- +// NOTCONSOLE + +`find` queries also support additional options, for example the `projection` object: + +[source,js] +---- +{ + "find": { + "filter": { + "languages": [ + "English" + ], + "runtime": { + "$gt":90 + } + }, + "projection":{ + "tomatoes": 1 + } + } +} +---- +// NOTCONSOLE +Where the available options are: + +* `allow_disk_use` (true, false) — When set to true, the server can write temporary data to disk while executing the find operation. This option is only available on MongoDB server versions 4.4 and newer. +* `allow_partial_results` (true, false) — Allows the query to get partial results if some shards are down. +* `batch_size` (Integer) — The number of documents returned in each batch of results from MongoDB. +* `filter` (Object) — The filter criteria for the query. +* `limit` (Integer) — The max number of docs to return from the query. +* `max_time_ms` (Integer) — The maximum amount of time to allow the query to run, in milliseconds. +* `no_cursor_timeout` (true, false) — The server normally times out idle cursors after an inactivity period (10 minutes) to prevent excess memory use. Set this option to prevent that. +* `projection` (Array, Object) — The fields to include or exclude from each doc in the result set. If an array, it should have at least one item. +* `return_key` (true, false) — Return index keys rather than the documents. +* `show_record_id` (true, false) — Return the `$recordId` for each doc in the result set. +* `skip` (Integer) — The number of docs to skip before returning results. + +[discrete#es-connectors-mongodb-client-sync-rules-aggregation] +====== Aggregation pipelines + +Similarly, for aggregation pipelines, the structure of the JSON DSL should look like: + +[source,js] +---- +{ + "aggregate":{ + "pipeline": [ + // pipeline elements go here + ], + "options": { + // pipeline options go here + } + } +} +---- +// NOTCONSOLE + +Where the available options are: + +* `allowDiskUse` (true, false) — Set to true if disk usage is allowed during the aggregation. +* `batchSize` (Integer) — The number of documents to return per batch. +* `bypassDocumentValidation` (true, false) — Whether or not to skip document level validation. +* `collation` (Object) — The collation to use. +* `comment` (String) — A user-provided comment to attach to this command. +* `hint` (String) — The index to use for the aggregation. +* `let` (Object) — Mapping of variables to use in the pipeline. See the server documentation for details. +* `maxTimeMs` (Integer) — The maximum amount of time in milliseconds to allow the aggregation to run. + +[discrete#es-connectors-mongodb-client-migration-from-ruby] +===== Migrating from the Ruby connector framework + +As part of the 8.8.0 release the MongoDB connector was moved from the {connectors-python}[Ruby connectors framework^] to the {connectors-python}[Elastic connector framework^]. + +This change introduces minor formatting modifications to data ingested from MongoDB: + +1. Nested object id field name has changed from "_id" to "id". For example, if you had a field "customer._id", this will now be named "customer.id". +2. 
Date format has changed from `YYYY-MM-DD'T'HH:mm:ss.fff'Z'` to `YYYY-MM-DD'T'HH:mm:ss` + +If your MongoDB connector stopped working after migrating from 8.7.x to 8.8.x, read the workaround outlined in <>. +If that does not work, we recommend deleting the search index attached to this connector and re-creating a MongoDB connector from scratch. + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-ms-sql.asciidoc b/docs/reference/connector/docs/connectors-ms-sql.asciidoc new file mode 100644 index 0000000000000..5b6b74e86e621 --- /dev/null +++ b/docs/reference/connector/docs/connectors-ms-sql.asciidoc @@ -0,0 +1,590 @@ +[#es-connectors-ms-sql] +=== Elastic Microsoft SQL connector reference +++++ +Microsoft SQL +++++ +// Attributes used in this file: +:service-name: Microsoft SQL +:service-name-stub: mssql + +The _Elastic Microsoft SQL connector_ is a <> for https://learn.microsoft.com/en-us/sql/[Microsoft SQL^] databases. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-ms-sql-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-ms-sql-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* in Elastic versions *8.8.0 and later*. +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-{service_type}-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-ms-sql-usage] +===== Usage + +To use this connector as a *managed connector*, use the *Connector* workflow. +See <>. + +Users require the `sysadmin` server role. + +For additional operations, see <>. + +[discrete#es-connectors-ms-sql-compatability] +===== Compatibility + +The following are compatible with Elastic connector frameworks: + +* Microsoft SQL Server versions 2017, 2019 +* Azure SQL +* Amazon RDS for SQL Server + +[discrete#es-connectors-ms-sql-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +Host:: +The server host address where the Microsoft SQL Server is hosted. +Default value is `127.0.0.1`. +Examples: ++ +* `192.158.1.38` +* `demo.instance.demo-region.demo.service.com` + +Port:: +The port where the Microsoft SQL Server is hosted. Default value is `1433`. + +Username:: +The username of the account for Microsoft SQL Server. + +Password:: +The password of the account to be used for the Microsoft SQL Server. + +Database:: +Name of the Microsoft SQL Server database. +Examples: ++ +* `employee_database` +* `customer_database` + +Comma-separated list of tables:: +List of tables, separated by commas. 
+The Microsoft SQL connector will fetch data from all tables present in the configured database, if the value is `*` . +Default value is `*`. +Examples: ++ +* `table_1, table_2` +* `*` ++ +[WARNING] +==== +This field can be bypassed by advanced sync rules. +==== + +Schema:: +Name of the Microsoft SQL Server schema. +Default value is `dbo`. ++ +Examples: ++ +* `dbo` +* `custom_schema` + +Enable SSL:: +Toggle to enable SSL verification. +Default value is `False`. + +SSL certificate:: +Content of SSL certificate. +If SSL is disabled, the `ssl_ca` value will be ignored. ++ +.*Expand* to see an example certificate +[%collapsible] +==== +``` +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +``` +==== + +Validate host:: +Toggle to enable host validation. +Default value is `False`. + +[discrete#es-connectors-ms-sql-documents-syncs] +===== Documents and syncs + +* Tables with no primary key defined are skipped. +* If the `last_user_update` of `sys.dm_db_index_usage_stats` table is not available for a specific table and database then all data in that table will be synced. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-ms-sql-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-ms-sql-sync-rules-advanced] +====== Advanced sync rules + +This connector supports <> for remote filtering. +These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Here are a few examples of advanced sync rules for this connector. + +.*Expand* to see example data +[%collapsible] +==== + +*`employee` table* + +[cols="3*", options="header"] +|=== +| emp_id | name | age +| 3 | John | 28 +| 10 | Jane | 35 +| 14 | Alex | 22 +|=== + +* +*`customer` table* + +[cols="3*", options="header"] +|=== +| c_id | name | age +| 2 | Elm | 24 +| 6 | Pine | 30 +| 9 | Oak | 34 +|=== +==== + +[discrete#es-connectors-ms-sql-sync-rules-advanced-queries] +======= Example: Two queries + +These rules fetch all records from both the `employee` and `customer` tables. The data from these tables will be synced separately to Elasticsearch. + +[source,js] +---- +[ + { + "tables": [ + "employee" + ], + "query": "SELECT * FROM employee" + }, + { + "tables": [ + "customer" + ], + "query": "SELECT * FROM customer" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-ms-sql-sync-rules-example-one-where] +======= Example: One WHERE query + +This rule fetches only the records from the `employee` table where the `emp_id` is greater than 5. Only these filtered records will be synced to Elasticsearch. + +[source,js] +---- +[ + { + "tables": ["employee"], + "query": "SELECT * FROM employee WHERE emp_id > 5" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-ms-sql-sync-rules-example-one-join] +======= Example: One JOIN query + +This rule fetches records by performing an INNER JOIN between the `employee` and `customer` tables on the condition that the `emp_id` in `employee` matches the `c_id` in `customer`. 
The result of this combined data will be synced to Elasticsearch. + +[source,js] +---- +[ + { + "tables": ["employee", "customer"], + "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" + } +] +---- +// NOTCONSOLE + +[WARNING] +==== +When using advanced rules, a query can bypass the configuration field `tables`. +This will happen if the query specifies a table that doesn't appear in the configuration. +This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. +==== + +[discrete#es-connectors-ms-sql-known-issues] +===== Known issues + +There are no known issues for this connector. +See <> for any issues affecting all connectors. + +[discrete#es-connectors-ms-sql-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-ms-sql-security] +===== Security + +See <>. + + + + +This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +View {connectors-python}/connectors/sources/mssql.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-ms-sql-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-ms-sql-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +To use this connector, satisfy all <>. + +[discrete#es-connectors-{service_type}-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-ms-sql-client-usage] +===== Usage + +Users require the `sysadmin` server role. + +To use this connector as a *self-managed connector*, see <> +For additional usage operations, see <>. + +[discrete#es-connectors-ms-sql-client-compatability] +===== Compatibility + +The following are compatible with Elastic connector frameworks: + +* Microsoft SQL Server versions 2017, 2019 +* Azure SQL +* Amazon RDS for SQL Server + +[discrete#es-connectors-ms-sql-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/generic_database.py[connector source code^]. +Note that this data source uses the `generic_database.py` connector source code. + +Refer to {connectors-python}/connectors/sources/mssql.py[`mssql.py`^] for additional code, specific to this data source. +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, users will be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`host`:: +The server host address where the Microsoft SQL Server is hosted. +Default value is `127.0.0.1`. +Examples: ++ +* `192.158.1.38` +* `demo.instance.demo-region.demo.service.com` + +`port`:: +The port where the Microsoft SQL Server is hosted. Default value is `9090`. 
+ +`username`:: +The username of the account for Microsoft SQL Server. + +`password`:: +The password of the account to be used for the Microsoft SQL Server. + +`database`:: +Name of the Microsoft SQL Server database. +Examples: ++ +* `employee_database` +* `customer_database` + +`tables`:: +Comma-separated list of tables. +The Microsoft SQL connector will fetch data from all tables present in the configured database, if the value is `*` . +Default value is `*`. +Examples: ++ +* `table_1, table_2` +* `*` ++ +[WARNING] +==== +This field can be bypassed by advanced sync rules. +==== + +`fetch_size`:: +Rows fetched per request. + +`retry_count`:: +The number of retry attempts per failed request. + +`schema`:: +Name of the Microsoft SQL Server schema. +Default value is `dbo`. ++ +Examples: ++ +* `dbo` +* `custom_schema` + +`ssl_enabled`:: +SSL verification enablement. +Default value is `False`. + +`ssl_ca`:: +Content of SSL certificate. +If SSL is disabled, the `ssl_ca` value will be ignored. ++ +.*Expand* to see an example certificate +[%collapsible] +==== +``` +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +``` +==== + +`validate_host`:: +Host validation enablement. +Default value is `False`. + +[discrete#es-connectors-ms-sql-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-ms-sql-client-documents-syncs] +===== Documents and syncs + +* Tables with no primary key defined are skipped. +* If the `last_user_update` of `sys.dm_db_index_usage_stats` table is not available for a specific table and database then all data in that table will be synced. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-ms-sql-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-ms-sql-client-sync-rules-advanced] +====== Advanced sync rules + +This connector supports <> for remote filtering. +These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Here are a few examples of advanced sync rules for this connector. + +.*Expand* to see example data +[%collapsible] +==== + +*`employee` table* + +[cols="3*", options="header"] +|=== +| emp_id | name | age +| 3 | John | 28 +| 10 | Jane | 35 +| 14 | Alex | 22 +|=== + +* +*`customer` table* + +[cols="3*", options="header"] +|=== +| c_id | name | age +| 2 | Elm | 24 +| 6 | Pine | 30 +| 9 | Oak | 34 +|=== +==== + +[discrete#es-connectors-ms-sql-client-sync-rules-advanced-queries] +======= Example: Two queries + +These rules fetch all records from both the `employee` and `customer` tables. The data from these tables will be synced separately to Elasticsearch. 
+ +[source,js] +---- +[ + { + "tables": [ + "employee" + ], + "query": "SELECT * FROM employee" + }, + { + "tables": [ + "customer" + ], + "query": "SELECT * FROM customer" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-ms-sql-client-sync-rules-example-one-where] +======= Example: One WHERE query + +This rule fetches only the records from the `employee` table where the `emp_id` is greater than 5. Only these filtered records will be synced to Elasticsearch. + +[source,js] +---- +[ + { + "tables": ["employee"], + "query": "SELECT * FROM employee WHERE emp_id > 5" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-ms-sql-client-sync-rules-example-one-join] +======= Example: One JOIN query + +This rule fetches records by performing an INNER JOIN between the `employee` and `customer` tables on the condition that the `emp_id` in `employee` matches the `c_id` in `customer`. The result of this combined data will be synced to Elasticsearch. + +[source,js] +---- +[ + { + "tables": ["employee", "customer"], + "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" + } +] +---- +// NOTCONSOLE + +[WARNING] +==== +When using advanced rules, a query can bypass the configuration field `tables`. +This will happen if the query specifies a table that doesn't appear in the configuration. +This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. +==== + +[discrete#es-connectors-ms-sql-client-client-operations-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the Microsoft SQL connector, run the following command: + +[source,shell] +---- +make ftest NAME=mssql +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=mssql DATA_SIZE=small +---- + +[discrete#es-connectors-ms-sql-client-known-issues] +===== Known issues + +There are no known issues for this connector. +See <> for any issues affecting all connectors. + +[discrete#es-connectors-ms-sql-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-ms-sql-client-security] +===== Security + +See <>. + + + + +This connector uses the https://github.com/elastic/connectors-python/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +View {connectors-python}/connectors/sources/mssql.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-mysql.asciidoc b/docs/reference/connector/docs/connectors-mysql.asciidoc new file mode 100644 index 0000000000000..59a9724f1ffe4 --- /dev/null +++ b/docs/reference/connector/docs/connectors-mysql.asciidoc @@ -0,0 +1,538 @@ +[#es-connectors-mysql] +=== Elastic MySQL connector reference +++++ +MySQL +++++ +// Attributes used in this file: +:service-name: MySQL +:service-name-stub: mysql + +The _Elastic MySQL connector_ is a <> for https://www.mysql.com[MySQL^] data sources. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. 
+ +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-mysql-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-mysql-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* in Elastic versions *8.5.0 and later*. +To use this connector natively in Elastic Cloud, satisfy all <>. + +This connector has no additional prerequisites beyond the shared requirements, linked above. + +[discrete#es-connectors-mysql-compatibility] +===== Compatibility + +This connector is compatible with *MySQL 5.6 and later*. + +The connector is also compatible with *MariaDB* databases compatible with the above. + +The data source and your Elastic deployment must be able to communicate with each other over a network. + +[discrete#es-connectors-mysql-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-mysql-usage] +===== Usage + +To use this connector natively in Elastic Cloud, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-mysql-configuration] +===== Configuration + +Each time you create an index to be managed by this connector, you will create a new connector configuration. +You will need some or all of the following information about the data source. + +Host:: +The IP address or domain name of the MySQL host, excluding port. +Examples: ++ +* `192.158.1.38` +* `localhost` + +Port:: +The port of the MySQL host. +Examples: ++ +* `3306` +* `3307` + +Username:: +The MySQL username the connector will use. ++ +The user must have access to the configured database. +You may want to create a dedicated, read-only user for each connector. + +Password:: +The MySQL password the connector will use. + +Database:: +The MySQL database to sync. +The database must be accessible using the configured username and password. ++ +Examples: ++ +* `products` +* `orders` + +Comma-separated list of tables:: +The tables in the configured database to sync. +One or more table names, separated by commas. +The tables must be accessible using the configured username and password. ++ +Examples: ++ +* `furniture, food, toys` +* `laptops` ++ +[TIP] +==== +This field can be bypassed when using advanced sync rules. +==== + +Enable SSL:: +Whether SSL verification will be enabled. +Default value is `True`. + +SSL Certificate:: +Content of SSL certificate. +If SSL is disabled, the SSL certificate value will be ignored. 
++ +.*Expand* to see an example certificate +[%collapsible] +==== +``` +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV +BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ +BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa +MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO +b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV +z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW +CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj +hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko +i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t +njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea +MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr +BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2 +MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv +Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0 +gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4 +CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA +iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS +hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk +xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +``` +==== + +[discrete#es-connectors-mysql-known-issues] +===== Known issues + +This connector has the following known issues: + +* *Upgrading from a tech preview connector (8.7 or earlier) to 8.8 will cause the MySQL connector configuration to be invalid.* ++ +MySQL connectors prior to 8.8 can be missing some configuration fields that are required for the connector to run. +If you would like to continue using your MySQL connector after upgrading from 8.7 or earlier, run the script below to fix your connector's configuration. +This will populate the configuration with the missing fields. +The auxilliary information needed for the configuration will then be automatically added by by the self-managed connector. ++ +[source,console] +---- +POST /.elastic-connectors/_update/connector_id +{ + "doc" : { + "configuration": { + "tables": { + "type": "list", + "value": "*" + }, + "ssl_enabled": { + "type": "bool", + "value": false + }, + "ssl_ca": { + "type": "str", + "value": "" + }, + "fetch_size": { + "type": "int", + "value": 50 + }, + "retry_count": { + "type": "int", + "value": 3 + } + } + } +} +---- +// TEST[skip:TODO] ++ +* *Upgrading to 8.8 does not migrate MySQL sync rules.* ++ +After upgrading, you must re-create your sync rules. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-mysql-syncs] +===== Documents and syncs + +The following describes the default syncing behavior for this connector. +Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices. + +All records in the MySQL database included in your connector configuration are extracted and transformed into documents in your Elasticsearch index. + +* For each row in your MySQL database table, the connector creates one *Elasticsearch document*. +* For each column, the connector transforms the column into an *Elasticsearch field*. +* Elasticsearch {ref}/dynamic-mapping.html[dynamically maps^] MySQL data types to *Elasticsearch data types*. 
+* Tables with no primary key defined are skipped. +* Field values that represent other records are replaced with the primary key for that record (composite primary keys are joined with `_`). + +The Elasticsearch mapping is created when the first document is created. + +Each sync is a "full" sync. + +For each MySQL row discovered: + +* If it does not exist, the document is created in Elasticsearch. +* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented. +* If an existing Elasticsearch document no longer exists in the MySQL table, it is deleted from Elasticsearch. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted +* Permissions are not synced by default. +*All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-mysql-sync-rules] +===== Sync rules + +The following sections describe <> for this connector. + +<> are identical for all connectors and are available by default. + +<> for MySQL can be used to pass arbitrary SQL statements to a MySQL instance. + +[IMPORTANT] +==== +You need to specify the tables used in your custom query in the "tables" field. +==== + +For example: + +[source,js] +---- +[ + { + "tables": ["table1", "table2"], + "query": "SELECT ... FROM ..." + } +] +---- +// NOTCONSOLE + +[WARNING] +==== +When using advanced rules, a query can bypass the configuration field `tables`. +This will happen if the query specifies a table that doesn't appear in the configuration. +This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. +==== + +[discrete#es-connectors-mysql-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-mysql-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-mysql-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-mysql-client-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* in Elastic versions *8.5.0 and later*. +To use this connector natively in Elastic Cloud, satisfy all <>. + +This connector is also available as a *self-managed connector* from the *Elastic connector framework*. +To use this connector as a self-managed connector, satisfy all <>. + +This connector has no additional prerequisites beyond the shared requirements, linked above. + +[discrete#es-connectors-mysql-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-mysql-client-usage] +===== Usage + +To use this connector as a *managed connector*, use the *Connector* workflow. +See <>. + +To use this connector as a *self-managed connector*, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-mysql-client-compatibility] +===== Compatibility + +This connector is compatible with *MySQL 5.6 and later*. + +The connector is also compatible with *MariaDB* databases compatible with the above. + +The data source and your Elastic deployment must be able to communicate with each other over a network. 
+ +[discrete#es-connectors-mysql-client-configuration] +===== Configuration + +Each time you create an index to be managed by this connector, you will create a new connector configuration. +You will need some or all of the following information about the data source. + +Host:: +The IP address or domain name of the MySQL host, excluding port. +Examples: ++ +* `192.158.1.38` +* `localhost` + +Port:: +The port of the MySQL host. +Examples: ++ +* `3306` +* `3307` + +Username:: +The MySQL username the connector will use. ++ +The user must have access to the configured database. +You may want to create a dedicated, read-only user for each connector. + +Password:: +The MySQL password the connector will use. + +Database:: +The MySQL database to sync. +The database must be accessible using the configured username and password. ++ +Examples: ++ +* `products` +* `orders` + +Tables:: +The tables in the configured database to sync. +One or more table names, separated by commas. +The tables must be accessible using the configured username and password. ++ +Examples: ++ +* `furniture, food, toys` +* `laptops` + +Enable SSL:: +Whether SSL verification will be enabled. +Default value is `True`. + +SSL Certificate:: +Content of SSL certificate. +If SSL is disabled, the SSL certificate value will be ignored. ++ +.*Expand* to see an example certificate +[%collapsible] +==== +``` +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV +BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ +BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa +MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO +b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV +z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW +CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj +hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko +i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t +njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea +MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr +BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2 +MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv +Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0 +gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4 +CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA +iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS +hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk +xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +``` +==== + +[discrete#es-connectors-mysql-client-known-issues] +===== Known issues + +This connector has the following known issues: + +* *Upgrading from a tech preview connector (8.7 or earlier) to 8.8 will cause the MySQL connector configuration to be invalid.* ++ +MySQL connectors prior to 8.8 can be missing some configuration fields that are required for the connector to run. +If you would like to continue using your MySQL connector after upgrading from 8.7 or earlier, run the script below to fix your connector's configuration. +This will populate the configuration with the missing fields. 
+The auxiliary information needed for the configuration will then be automatically added by the self-managed connector.
++
+[source,console]
+----
+POST /.elastic-connectors/_update/connector_id
+{
+  "doc" : {
+    "configuration": {
+      "tables": {
+        "type": "list",
+        "value": "*"
+      },
+      "ssl_enabled": {
+        "type": "bool",
+        "value": false
+      },
+      "ssl_ca": {
+        "type": "str",
+        "value": ""
+      },
+      "fetch_size": {
+        "type": "int",
+        "value": 50
+      },
+      "retry_count": {
+        "type": "int",
+        "value": 3
+      }
+    }
+  }
+}
+----
+// TEST[skip:TODO]
++
+* *Upgrading to 8.8 does not migrate MySQL sync rules.*
++
+After upgrading, you must re-create your sync rules.
+
+See <> for any issues affecting all connectors.
+
+[discrete#es-connectors-mysql-client-syncs]
+===== Documents and syncs
+
+The following describes the default syncing behavior for this connector.
+Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices.
+
+All records in the MySQL database included in your connector configuration are extracted and transformed into documents in your Elasticsearch index.
+
+* For each row in your MySQL database table, the connector creates one *Elasticsearch document*.
+* For each column, the connector transforms the column into an *Elasticsearch field*.
+* Elasticsearch {ref}/dynamic-mapping.html[dynamically maps^] MySQL data types to *Elasticsearch data types*.
+* Tables with no primary key defined are skipped.
+* Field values that represent other records are replaced with the primary key for that record (composite primary keys are joined with `_`).
+
+The Elasticsearch mapping is created when the first document is created.
+
+Each sync is a "full" sync.
+
+For each MySQL row discovered:
+
+* If it does not exist, the document is created in Elasticsearch.
+* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented.
+* If an existing Elasticsearch document no longer exists in the MySQL table, it is deleted from Elasticsearch.
+
+[discrete#es-connectors-mysql-client-docker]
+===== Deployment using Docker
+
+include::_connectors-docker-instructions.asciidoc[]
+
+[discrete#es-connectors-mysql-client-sync-rules]
+===== Sync rules
+
+The following sections describe <> for this connector.
+
+<> are identical for all connectors and are available by default.
+
+<> for MySQL can be used to pass arbitrary SQL statements to a MySQL instance.
+
+[IMPORTANT]
+====
+You need to specify the tables used in your custom query in the "tables" field.
+====
+
+For example:
+
+[source,js]
+----
+[
+  {
+    "tables": ["table1", "table2"],
+    "query": "SELECT ... FROM ..."
+  }
+]
+----
+// NOTCONSOLE
+
+A more concrete, hypothetical example appears at the end of this reference.
+
+[WARNING]
+====
+When using advanced rules, a query can bypass the configuration field `tables`.
+This will happen if the query specifies a table that doesn't appear in the configuration.
+This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests only a subset of tables.
+====
+
+[discrete#es-connectors-mysql-client-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-mysql-client-security]
+===== Security
+
+See <>.
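+
+[discrete#es-connectors-mysql-client-sync-rules-advanced-example]
+===== Advanced sync rules example
+
+The advanced sync rule shown under Sync rules above uses placeholders for the SQL statement.
+As a purely illustrative sketch (the `orders` table, its columns, and the filter value are hypothetical and not part of the connector), a rule that syncs only a filtered subset of a single table might look like this:
+
+[source,js]
+----
+[
+  {
+    "tables": ["orders"],
+    "query": "SELECT id, customer_name, status, total FROM orders WHERE status = 'shipped'"
+  }
+]
+----
+// NOTCONSOLE
+
+As the warning above notes, because the query names its table directly, it can bypass the `tables` configuration field.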
+ +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-network-drive.asciidoc b/docs/reference/connector/docs/connectors-network-drive.asciidoc new file mode 100644 index 0000000000000..91c9d3b28c385 --- /dev/null +++ b/docs/reference/connector/docs/connectors-network-drive.asciidoc @@ -0,0 +1,533 @@ +[#es-connectors-network-drive] +=== Elastic network drive connector reference +++++ +Network drive +++++ + +// Attributes used in this file: +:service-name: Network drive +:service-name-stub: network_drive + +The _Elastic network drive connector_ is a <> for network drive data sources. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-network-drive-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-network-drive-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-network-drive-usage] +===== Usage + +To use this connector natively in Elastic Cloud, see <>. + +[discrete#es-connectors-network-drive-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +Username:: +The username of the account for the network drive. +The user must have at least **read** permissions for the folder path provided. + +Password:: +The password of the account to be used for crawling the network drive. + +IP address:: +The server IP address where the network drive is hosted. +Default value is `127.0.0.1`. + +Port:: +The server port where the network drive service is available. +Default value is `445`. + +Path:: +** The network drive path the connector will crawl to fetch files. +This is the name of the folder shared via SMB. +The connector uses the Python https://github.com/jborean93/smbprotocol[`smbprotocol`^] library which supports both *SMB v2* and *v3*. +** Accepts only one path— parent folders can be specified to widen the scope. +** The drive path should use *forward slashes* as path separators. +Example: ++ +* `admin/bin` + +Enable document level security:: +Toggle to enable document level security (DLS). When enabled: +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. +Default value is `false`. ++ +[TIP] +==== +Refer to <> for more information, including prerequisites and limitations. +==== + +Identity mappings:: +Path to a CSV file containing user and group SIDs (For Linux Network Drive). 
++
+File should be formatted as follows:
+
+* Fields separated by semicolons (`;`)
+* Three fields per line: `Username;User-SID;Group-SIDs`
+* Group-SIDs are comma-separated and optional.
+
+*Example* with one username, user-sid and no group:
+
+[source,text]
+----
+user1;S-1;
+----
+
+*Example* with one username, user-sid and two groups:
+
+[source,text]
+----
+user1;S-1;S-11,S-22
+----
+
+[discrete#es-connectors-network-drive-documents-syncs]
+===== Documents and syncs
+
+The connector syncs folders as separate documents in Elasticsearch.
+The following fields will be added for the document type `folder`:
+
+* `create_time`
+* `title`
+* `path`
+* `modified`
+* `time`
+* `id`
+
+[NOTE]
+====
+* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.)
+* Permissions are not synced by default.
+You must first enable <>.
+Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment.
+====
+
+[discrete#es-connectors-network-drive-sync-types]
+====== Sync types
+
+<> are supported by default for all connectors.
+
+This connector also supports <>.
+
+[discrete#es-connectors-network-drive-dls]
+===== Document level security
+
+Document Level Security (DLS) enables you to restrict access to documents based on a user's permissions.
+DLS facilitates the syncing of folder and file permissions, including both user and group level permissions.
+
+[NOTE]
+====
+**Note:** Refer to <> to learn how to search data with DLS enabled, when building a search application.
+====
+
+[discrete#es-connectors-network-drive-dls-availability]
+====== Availability
+
+* The present version of the Network Drive connector offers DLS support for Windows network drives only.
+* To fetch users and groups in a Windows network drive, the account credentials added in the connector configuration must have access to PowerShell on the Windows Server where the network drive is hosted.
+
+[discrete#es-connectors-network-drive-sync-rules]
+===== Sync rules
+
+<> are identical for all connectors and are available by default.
+
+[discrete#es-connectors-network-drive-sync-rules-advanced]
+====== Advanced sync rules
+
+[NOTE]
+====
+A <> is required for advanced sync rules to take effect.
+====
+
+Advanced sync rules are defined through a source-specific DSL JSON snippet.
+Advanced sync rules for this connector use *glob patterns*.
+
+1. Each rule must contain a glob pattern. This pattern is then matched against all the available folder paths inside the configured drive path.
+2. The pattern must begin with the `drive_path` field configured in the connector.
+3. If the pattern matches any available folder paths, the contents directly within those folders will be fetched.
+
+The following sections provide examples of advanced sync rules for this connector.
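+
+All of the example patterns below assume that the configured drive path is `Folder-shared`; adjust the leading path segment to match your own configuration.
+As a minimal, hypothetical starting point, the following single rule would index everything beneath that share:
+
+[source,js]
+----
+[
+  {
+    "pattern": "Folder-shared/**"
+  }
+]
+----
+// NOTCONSOLE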
+ +[discrete#es-connectors-network-drive-indexing-files-and-folders-recursively-within-folders] +======= Indexing files and folders recursively within folders + +[source,js] +---- +[ + { + "pattern": "Folder-shared/a/mock/**" + }, + { + "pattern": "Folder-shared/b/alpha/**" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-indexing-files-and-folders-directly-inside-folder] +======= Indexing files and folders directly inside folder + +[source,js] +---- +[ + { + "pattern": "Folder-shared/a/b/test" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-indexing-files-and-folders-directly-inside-a-set-of-folders] +======= Indexing files and folders directly inside a set of folders + +[source,js] +---- +[ + { + "pattern": "Folder-shared/org/*/all-tests/test[135]" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-excluding-files-and-folders-that-match-a-pattern] +======= Excluding files and folders that match a pattern + +[source,js] +---- +[ + { + "pattern": "Folder-shared/**/all-tests/test[!7]" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-content-extraction] +===== Content extraction + +See <>. + +[discrete#es-connectors-network-drive-known-issues] +===== Known issues + +There are no known issues for this connector. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-network-drive-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-network-drive-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-network-drive-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-network-drive-client-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +This self-managed connector is compatible with Elastic versions *8.6.0+*. + +To use this connector, satisfy all <>. + +[discrete#es-connectors-network-drive-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <>For additional usage operations, see <>. + +[discrete#es-connectors-network-drive-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/network_drive.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`username`:: +The username of the account for the network drive. +The user must have at least **read** permissions for the folder path provided. + +`password`:: +The password of the account to be used for crawling the network drive. + +`server_ip`:: +The server IP address where the network drive is hosted. +Default value is `127.0.0.1`. + +`server_port`:: +The server port where the network drive service is available. +Default value is `445`. + +`drive_path`:: +** The network drive path the connector will crawl to fetch files. 
+This is the name of the folder shared via SMB. +The connector uses the Python https://github.com/jborean93/smbprotocol[`smbprotocol`^] library which supports both *SMB v2* and *v3*. +** Accepts only one path— parent folders can be specified to widen the scope. +** The drive path should use *forward slashes* as path separators. +Example: ++ +* `admin/bin` + +`use_document_level_security`:: +Toggle to enable document level security (DLS). When enabled: +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. ++ +[TIP] +==== +Refer to <> for more information, including prerequisites and limitations. +==== + +`drive_type`:: +The type of network drive to be crawled. +The following options are available: ++ +* `Windows` +* `Linux` + +`identity_mappings`:: +Path to a CSV file containing user and group SIDs (For Linux Network Drive). ++ +File should be formatted as follows: ++ +* Fields separated by semicolons (`;`) +* Three fields per line: `Username;User-SID;Group-SIDs` +* Group-SIDs are comma-separated and optional. ++ +*Example* with one username, user-sid and no group: ++ +[source,text] +---- +user1;S-1; +---- ++ +*Example* with one username, user-sid and two groups: ++ +[source,text] +---- +user1;S-1;S-11,S-22 +---- + +[discrete#es-connectors-network-drive-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-network-drive-client-documents-syncs] +===== Documents and syncs + +The connector syncs folders as separate documents in Elasticsearch. +The following fields will be added for the document type `folder`: + +* `create_time` +* `title` +* `path` +* `modified` +* `time` +* `id` + + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted +* Permissions are not synced by default. +You must first enable <>. +Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-network-drive-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-network-drive-client-dls] +===== Document level security + +Document Level Security (DLS) enables you to restrict access to documents based on a user's permissions. +DLS facilitates the syncing of folder and file permissions, including both user and group level permissions. + +[NOTE] +==== +**Note:** Refer to <> to learn how to search data with DLS enabled, when building a search application. +==== + +[discrete#es-connectors-network-drive-client-dls-availability] +====== Availability + +* The Network Drive self-managed connector offers DLS support for both Windows and Linux network drives. +* To fetch users and groups in a Windows network drive, account credentials added in the connector configuration should have access to the Powershell of the Windows Server where the network drive is hosted. + +[discrete#es-connectors-network-drive-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-network-drive-client-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Advanced sync rules are defined through a source-specific DSL JSON snippet. 
+Advanced sync rules for this connector use *glob patterns*. + +1. Each rule must contains a glob pattern. This pattern is then matched against all the available folder paths inside the configured drive path. +2. The pattern must begin with the `drive_path` field configured in the connector. +3. If the pattern matches any available folder paths, the contents directly within those folders will be fetched. + +The following sections provide examples of advanced sync rules for this connector. + +[discrete#es-connectors-network-drive-client-indexing-files-and-folders-recursively-within-folders] +======= Indexing files and folders recursively within folders + +[source,js] +---- +[ + { + "pattern": "Folder-shared/a/mock/**" + }, + { + "pattern": "Folder-shared/b/alpha/**" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-client-indexing-files-and-folders-directly-inside-folder] +======= Indexing files and folders directly inside folder + +[source,js] +---- +[ + { + "pattern": "Folder-shared/a/b/test" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-client-indexing-files-and-folders-directly-inside-a-set-of-folders] +======= Indexing files and folders directly inside a set of folders + +[source,js] +---- +[ + { + "pattern": "Folder-shared/org/*/all-tests/test[135]" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-client-excluding-files-and-folders-that-match-a-pattern] +======= Excluding files and folders that match a pattern + +[source,js] +---- +[ + { + "pattern": "Folder-shared/**/all-tests/test[!7]" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-network-drive-client-content-extraction] +===== Content extraction + +See <>. + +[discrete#es-connectors-network-drive-client-tests] +===== End-to-end tests + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To execute a functional test for the Network Drive self-managed connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=network_drive +---- + +By default, this will use a medium-sized dataset. +For faster tests add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=network_drive DATA_SIZE=small +---- + +[discrete#es-connectors-network-drive-client-known-issues] +===== Known issues + +There are no known issues for this connector. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-network-drive-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-network-drive-client-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-notion.asciidoc b/docs/reference/connector/docs/connectors-notion.asciidoc new file mode 100644 index 0000000000000..2d7a71bff20de --- /dev/null +++ b/docs/reference/connector/docs/connectors-notion.asciidoc @@ -0,0 +1,747 @@ +[#es-connectors-notion] +=== Elastic Notion Connector reference +++++ +Notion +++++ + +// Attributes (AKA variables) used in this file +:service-name: Notion +:service-name-stub: notion + +The Notion connector is written in Python using the {connectors-python}[Elastic connector framework^]. +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). 
+
+// //////// //// //// //// //// //// //// ////////
+// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) ///////
+// //////// //// //// //// //// //// //// ////////
+
+[discrete#es-connectors-notion-native-connector-reference]
+==== *Elastic managed connector reference*
+
+.View *Elastic managed connector* reference
+[%collapsible]
+===============
+
+[discrete#es-connectors-notion-connector-availability-and-prerequisites]
+===== Availability and prerequisites
+
+This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud.
+
+To use this connector natively in Elastic Cloud, satisfy all <>.
+
+[NOTE]
+====
+This connector is in *beta* and is subject to change. The design and code are less mature than official GA features and are being provided as-is with no warranties. Beta features are not subject to the support SLA of official GA features.
+====
+
+[discrete#es-connectors-notion-connector-usage]
+===== Usage
+
+To use this connector in the UI, select the *Notion* tile when creating a new connector under *Search -> Connectors*.
+
+If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs].
+
+For additional operations, see <>.
+
+[discrete#es-connectors-notion-create-native-connector]
+===== Create a {service-name} connector
+include::_connectors-create-native.asciidoc[]
+
+[discrete#es-connectors-notion-connector-connecting-to-notion]
+===== Connecting to Notion
+
+To connect to Notion, the user needs to https://www.notion.so/help/create-integrations-with-the-notion-api#create-an-internal-integration[create an internal integration] for their Notion workspace, which can access resources using the Internal Integration Secret Token. Configure the integration with the following settings:
+
+1. Users must grant `READ` permission for content, comment and user capabilities for that integration from the Capabilities tab.
+
+2. Users must manually https://www.notion.so/help/add-and-manage-connections-with-the-api#add-connections-to-pages[add the integration as a connection] to the top-level pages in a workspace. Sub-pages will inherit the connections of the parent page automatically.
+
+[discrete#es-connectors-notion-connector-configuration]
+===== Configuration
+
+Note the following configuration fields:
+
+`Notion Secret Key`(required)::
+Secret token assigned to your integration, for a particular workspace. Example:
+
+* `zyx-123453-12a2-100a-1123-93fd09d67394`
+
+`Databases`(required)::
+Comma-separated list of database names to be fetched by the connector. If the value is `*`, the connector will fetch all the databases available in the workspace. Examples:
+
+* `database1, database2`
+* `*`
+
+`Pages`(required)::
+Comma-separated list of page names to be fetched by the connector. If the value is `*`, the connector will fetch all the pages available in the workspace. Examples:
+
+* `*`
+* `Page1, Page2`
+
+`Index Comments`::
+Toggle to enable fetching and indexing of comments from the Notion workspace for the configured pages, databases and the corresponding child blocks. Default value is `False`.
+
+[NOTE]
+====
+Enabling comment indexing could impact connector performance due to increased network calls. Therefore, by default this value is `False`.
+====
+
+[discrete#es-connectors-notion-connector-content-extraction]
+====== Content Extraction
+
+Refer to <>.
+ +[discrete#es-connectors-notion-connector-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Pages* +** Includes metadata such as `page name`, `id`, `last updated time`, etc. +* *Blocks* +** Includes metadata such as `title`, `type`, `id`, `content` (in case of file block), etc. +* *Databases* +** Includes metadata such as `name`, `id`, `records`, `size`, etc. +* *Users* +** Includes metadata such as `name`, `id`, `email address`, etc. +* *Comments* +** Includes the content and metadata such as `id`, `last updated time`, `created by`, etc. +** *Note*: Comments are excluded by default. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the relevant Elasticsearch index. +==== + +[discrete#es-connectors-notion-connector-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-notion-connector-advanced-sync-rules] +===== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector, to filter data in Notion _before_ indexing into {es}. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +Advanced sync rules for Notion take the following parameters: + +1. `searches`: Notion's search filter to search by title. +2. `query`: Notion's database query filter to fetch a specific database. + +[discrete#es-connectors-notion-connector-advanced-sync-rules-examples] +====== Examples + +[discrete] +======= Example 1 + +Indexing every page where the title contains `Demo Page`: + +[source,js] +---- + { + "searches": [ + { + "filter": { + "value": "page" + }, + "query": "Demo Page" + } + ] + } +---- +// NOTCONSOLE + +[discrete] +======= Example 2 + +Indexing every database where the title contains `Demo Database`: + +[source,js] +---- +{ + "searches": [ + { + "filter": { + "value": "database" + }, + "query": "Demo Database" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 3 + +Indexing every database where the title contains `Demo Database` and every page where the title contains `Demo Page`: + +[source,js] +---- +{ + "searches": [ + { + "filter": { + "value": "database" + }, + "query": "Demo Database" + }, + { + "filter": { + "value": "page" + }, + "query": "Demo Page" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 4 + +Indexing all pages in the workspace: + +[source,js] +---- +{ + "searches": [ + { + "filter": { + "value": "page" + }, + "query": "" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 5 + +Indexing all the pages and databases connected to the workspace: + +[source,js] +---- +{ + "searches":[ + { + "query":"" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 6 + +Indexing all the rows of a database where the record is `true` for the column `Task completed` and its property(datatype) is a checkbox: + +[source,js] +---- +{ + "database_query_filters": [ + { + "filter": { + "property": "Task completed", + "checkbox": { + "equals": true + } + }, + "database_id": "database_id" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 7 + +Indexing all rows of a specific database: + +[source,js] +---- +{ + "database_query_filters": [ + { + "database_id": "database_id" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 8 + 
+Indexing all blocks defined in `searches` and `database_query_filters`:
+
+[source,js]
+----
+{
+  "searches":[
+    {
+      "query":"External tasks",
+      "filter":{
+        "value":"database"
+      }
+    },
+    {
+      "query":"External tasks",
+      "filter":{
+        "value":"page"
+      }
+    }
+  ],
+  "database_query_filters":[
+    {
+      "database_id":"notion_database_id1",
+      "filter":{
+        "property":"Task completed",
+        "checkbox":{
+          "equals":true
+        }
+      }
+    }
+  ]
+}
+----
+// NOTCONSOLE
+
+[NOTE]
+====
+In this example, the `filter` object syntax for `database_query_filters` is defined per the https://developers.notion.com/reference/post-database-query-filter[Notion documentation].
+====
+
+[discrete#es-connectors-notion-connector-known-issues]
+===== Known issues
+
+* *Updates to new pages may not be reflected immediately in the Notion API.*
++
+This could lead to these pages not being indexed by the connector if a sync is initiated immediately after their addition.
+To ensure all pages are indexed, initiate syncs a few minutes after adding pages to Notion.
+
+* *Notion's Public API does not support linked databases.*
++
+Linked databases in Notion are copies of a database that can be filtered, sorted, and viewed differently.
+To fetch the information in a linked database, you need to target the original *source* database.
+For more details refer to the https://developers.notion.com/docs/working-with-databases#linked-databases[Notion documentation].
+
+* *Documents' `properties` objects are serialized as strings under `details`*.
++
+Notion's schema for `properties` is not consistent, and can lead to `document_parsing_exceptions` if indexed to Elasticsearch as an object.
+For this reason, the `properties` object is instead serialized as a JSON string, and stored under the `details` field.
+If you need to search a sub-object from `properties`, you may need to post-process the `details` field in an ingest pipeline to extract your desired subfield(s).
+
+Refer to <> for a list of known issues for all connectors.
+
+[discrete#es-connectors-notion-connector-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-notion-connector-security]
+===== Security
+
+See <>.
+
+
+// Closing the collapsible section
+===============
+
+
+// //////// //// //// //// //// //// //// ////////
+// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) ///////
+// //////// //// //// //// //// //// //// ////////
+
+[discrete#es-connectors-notion-connector-client-reference]
+==== *Self-managed connector reference*
+
+.View *self-managed connector* reference
+[%collapsible]
+===============
+
+[discrete#es-connectors-notion-client-connector-availability-and-prerequisites]
+===== Availability and prerequisites
+
+This connector was introduced in Elastic *8.13.0*, available as a *self-managed connector*.
+
+To use this connector, satisfy all <>.
+Importantly, you must deploy the connectors service on your own infrastructure.
+You have two deployment options:
+
+* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally.
+* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform.
+
+[NOTE]
+====
+This connector is in *beta* and is subject to change. The design and code are less mature than official GA features and are being provided as-is with no warranties. Beta features are not subject to the support SLA of official GA features.
+==== + +[discrete#es-connectors-notion-client-connector-usage] +===== Usage + +To use this connector in the UI, select the *Notion* tile when creating a new connector under *Search -> Connectors*. + +For additional operations, see <>. + +[discrete#es-connectors-notion-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-notion-client-connector-connecting-to-notion] +===== Connecting to Notion + +To connect to Notion, the user needs to https://www.notion.so/help/create-integrations-with-the-notion-api#create-an-internal-integration[create an internal integration] for their Notion workspace, which can access resources using the Internal Integration Secret Token. Configure the Integration with following settings: + +1. Users must grant `READ` permission for content, comment and user capabilities for that integration from the Capabilities tab. + +2. Users must manually https://www.notion.so/help/add-and-manage-connections-with-the-api#add-connections-to-pages[add the integration as a connection] to the top-level pages in a workspace. Sub-pages will inherit the connections of the parent page automatically. + +[discrete#es-connectors-notion-client-connector-docker] +===== Deploy with Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-notion-client-connector-configuration] +===== Configuration + +Note the following configuration fields: + +`Notion Secret Key`(required):: +Secret token assigned to your integration, for a particular workspace. Example: + +* `zyx-123453-12a2-100a-1123-93fd09d67394` + +`Databases`(required):: +Comma-separated list of database names to be fetched by the connector. If the value is `*`, connector will fetch all the databases available in the workspace. Example: + +* `database1, database2` +* `*` + +`Pages`(required):: +Comma-separated list of page names to be fetched by the connector. If the value is `*`, connector will fetch all the pages available in the workspace. Examples: + +* `*` +* `Page1, Page2` + +`Index Comments`:: + +Toggle to enable fetching and indexing of comments from the Notion workspace for the configured pages, databases and the corresponding child blocks. Default value is `False`. + +[NOTE] +==== +Enabling comment indexing could impact connector performance due to increased network calls. Therefore, by default this value is `False`. +==== + +[discrete#es-connectors-notion-client-connector-content-extraction] +====== Content Extraction + +Refer to <>. + +[discrete#es-connectors-notion-client-connector-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Pages* +** Includes metadata such as `page name`, `id`, `last updated time`, etc. +* *Blocks* +** Includes metadata such as `title`, `type`, `id`, `content` (in case of file block), etc. +* *Databases* +** Includes metadata such as `name`, `id`, `records`, `size`, etc. +* *Users* +** Includes metadata such as `name`, `id`, `email address`, etc. +* *Comments* +** Includes the content and metadata such as `id`, `last updated time`, `created by`, etc. +** *Note*: Comments are excluded by default. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the relevant Elasticsearch index. 
+==== + +[discrete#es-connectors-notion-client-connector-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-notion-client-connector-advanced-sync-rules] +===== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector, to filter data in Notion _before_ indexing into {es}. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +Advanced sync rules for Notion take the following parameters: + +1. `searches`: Notion's search filter to search by title. +2. `query`: Notion's database query filter to fetch a specific database. + +[discrete#es-connectors-notion-client-connector-advanced-sync-rules-examples] +====== Examples + +[discrete] +======= Example 1 + +Indexing every page where the title contains `Demo Page`: + +[source,js] +---- + { + "searches": [ + { + "filter": { + "value": "page" + }, + "query": "Demo Page" + } + ] + } +---- +// NOTCONSOLE + +[discrete] +======= Example 2 + +Indexing every database where the title contains `Demo Database`: + +[source,js] +---- +{ + "searches": [ + { + "filter": { + "value": "database" + }, + "query": "Demo Database" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 3 + +Indexing every database where the title contains `Demo Database` and every page where the title contains `Demo Page`: + +[source,js] +---- +{ + "searches": [ + { + "filter": { + "value": "database" + }, + "query": "Demo Database" + }, + { + "filter": { + "value": "page" + }, + "query": "Demo Page" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 4 + +Indexing all pages in the workspace: + +[source,js] +---- +{ + "searches": [ + { + "filter": { + "value": "page" + }, + "query": "" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 5 + +Indexing all the pages and databases connected to the workspace: + +[source,js] +---- +{ + "searches":[ + { + "query":"" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 6 + +Indexing all the rows of a database where the record is `true` for the column `Task completed` and its property(datatype) is a checkbox: + +[source,js] +---- +{ + "database_query_filters": [ + { + "filter": { + "property": "Task completed", + "checkbox": { + "equals": true + } + }, + "database_id": "database_id" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 7 + +Indexing all rows of a specific database: + +[source,js] +---- +{ + "database_query_filters": [ + { + "database_id": "database_id" + } + ] +} +---- +// NOTCONSOLE + +[discrete] +======= Example 8 + +Indexing all blocks defined in `searches` and `database_query_filters`: + +[source,js] +---- +{ + "searches":[ + { + "query":"External tasks", + "filter":{ + "value":"database" + } + }, + { + "query":"External tasks", + "filter":{ + "value":"page" + } + } + ], + "database_query_filters":[ + { + "database_id":"notion_database_id1", + "filter":{ + "property":"Task completed", + "checkbox":{ + "equals":true + } + } + } + ] +} +---- +// NOTCONSOLE + +[NOTE] +==== +In this example the `filter` object syntax for `database_query_filters` is defined per the https://developers.notion.com/reference/post-database-query-filter[Notion documentation]. 
+==== + +[discrete#es-connectors-notion-client-connector-connector-client-operations] +===== Connector Client operations + +[discrete#es-connectors-notion-client-connector-end-to-end-testing] +====== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source, using Docker Compose. +You don't need a running Elasticsearch instance or Notion source to run this test. + +Refer to <> for more details. + +To perform E2E testing for the Notion connector, run the following command: + + +[source,shell] +---- +$ make ftest NAME=notion +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=notion DATA_SIZE=small +---- + +By default, `DATA_SIZE=MEDIUM`. + +[discrete#es-connectors-notion-client-connector-known-issues] +===== Known issues + +* *Updates to new pages may not be reflected immediately in the Notion API.* ++ +This could lead to these pages not being indexed by the connector, if a sync is initiated immediately after their addition. +To ensure all pages are indexed, initiate syncs a few minutes after adding pages to Notion. + +* *Notion's Public API does not support linked databases.* ++ +Linked databases in Notion are copies of a database that can be filtered, sorted, and viewed differently. +To fetch the information in a linked database, you need to target the original *source* database. +For more details refer to the https://developers.notion.com/docs/working-with-databases#linked-databases[Notion documentation]. + +* *Documents' `properties` objects are serialized as strings under `details`*. ++ +Notion's schema for `properties` is not consistent, and can lead to `document_parsing_exceptions` if indexed to Elasticsearch as an object. +For this reason, the `properties` object is instead serialized as a JSON string, and stored under the `details` field. +If you need to search a sub-object from `properties`, you may need to post-process the `details` field in an ingest pipeline to extract your desired subfield(s). + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-notion-client-connector-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-notion-client-connector-security] +===== Security + +See <>. + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-onedrive.asciidoc b/docs/reference/connector/docs/connectors-onedrive.asciidoc new file mode 100644 index 0000000000000..7d1a21aeb78db --- /dev/null +++ b/docs/reference/connector/docs/connectors-onedrive.asciidoc @@ -0,0 +1,604 @@ +[#es-connectors-onedrive] +=== Elastic OneDrive connector reference +++++ +OneDrive +++++ +// Attributes used in this file +:service-name: OneDrive +:service-name-stub: onedrive + +The _Elastic OneDrive connector_ is a <> for OneDrive. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. 
+******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-onedrive-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-onedrive-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* as of Elastic version *8.11.0*. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-onedrive-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-onedrive-usage] +===== Usage + +To use this connector natively in Elastic Cloud, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-onedrive-usage-connection] +====== Connecting to OneDrive + +To connect to OneDrive you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. + +Follow these steps: + +. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. +. Navigate to the *Azure Active Directory* service. +. Select *App registrations* from the left-hand menu. +. Click on the *New registration* button to register a new application. +. Provide a name for your app, and optionally select the supported account types (e.g., single tenant, multi-tenant). +. Click on the *Register* button to create the app registration. +. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. +. Scroll down to the *API permissions* section and click on the *Add a permission* button. +. In the *Request API permissions* pane, select *Microsoft Graph* as the API. +. Choose the application permissions and select the following permissions under the *Application* tab: `User.Read.All`, `File.Read.All` +. Click on the *Add permissions* button to add the selected permissions to your app. +Finally, click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. **NOTE**: If you are not an admin, you need to request the Admin to grant consent via their Azure Portal. +. Click on *Certificates & Secrets* tab. Go to Client Secrets. Generate a new client secret and keep a note of the string present under `Value` column. + +[discrete#es-connectors-onedrive-usage-configuration] +===== Configuration + +The following configuration fields are *required*: + +Azure application Client ID:: +Unique identifier for your Azure Application, found on the app's overview page. Example: +* `ab123453-12a2-100a-1123-93fd09d67394` + +Azure application Client Secret:: +String value that the application uses to prove its identity when requesting a token, available under the `Certificates & Secrets` tab of your Azure application menu. Example: +* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` + +Azure application Tenant ID:: +Unique identifier of your Azure Active Directory instance. Example: +* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` + +Enable document level security:: +Toggle to enable <>. 
+When enabled: +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. + +[WARNING] +==== +Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. +This impacts the speed at which your content can be retrieved. +==== + +[discrete#es-connectors-onedrive-usage-content-extraction] +===== Content Extraction + +Refer to <> for more details. + +[discrete#es-connectors-onedrive-documents-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Files* +** Includes metadata such as file name, path, size, content, etc. +* *Folders* + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced by default. +You must first enable <>. +Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-onedrive-connectors-onedrive-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-onedrive-dls] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. This feature is available by default for the OneDrive connector. +See <> for how to enable DLS for this connector. + +Refer to <> for more details about this feature. + +[NOTE] +==== +Refer to <> to learn how to ingest data with DLS enabled, when building a search application. +==== + +[discrete#es-connectors-onedrive-documents-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-onedrive-sync-rules-advanced] +====== Advanced sync rules + +This connector supports <> for remote filtering. +These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Here are a few examples of advanced sync rules for this connector. + +[discrete#es-connectors-onedrive-sync-rules-advanced-examples-1] +======= Example 1 + +This rule skips indexing for files with `.xlsx` and `.docx` extensions. +All other files and folders will be indexed. + +[source,js] +---- +[ + { + "skipFilesWithExtensions": [".xlsx" , ".docx"] + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-onedrive-sync-rules-advanced-examples-2] +======= Example 2 + +This rule focuses on indexing files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com` but excludes files with `.py` extension. + +[source,js] +---- +[ + { + "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"], + "skipFilesWithExtensions": [".py"] + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-onedrive-sync-rules-advanced-examples-3] +======= Example 3 + +This rule indexes only the files and folders directly inside the root folder, excluding any `.md` files. 
+
+[source,js]
+----
+[
+  {
+    "skipFilesWithExtensions": [".md"],
+    "parentPathPattern": "/drive/root:"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-onedrive-sync-rules-advanced-examples-4]
+======= Example 4
+
+This rule indexes files and folders owned by `user1-domain@onmicrosoft.com` and `user3-domain@onmicrosoft.com` that are directly inside the `abc` folder, which is a subfolder of any folder under the `hello` directory in the root. Files with extensions `.pdf` and `.py` are excluded.
+
+[source,js]
+----
+[
+  {
+    "owners": ["user1-domain@onmicrosoft.com", "user3-domain@onmicrosoft.com"],
+    "skipFilesWithExtensions": [".pdf", ".py"],
+    "parentPathPattern": "/drive/root:/hello/**/abc"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-onedrive-sync-rules-advanced-examples-5]
+======= Example 5
+
+This example contains two rules.
+The first rule indexes all files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`.
+The second rule indexes files for all other users, but skips files with a `.py` extension.
+
+[source,js]
+----
+[
+  {
+    "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"]
+  },
+  {
+    "skipFilesWithExtensions": [".py"]
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-onedrive-sync-rules-advanced-examples-6]
+======= Example 6
+
+This example contains two rules.
+The first rule indexes all files owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`, excluding `.md` files.
+The second rule indexes files and folders recursively inside the `abc` folder.
+
+[source,js]
+----
+[
+  {
+    "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"],
+    "skipFilesWithExtensions": [".md"]
+  },
+  {
+    "parentPathPattern": "/drive/root:/abc/**"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-onedrive-content-extraction]
+===== Content Extraction
+
+See <>.
+
+[discrete#es-connectors-onedrive-known-issues]
+===== Known issues
+
+* *Enabling document-level security impacts performance.*
++
+Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved.
+
+Refer to <> for a list of known issues for all connectors.
+
+[discrete#es-connectors-onedrive-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-onedrive-security]
+===== Security
+
+See <>.
+// Closing the collapsible section
+===============
+
+
+// //////// //// //// //// //// //// //// ////////
+// //////// CONNECTOR CLIENT REFERENCE ///////
+// //////// //// //// //// //// //// //// ////////
+
+[discrete#es-connectors-onedrive-connector-client-reference]
+==== *Self-managed connector reference*
+
+.View *self-managed connector* reference
+[%collapsible]
+===============
+
+[discrete#es-connectors-onedrive-client-availability-prerequisites]
+===== Availability and prerequisites
+
+This connector is available as a *self-managed connector*.
+
+This self-managed connector is compatible with Elastic versions *8.10.0+*.
+
+To use this connector, satisfy all <>.
+
+[discrete#es-connectors-onedrive-create-connector-client]
+===== Create a {service-name} connector
+include::_connectors-create-client.asciidoc[]
+
+[discrete#es-connectors-onedrive-client-usage]
+===== Usage
+
+For additional operations, see <>.
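+
+If you prefer to manage connectors programmatically, the connector record can also be created with the {ref}/connector-apis.html[Connector APIs].
+The following is a minimal sketch, assuming the connector ID, index name, and display name shown here are placeholders you choose yourself:
+
+[source,js]
+----
+PUT _connector/my-onedrive-connector
+{
+  "index_name": "search-onedrive",
+  "name": "My OneDrive connector",
+  "service_type": "onedrive"
+}
+----
+// NOTCONSOLE
+
+Creating the record alone does not sync any data; the self-managed connectors service still needs to be deployed and configured as described in the following sections.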
+ +[discrete#es-connectors-onedrive-client-usage-connection] +====== Connecting to OneDrive + +To connect to OneDrive you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. + +Follow these steps: + +. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. +. Navigate to the *Azure Active Directory* service. +. Select *App registrations* from the left-hand menu. +. Click on the *New registration* button to register a new application. +. Provide a name for your app, and optionally select the supported account types (e.g., single tenant, multi-tenant). +. Click on the *Register* button to create the app registration. +. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. +. Scroll down to the *API permissions* section and click on the *Add a permission* button. +. In the *Request API permissions* pane, select *Microsoft Graph* as the API. +. Choose the application permissions and select the following permissions under the *Application* tab: `User.Read.All`, `File.Read.All` +. Click on the *Add permissions* button to add the selected permissions to your app. +Finally, click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. **NOTE**: If you are not an admin, you need to request the Admin to grant consent via their Azure Portal. +. Click on *Certificates & Secrets* tab. Go to Client Secrets. Generate a new client secret and keep a note of the string present under `Value` column. + +[discrete#es-connectors-onedrive-client-docker] +===== Deployment using Docker + +Self-managed connectors are run on your own infrastructure. + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-onedrive-client-usage-configuration] +===== Configuration + +The following configuration fields are *required*: + +`client_id`:: +Azure application Client ID, unique identifier for your Azure Application, found on the app's overview page. Example: +* `ab123453-12a2-100a-1123-93fd09d67394` + +`client_secret`:: +Azure application Client Secret, string value that the application uses to prove its identity when requesting a token. Available under the `Certificates & Secrets` tab of your Azure application menu. Example: +* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` + +`tenant_id`:: +Azure application Tenant ID: unique identifier of your Azure Active Directory instance. Example: +* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` + +`retry_count`:: +The number of retry attempts after failed request to OneDrive. Default value is `3`. + +`use_document_level_security`:: +Toggle to enable <>. +When enabled: +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. ++ +[WARNING] +==== +Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. +This impacts the speed at which your content can be retrieved. +==== + +`use_text_extraction_service`:: +Requires a separate deployment of the <>. +Requires that ingest pipeline settings disable text extraction. +Default value is `False`. 
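+
+Once the connector record exists, these fields can also be set programmatically.
+The following is a minimal sketch using the {ref}/connector-apis.html[Connector APIs], assuming a placeholder connector ID of `my-onedrive-connector`; the values shown are the placeholder examples listed above:
+
+[source,js]
+----
+PUT _connector/my-onedrive-connector/_configuration
+{
+  "values": {
+    "client_id": "ab123453-12a2-100a-1123-93fd09d67394",
+    "client_secret": "eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu",
+    "tenant_id": "123a1b23-12a3-45b6-7c8d-fc931cfb448d",
+    "retry_count": 3,
+    "use_document_level_security": false,
+    "use_text_extraction_service": false
+  }
+}
+----
+// NOTCONSOLE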
+ +[discrete#es-connectors-onedrive-client-usage-content-extraction] +===== Content Extraction + +Refer to <> for more details. + +[discrete#es-connectors-onedrive-client-documents-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Files* +** Includes metadata such as file name, path, size, content, etc. +* *Folders* + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. +* Permissions are not synced by default. +You must first enable <>. +Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-onedrive-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-onedrive-client-dls] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. This feature is available by default for the OneDrive connector. +See <> for how to enable DLS for this connector. + +Refer to <> for more details about this feature. + +[NOTE] +==== +Refer to <> to learn how to ingest data with DLS enabled, when building a search application. +==== + +[discrete#es-connectors-onedrive-client-documents-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-onedrive-client-sync-rules-advanced] +====== Advanced sync rules + +This connector supports <> for remote filtering. +These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Here are a few examples of advanced sync rules for this connector. + +[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-1] +======= Example 1 + +This rule skips indexing for files with `.xlsx` and `.docx` extensions. +All other files and folders will be indexed. + +[source,js] +---- +[ + { + "skipFilesWithExtensions": [".xlsx" , ".docx"] + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-2] +======= Example 2 + +This rule focuses on indexing files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com` but excludes files with `.py` extension. + +[source,js] +---- +[ + { + "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"], + "skipFilesWithExtensions": [".py"] + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-3] +======= Example 3 + +This rule indexes only the files and folders directly inside the root folder, excluding any `.md` files. + +[source,js] +---- +[ + { + "skipFilesWithExtensions": [".md"], + "parentPathPattern": "/drive/root:" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-4] +======= Example 4 + +This rule indexes files and folders owned by `user1-domain@onmicrosoft.com` and `user3-domain@onmicrosoft.com` that are directly inside the `abc` folder, which is a subfolder of any folder under the `hello` directory in the root. Files with extensions `.pdf` and `.py` are excluded. 
+
+[source,js]
+----
+[
+  {
+    "owners": ["user1-domain@onmicrosoft.com", "user3-domain@onmicrosoft.com"],
+    "skipFilesWithExtensions": [".pdf", ".py"],
+    "parentPathPattern": "/drive/root:/hello/**/abc"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-5]
+======= Example 5
+
+This example contains two rules.
+The first rule indexes all files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`.
+The second rule indexes files for all other users, but skips files with a `.py` extension.
+
+[source,js]
+----
+[
+  {
+    "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"]
+  },
+  {
+    "skipFilesWithExtensions": [".py"]
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-6]
+======= Example 6
+
+This example contains two rules.
+The first rule indexes all files owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`, excluding `.md` files.
+The second rule indexes files and folders recursively inside the `abc` folder.
+
+[source,js]
+----
+[
+  {
+    "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"],
+    "skipFilesWithExtensions": [".md"]
+  },
+  {
+    "parentPathPattern": "/drive/root:/abc/**"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-onedrive-client-content-extraction]
+===== Content Extraction
+
+See <>.
+
+[discrete#es-connectors-onedrive-client-connector-client-operations]
+===== Self-managed connector operations
+
+[discrete#es-connectors-onedrive-client-testing]
+====== End-to-end testing
+
+The connector framework enables operators to run functional tests against a real data source.
+Refer to <> for more details.
+
+To perform E2E testing for the OneDrive connector, run the following command:
+
+[source,shell]
+----
+$ make ftest NAME=onedrive
+----
+
+For faster tests, add the `DATA_SIZE=small` flag:
+
+[source,shell]
+----
+make ftest NAME=onedrive DATA_SIZE=small
+----
+
+[discrete#es-connectors-onedrive-client-known-issues]
+===== Known issues
+
+* *Enabling document-level security impacts performance.*
++
+Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved.
+
+Refer to <> for a list of known issues for all connectors.
+
+[discrete#es-connectors-onedrive-client-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-onedrive-client-security]
+===== Security
+
+See <>.
+// Closing the collapsible section
+===============
diff --git a/docs/reference/connector/docs/connectors-opentext-documentum.asciidoc b/docs/reference/connector/docs/connectors-opentext-documentum.asciidoc
new file mode 100644
index 0000000000000..e320062240428
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-opentext-documentum.asciidoc
@@ -0,0 +1,162 @@
+[#es-connectors-opentext]
+=== Elastic OpenText Documentum connector reference
+++++
+OpenText Documentum
+++++
+
+// Attributes used in this file
+:service-name: OpenText Documentum
+:service-name-stub: opentext_documentum
+
+[WARNING]
+====
+This connector is an *example connector* that serves as a building block for customizations and is subject to change.
+Its source code currently lives on a https://github.com/elastic/connectors/blob/opentext-connector-backup/connectors/sources/opentext_documentum.py[feature branch] and is not yet part of the main Elastic Connectors codebase.
+The design and code are less mature than supported features and are being provided as-is with no warranties.
+This connector is not subject to the support SLA of supported features.
+====
+
+The Elastic OpenText Documentum connector is written in Python using the https://github.com/elastic/connectors/tree/main?tab=readme-ov-file#connector-framework[Elastic connector framework]. View the https://github.com/elastic/connectors/blob/opentext-connector-backup/connectors/sources/opentext_documentum.py[source code] for this example connector.
+
+[discrete#es-connectors-opentext-documentum-connector-availability-and-prerequisites]
+==== Availability and prerequisites
+
+This *example connector* was introduced in Elastic *8.14.0*, available as a *self-managed connector* on a feature branch, for testing and development purposes only.
+
+To use this connector, satisfy all <>.
+Importantly, you must deploy the connectors service on your own infrastructure.
+You have two deployment options:
+
+* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally.
+* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform.
+
+[discrete#es-connectors-opentext-documentum-connector-usage]
+==== Usage
+
+To set up this connector in the UI, select the *OpenText Documentum* tile when creating a new connector under *Search -> Connectors*.
+
+If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs].
+
+For additional operations, see <>.
+
+[discrete#es-connectors-opentext-documentum-connector-connecting-to-opentext-documentum]
+==== Connecting to OpenText Documentum
+
+Basic authentication is used to connect with OpenText Documentum.
+
+[discrete#es-connectors-opentext-documentum-connector-configuration]
+==== Configuration
+
+[discrete#es-connectors-opentext-documentum-connector-configure-opentext-documentum-connector]
+===== Configure OpenText Documentum connector
+
+Note the following configuration fields:
+
+`OpenText Documentum host url` (required)::
+The domain where OpenText Documentum is hosted.
+Example: `https://192.158.1.38:2099/`
+
+`Username` (required)::
+The username of the account used to connect to OpenText Documentum.
+
+`Password` (required)::
+The password of the account used to connect to OpenText Documentum.
+
+`Repositories` (optional)::
+Comma-separated list of OpenText Documentum repositories to fetch data from. If the value is `*`, the connector will fetch data from all repositories present in the configured user’s account.
++
+Default value is `*`.
++
+Examples:
++
+* `elastic`, `kibana`
+* `*`
+
+`Enable SSL` (optional)::
+Enable SSL for the OpenText Documentum instance.
+
+`SSL Certificate` (required if SSL is enabled)::
+SSL certificate for the OpenText Documentum instance.
+Example:
++
+```
+-----BEGIN CERTIFICATE-----
+MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT
+...
+7RhLQyWn2u00L7/9Omw=
+-----END CERTIFICATE-----
+```
+
+[discrete#es-connectors-opentext-documentum-connector-content-extraction]
+===== Content Extraction
+
+Refer to <>.
+ +[discrete#es-connectors-opentext-documentum-connector-documents-and-syncs] +==== Documents and syncs + +The connector syncs the following objects and entities: + +* *Repositories* +* *Cabinets* +* *Files & Folders* + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the destination Elasticsearch index. +==== + +[discrete#es-connectors-opentext-documentum-connector-sync-types] +===== Sync types +<> are supported by default for all connectors. + +<> are not available for this connector in the present version. + +[discrete#es-connectors-opentext-documentum-connector-sync-rules] +==== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-opentext-documentum-connector-connector-client-operations] +==== Connector Client operations + +[discrete#es-connectors-opentext-documentum-connector-end-to-end-testing] +===== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source, using Docker Compose. +You don't need a running Elasticsearch instance or OpenText Documentum source to run this test. + +Refer to <> for more details. + +To perform E2E testing for the OpenText Documentum connector, run the following command: + +```shell +$ make ftest NAME=opentext_documentum +``` +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=opentext_documentum DATA_SIZE=small +---- + +By default, `DATA_SIZE=MEDIUM`. + + +[discrete#es-connectors-opentext-documentum-connector-known-issues] +==== Known issues + +* There are no known issues for this connector. Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-opentext-documentum-connector-troubleshooting] +==== Troubleshooting + +See <>. + +[discrete#es-connectors-opentext-documentum-connector-security] +==== Security + +See <>. diff --git a/docs/reference/connector/docs/connectors-oracle.asciidoc b/docs/reference/connector/docs/connectors-oracle.asciidoc new file mode 100644 index 0000000000000..839a92985a7d5 --- /dev/null +++ b/docs/reference/connector/docs/connectors-oracle.asciidoc @@ -0,0 +1,395 @@ +[#es-connectors-oracle] +=== Elastic Oracle connector reference +++++ +Oracle +++++ + +// Attributes used in this file: +:service-name: Oracle +:service-name-stub: oracle + + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-oracle-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-oracle-availability-prerequisites] +===== Availability and prerequisites + +This connector is available natively in Elastic Cloud as of *8.12.0*. +To use this connector, satisfy all <>. + +[discrete#es-connectors-oracle-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-oracle-usage] +===== Usage + +To use this connector as a *managed connector*, see <>. + +The database user requires `CONNECT` and `DBA` privileges and must be the owner of the tables to be indexed. 
+ +[discrete#es-connectors-oracle-usage-secure-connection] +====== Secure connection + +To set up a secure connection the Oracle service must be installed on the system where the connector is running. + +Follow these steps: + +. Set the `oracle_home` parameter to your Oracle home directory. +If configuration files are not at the default location, set the `wallet_configuration_path` parameter. +. Create a directory to store the wallet. ++ +[source,shell] +---- +$ mkdir $ORACLE_HOME/ssl_wallet +---- +. Create file named `sqlnet.ora` at `$ORACLE_HOME/network/admin` and add the following content: ++ +[source,shell] +---- +WALLET_LOCATION = (SOURCE = (METHOD = FILE) (METHOD_DATA = (DIRECTORY = $ORACLE_HOME/ssl_wallet))) +SSL_CLIENT_AUTHENTICATION = FALSE +SSL_VERSION = 1.0 +SSL_CIPHER_SUITES = (SSL_RSA_WITH_AES_256_CBC_SHA) +SSL_SERVER_DN_MATCH = ON +---- ++ +. Run the following commands to create a wallet and attach an SSL certificate. +Replace the file name with your file name. ++ +[source,shell] +---- +$ orapki wallet create -wallet path-to-oracle-home/ssl_wallet -auto_login_only +$ orapki wallet add -wallet path-to-oracle-home/ssl_wallet -trusted_cert -cert path-to-oracle-home/ssl_wallet/root_ca.pem -auto_login_only +---- + +For more information, refer to this https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.Oracle.Options.SSL.html[Amazon RDS documentation^] about Oracle SSL. +Oracle docs: https://docs.oracle.com/database/121/DBSEG/asossl.htm#DBSEG070. +// TODO: check whether this link is appropriate + +For additional operations, see <>. + +[discrete#es-connectors-oracle-compatability] +===== Compatibility + +This connector is compatible with Oracle Database versions *18c*, *19c* and *21c*. + +[discrete#es-connectors-oracle-configuration] +===== Configuration + +Use the following configuration fields to set up the connector: + +Connection source:: +Dropdown to determine the Oracle Source Connection: `Service Name` or `SID`. Default value is `SID`. Select 'Service Name' option if connecting to a pluggable database. + +SID:: +SID of the database. + +Service name:: +Service name for the database. + +Host:: +The IP address or hostname of the Oracle database server. +Default value is `127.0.0.1`. + +Port:: +Port number of the Oracle database server. + +Username:: +Username to use to connect to the Oracle database server. + +Password:: +Password to use to connect to the Oracle database server. + +Comma-separated list of tables:: +Comma-separated list of tables to monitor for changes. +Default value is `*`. +Examples: +* `TABLE_1, TABLE_2` +* `*` + +[discrete#es-connectors-oracle-documents-syncs] +===== Documents and syncs + +* Tables with no primary key defined are skipped. +* If the table's system change number (SCN) value is not between the `min(SCN)` and `max(SCN)` values of the `SMON_SCN_TIME` table, the connector will not be able to retrieve the most recently updated time. + Data will therefore index in every sync. + For more details refer to the following https://community.oracle.com/tech/apps-infra/discussion/4076446/show-error-about-ora-08181-specified-number-is-not-a-valid-system-change-number-when-using-scn-t[discussion thread^]. +* The `sys` user is not supported, as it contains 1000+ system tables. If you need to work with the `sys` user, use either `sysdba` or `sysoper` and configure this as the username. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. 
+**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-oracle-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently, filtering is controlled by ingest pipelines. + +[discrete#es-connectors-oracle-known-issues] +===== Known issues + +There are no known issues for this connector. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-oracle-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-oracle-security] +===== Security + +See <>. + +[discrete#es-connectors-oracle-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +View {connectors-python}/connectors/sources/oracle.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-oracle-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-oracle-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +This self-managed connector is compatible with Elastic versions *8.6.0+*. +To use this connector, satisfy all <>. + +[discrete#es-connectors-oracle-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-oracle-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <>. + +The database user requires `CONNECT` and `DBA` privileges and must be the owner of the tables to be indexed. + +[discrete#es-connectors-oracle-client-usage-secure-connection] +====== Secure connection + +To set up a secure connection the Oracle service must be installed on the system where the connector is running. + +Follow these steps: + +. Set the `oracle_home` parameter to your Oracle home directory. +If configuration files are not at the default location, set the `wallet_configuration_path` parameter. +. Create a directory to store the wallet. ++ +[source,shell] +---- +$ mkdir $ORACLE_HOME/ssl_wallet +---- +. Create file named `sqlnet.ora` at `$ORACLE_HOME/network/admin` and add the following content: ++ +[source,shell] +---- +WALLET_LOCATION = (SOURCE = (METHOD = FILE) (METHOD_DATA = (DIRECTORY = $ORACLE_HOME/ssl_wallet))) +SSL_CLIENT_AUTHENTICATION = FALSE +SSL_VERSION = 1.0 +SSL_CIPHER_SUITES = (SSL_RSA_WITH_AES_256_CBC_SHA) +SSL_SERVER_DN_MATCH = ON +---- ++ +. Run the following commands to create a wallet and attach an SSL certificate. +Replace the file name with your file name. 
++ +[source,shell] +---- +$ orapki wallet create -wallet path-to-oracle-home/ssl_wallet -auto_login_only +$ orapki wallet add -wallet path-to-oracle-home/ssl_wallet -trusted_cert -cert path-to-oracle-home/ssl_wallet/root_ca.pem -auto_login_only +---- + +For more information, refer to this https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.Oracle.Options.SSL.html[Amazon RDS documentation^] about Oracle SSL. +Oracle docs: https://docs.oracle.com/database/121/DBSEG/asossl.htm#DBSEG070. +// TODO: check whether this link is appropriate + +For additional operations, see <>. + +[discrete#es-connectors-oracle-client-compatability] +===== Compatibility + +Oracle Database versions *18c*, *19c* and *21c* are compatible with Elastic connector frameworks. + +[discrete#es-connectors-oracle-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, these fields will use the default configuration set in the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[connector source code^]. +Note that this data source uses the `generic_database.py` connector source code. +Refer to https://github.com/elastic/connectors/blob/{branch}/connectors/sources/oracle.py[`oracle.py`^] for additional code, specific to this data source. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, users will be able to update these values in Kibana. +==== + +Use the following configuration fields to set up the connector: + +`connection_source`:: +Determines the Oracle source: Service Name or SID. Default value is SID. Select 'Service Name' if connecting to a pluggable database. + +`sid`:: +SID of the database. + +`service_name`:: +Service name for the database. + +`host`:: +The IP address or hostname of the Oracle database server. +Default value is `127.0.0.1`. + +`port`:: +Port number of the Oracle database server. + +`username`:: +Username to use to connect to the Oracle database server. + +`password`:: +Password to use to connect to the Oracle database server. + +`tables`:: +Comma-separated list of tables to monitor for changes. +Default value is `*`. +Examples: +* `TABLE_1, TABLE_2` +* `*` + +`oracle_protocol`:: +Protocol which the connector uses to establish a connection. +Default value is `TCP`. +For secure connections, use `TCPS`. + +`oracle_home`:: +Path to Oracle home directory to run connector in thick mode for secured connection. +For unsecured connections, keep this field empty. + +`wallet_configuration_path`:: +Path to SSL Wallet configuration files. + +`fetch_size`:: +Number of rows to fetch per request. +Default value is `50`. + +`retry_count`:: +Number of retry attempts after failed request to Oracle Database. +Default value is `3`. + +[discrete#es-connectors-oracle-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-oracle-client-documents-syncs] +===== Documents and syncs + +* Tables with no primary key defined are skipped. +* If the table's system change number (SCN) value is not between the `min(SCN)` and `max(SCN)` values of the `SMON_SCN_TIME` table, the connector will not be able to retrieve the most recently updated time. + Data will therefore index in every sync. 
+ For more details refer to the following https://community.oracle.com/tech/apps-infra/discussion/4076446/show-error-about-ora-08181-specified-number-is-not-a-valid-system-change-number-when-using-scn-t[discussion thread^]. +* The `sys` user is not supported, as it contains 1000+ system tables. If you need to work with the `sys` user, use either `sysdba` or `sysoper` and configure this as the username. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-oracle-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently, filtering is controlled by ingest pipelines. + +[discrete#es-connectors-oracle-client-operations] +===== Self-managed connector operations + +[discrete#es-connectors-oracle-client-operations-testing] +====== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To execute a functional test for the Oracle connector, run the following command: + +[source,shell] +---- +make ftest NAME=oracle +---- + +By default, this will use a medium-sized dataset. +To make the test faster add the `DATA_SIZE=small` argument: + +[source,shell] +---- +make ftest NAME=oracle DATA_SIZE=small +---- + +[discrete#es-connectors-oracle-client-known-issues] +===== Known issues + +There are no known issues for this connector. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-oracle-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-oracle-client-security] +===== Security + +See <>. + +[discrete#es-connectors-oracle-client-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +View {connectors-python}/connectors/sources/oracle.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-outlook.asciidoc b/docs/reference/connector/docs/connectors-outlook.asciidoc new file mode 100644 index 0000000000000..e032fb243dfa9 --- /dev/null +++ b/docs/reference/connector/docs/connectors-outlook.asciidoc @@ -0,0 +1,446 @@ +[#es-connectors-outlook] +=== Elastic Outlook connector reference +++++ +Outlook +++++ +// Attributes used in this file +:service-name: Outlook +:service-name-stub: outlook + +The Elastic Outlook connector is built with the Elastic connector framework and is available as a self-managed <>. 
+ + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-outlook-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-outlook-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* (managed service) in Elastic Cloud. + +This connector is compatible with Elastic versions *8.13.0+*. + +To use this connector, satisfy all <>. + +[discrete#es-connectors-outlook-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-outlook-usage] +===== Usage + +To use this connector as a managed connector in Elastic Cloud, use the *Connectors* workflow in the Kibana UI. + +To create a new {service-name} connector: + +. Navigate to *Search -> Connectors* page in the Kibana UI. +. Select the *New Native Connector* button. +. Select the *{service-name}* connector. + +For additional operations, see <>. + +[discrete#es-connectors-outlook-connecting-to-outlook] +===== Connecting to Outlook + +Outlook connector supports both cloud (Office365 Outlook) and on-premises (Exchange Server) platforms. + +[discrete#es-connectors-outlook-connect-to-exchange-server] +====== Connect to Exchange Server + +In order to connect to Exchange server, the connector fetches Active Directory users with the help of `ldap3` python library. + +[discrete#es-connectors-outlook-connect-to-office365-outlook-outlook-cloud] +====== Connect to Office365 Outlook (Outlook Cloud) + +To integrate with the Outlook connector using Azure, follow these steps to create and configure an Azure application: + +1. Navigate to the https://portal.azure.com/[Azure Portal] and log in using your credentials. +2. Click on *App registrations* to register a new application. +3. Navigate to the *Overview* tab. Make a note of the `Client ID` and `Tenant ID`. +4. Click on the *Certificates & secrets* tab and create a new client secret. Keep this secret handy. +5. Go to the *API permissions* tab. + * Click on "Add permissions." + * Choose "APIs my organization uses." + * Search for and select "Office 365 Exchange Online." + * Add the `full_access_as_app` application permission. + +You can now use the Client ID, Tenant ID, and Client Secret you've noted to configure the Outlook connector. + +[discrete#es-connectors-outlook-configuration] +===== Configuration + +Outlook data source (required):: +Dropdown to determine Outlook platform type: `outlook_cloud` or `outlook_server`. Default value is `outlook_cloud`. + +Tenant ID:: +Required if data source is `outlook_cloud`. +The Tenant ID for the Azure account hosting the Outlook instance. + +Client ID:: +Required if data source is `outlook_cloud`. +The Client ID to authenticate with Outlook instance. + +Client Secret Value:: +Required if data source is `outlook_cloud`. +The Client Secret value to authenticate with Outlook instance. + +Exchange Server:: +Required if data source is `outlook_server`. +IP address to connect with Exchange server. Example: `127.0.0.1` + +Active Directory Server:: +Required if data source is `outlook_server`. +IP address to fetch users from Exchange Active Directory to fetch data. Example: `127.0.0.1` + +Exchange server username:: +Required if data source is `outlook_server`. 
+Username to authenticate with Exchange server. + +Exchange server password:: +Required if data source is `outlook_server`. +Password to authenticate with Exchange server. + +Exchange server domain name:: +Required if data source is `outlook_server`. +Domain name for Exchange server users such as `gmail.com` or `exchange.local`. + +Enable SSL:: +Whether SSL verification will be enabled. Default value is `False`. +*Note:* This configuration is applicable for `Outlook Server` only. + +SSL certificate:: +Required if ssl is enabled. +Content of SSL certificate. Example certificate: ++ +[text] +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- ++ +*Note:* This configuration is applicable for `Outlook Server` only. + +Document level security:: +Toggle to enable <>. +When enabled: ++ +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs fetch users' access control lists and store them in a separate index. + +[discrete#es-connectors-outlook-content-extraction] +===== Content Extraction + +Refer to <>. + +[discrete#es-connectors-outlook-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Mails* +** *Inbox Mails* +** *Sent Mails* +** *Archive Mails* +** *Junk Mails* +* *Contacts* +* *Calendar Events* +* *Tasks* +* *Attachments* +** *Mail Attachments* +** *Task Attachments* +** *Calendar Attachments* + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-outlook-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-outlook-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-outlook-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-outlook-advanced-sync-rules] +===== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-outlook-connector-client-operations] +===== Connector Client operations + +[discrete#es-connectors-outlook-end-to-end-testing] +====== End-to-end Testing + +*Note:* End-to-end testing is not available in the current version of the connector. + +[discrete#es-connectors-outlook-known-issues] +====== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-outlook-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-outlook-security] +===== Security + +See <>. 
+ +[discrete#es-connectors-outlook-source] +===== Framework and source + +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/outlook.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-outlook-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-outlook-client-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. To use this connector, satisfy all <>. + +[discrete#es-connectors-outlook-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-outlook-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, use the *Outlook* tile from the connectors list OR *Customized connector* workflow. + +For additional operations, see <>. + +[discrete#es-connectors-outlook-client-connecting-to-outlook] +===== Connecting to Outlook + +Outlook connector supports both cloud (Office365 Outlook) and on-premises (Exchange Server) platforms. + +[discrete#es-connectors-outlook-client-connect-to-exchange-server] +====== Connect to Exchange Server + +In order to connect to Exchange server, the connector fetches Active Directory users with the help of `ldap3` python library. + +[discrete#es-connectors-outlook-client-connect-to-office365-outlook-outlook-cloud] +====== Connect to Office365 Outlook (Outlook Cloud) + +To integrate with the Outlook connector using Azure, follow these steps to create and configure an Azure application: + +1. Navigate to the https://portal.azure.com/[Azure Portal] and log in using your credentials. +2. Click on *App registrations* to register a new application. +3. Navigate to the *Overview* tab. Make a note of the `Client ID` and `Tenant ID`. +4. Click on the *Certificates & secrets* tab and create a new client secret. Keep this secret handy. +5. Go to the *API permissions* tab. + * Click on "Add permissions." + * Choose "APIs my organization uses." + * Search for and select "Office 365 Exchange Online." + * Add the `full_access_as_app` application permission. + +You can now use the Client ID, Tenant ID, and Client Secret you've noted to configure the Outlook connector. + +[discrete#es-connectors-outlook-client-configuration] +===== Configuration + +`data_source`:: (required) +Dropdown to determine Outlook platform type: `outlook_cloud` or `outlook_server`. Default value is `outlook_cloud`. + +`tenant_id`:: (required if data source is outlook_cloud) +The Tenant ID for the Azure account hosting the Outlook instance. + +`client_id`:: (required if data source is outlook_cloud) +The Client ID to authenticate with Outlook instance. + +`client_secret`:: (required if data source is outlook_cloud) +The Client Secret value to authenticate with Outlook instance. + +`exchange_server`:: (required if data source is outlook_server) +IP address to connect with Exchange server. 
Example: `127.0.0.1` + +`active_directory_server`:: (required if data source is outlook_server) +IP address to fetch users from Exchange Active Directory to fetch data. Example: `127.0.0.1` + +`username`:: (required if data source is outlook_server) +Username to authenticate with Exchange server. + +`password`:: (required if data source is outlook_server) +Password to authenticate with Exchange server. + +`domain`:: (required if data source is outlook_server) +Domain name for Exchange server users such as `gmail.com` or `exchange.local`. + +`ssl_enabled`:: +Whether SSL verification will be enabled. Default value is `False`. +*Note:* This configuration is applicable for `Outlook Server` only. + +`ssl_ca`:: (required if ssl is enabled) +Content of SSL certificate. Example certificate: ++ +[text] +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +`use_text_extraction_service`:: +Use <>. +Default value is `False`. + +`document_level_security`:: +Toggle to enable <>. +When enabled: ++ +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs fetch users' access control lists and store them in a separate index. + +*Note:* This configuration is applicable for `Outlook Server` only. + +[discrete#es-connectors-outlook-client-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-outlook-client-content-extraction] +===== Content Extraction + +Refer to <>. + +[discrete#es-connectors-outlook-client-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Mails* +** *Inbox Mails* +** *Sent Mails* +** *Archive Mails* +** *Junk Mails* +* *Contacts* +* *Calendar Events* +* *Tasks* +* *Attachments* +** *Mail Attachments* +** *Task Attachments* +** *Calendar Attachments* + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-outlook-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-outlook-client-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-outlook-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-outlook-client-advanced-sync-rules] +===== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. 
+ +[discrete#es-connectors-outlook-client-connector-client-operations] +===== Connector Client operations + +[discrete#es-connectors-outlook-client-end-to-end-testing] +====== End-to-end Testing + +*Note:* End-to-end testing is not available in the current version of the connector. + +[discrete#es-connectors-outlook-client-known-issues] +====== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-outlook-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-outlook-client-security] +===== Security + +See <>. + +[discrete#es-connectors-outlook-client-source] +===== Framework and source + +This connector is included in the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/outlook.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-postgresql.asciidoc b/docs/reference/connector/docs/connectors-postgresql.asciidoc new file mode 100644 index 0000000000000..1fe28f867337c --- /dev/null +++ b/docs/reference/connector/docs/connectors-postgresql.asciidoc @@ -0,0 +1,644 @@ +[#es-connectors-postgresql] +=== Elastic PostgreSQL connector reference +++++ +PostgreSQL +++++ +// Attributes used in this file +:service-name: PostgreSQL +:service-name-stub: postgresql + +The _Elastic PostgreSQL connector_ is a connector for https://www.postgresql.org[PostgreSQL^]. +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). +View the specific {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +.Choose your connector reference +******************************* +Are you using an Elastic managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#connectors-postgresql-native-connector-reference] +=== *Elastic managed connector (Elastic Cloud)* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#connectors-postgresql-availability-prerequisites] +==== Availability and prerequisites + +This connector is available as an *Elastic managed connector* in Elastic versions *8.8.0 and later*. +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#connectors-postgresql-create-native-connector] +==== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#connectors-postgresql-usage] +==== Usage + +To use this connector as an *Elastic managed connector*, use the *Connector* workflow. +See <>. + +[TIP] +==== +Users must set `track_commit_timestamp` to `on`. +To do this, run `ALTER SYSTEM SET track_commit_timestamp = on;` in PostgreSQL server. +==== + +For additional operations, see <<-esconnectors-usage>>. 
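+
+For example, a minimal `psql` sketch for enabling and then verifying this setting might look like the following (the host, port, user, and database names are placeholders for your own environment; `track_commit_timestamp` only takes effect after the PostgreSQL server is restarted):
+
+[source,shell]
+----
+# Enable commit timestamp tracking (requires sufficient privileges and a server restart).
+psql -h 192.158.1.38 -p 5432 -U admin -d employee_database \
+  -c "ALTER SYSTEM SET track_commit_timestamp = on;"
+
+# After restarting PostgreSQL, confirm the setting is active.
+psql -h 192.158.1.38 -p 5432 -U admin -d employee_database \
+  -c "SHOW track_commit_timestamp;"
+----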
+ +[NOTE] +==== +For an end-to-end example of the connector client workflow, see <>. +==== + +[discrete#connectors-postgresql-compatibility] +==== Compatibility + +PostgreSQL versions 11 to 15 are compatible with the Elastic connector. + +[discrete#connectors-postgresql-configuration] +==== Configuration + +Set the following configuration fields: + +Host:: +The server host address where the PostgreSQL instance is hosted. +Examples: ++ +* `192.158.1.38` +* `demo.instance.demo-region.demo.service.com` + +Port:: +The port where the PostgreSQL instance is hosted. +Examples: ++ +* `5432` (default) + +Username:: +The username of the PostgreSQL account. + +Password:: +The password of the PostgreSQL account. + +Database:: +Name of the PostgreSQL database. +Examples: ++ +* `employee_database` +* `customer_database` + +Schema:: +The schema of the PostgreSQL database. + +Comma-separated List of Tables:: +A list of tables separated by commas. +The PostgreSQL connector will fetch data from all tables present in the configured database, if the value is `*` . +Default value is `*`. +Examples: ++ +* `table_1, table_2` +* `*` ++ +[WARNING] +==== +This field can be bypassed when using advanced sync rules. +==== + +Enable SSL:: +Toggle to enable SSL verification. +Disabled by default. + +SSL Certificate:: +Content of SSL certificate. +If SSL is disabled, the `ssl_ca` value will be ignored. ++ +.*Expand* to see an example certificate +[%collapsible] +==== +``` +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV +BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ +BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa +MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO +b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV +z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW +CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj +hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko +i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t +njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea +MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr +BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2 +MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv +Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0 +gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4 +CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA +iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS +hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk +xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +``` +==== + +[discrete#connectors-postgresql-documents-syncs] +==== Documents and syncs + +* Tables must be owned by a PostgreSQL user. +* Tables with no primary key defined are skipped. +* To fetch the last updated time in PostgreSQL, `track_commit_timestamp` must be set to `on`. +Otherwise, all data will be indexed in every sync. + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. 
+==== + +[discrete#connectors-postgresql-sync-rules] +==== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#connectors-postgresql-sync-rules-advanced] +===== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +[discrete#connectors-postgresql-sync-rules-advanced-example-data] +====== Example data + +Here is some example data that will be used in the following examples. + +[discrete#connectors-postgresql-sync-rules-advanced-example-data-1] +======= `employee` table + +[cols="3*", options="header"] +|=== +| emp_id | name | age +| 3 | John | 28 +| 10 | Jane | 35 +| 14 | Alex | 22 +|=== + +[discrete#connectors-postgresql-sync-rules-advanced-example-2] +======= `customer` table + +[cols="3*", options="header"] +|=== +| c_id | name | age +| 2 | Elm | 24 +| 6 | Pine | 30 +| 9 | Oak | 34 +|=== + +[discrete#connectors-postgresql-sync-rules-advanced-examples] +====== Advanced sync rules examples + +[discrete#connectors-postgresql-sync-rules-advanced-examples-1] +======= Multiple table queries + +[source,js] +---- +[ + { + "tables": [ + "employee" + ], + "query": "SELECT * FROM employee" + }, + { + "tables": [ + "customer" + ], + "query": "SELECT * FROM customer" + } +] +---- +// NOTCONSOLE + +[discrete#connectors-postgresql-sync-rules-advanced-examples-1-id-columns] +======= Multiple table queries with `id_columns` + +In 8.15.0, we added a new optional `id_columns` field in our advanced sync rules for the PostgreSQL connector. +Use the `id_columns` field to ingest tables which do not have a primary key. Include the names of unique fields so that the connector can use them to generate unique IDs for documents. + +[source,js] +---- +[ + { + "tables": [ + "employee" + ], + "query": "SELECT * FROM employee", + "id_columns": ["emp_id"] + }, + { + "tables": [ + "customer" + ], + "query": "SELECT * FROM customer", + "id_columns": ["c_id"] + } +] +---- +// NOTCONSOLE + +This example uses the `id_columns` field to specify the unique fields `emp_id` and `c_id` for the `employee` and `customer` tables, respectively. + +[discrete#connectors-postgresql-sync-rules-advanced-examples-2] +======= Filtering data with `WHERE` clause + +[source,js] +---- +[ + { + "tables": ["employee"], + "query": "SELECT * FROM employee WHERE emp_id > 5" + } +] +---- +// NOTCONSOLE + +[discrete#connectors-postgresql-sync-rules-advanced-examples-3] +======= `JOIN` operations + +[source,js] +---- +[ + { + "tables": ["employee", "customer"], + "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" + } +] +---- +// NOTCONSOLE + +[WARNING] +==== +When using advanced rules, a query can bypass the configuration field `tables`. +This will happen if the query specifies a table that doesn't appear in the configuration. +This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. +==== + +[discrete#connectors-postgresql-known-issues] +==== Known issues + +There are no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#connectors-postgresql-troubleshooting] +==== Troubleshooting + +See <>. + +[discrete#connectors-postgresql-security] +==== Security + +See <>. 
+
+// Closing the collapsible section
+===============
+
+[discrete#es-connectors-postgresql-connector-client-reference]
+=== *Self-managed connector*
+
+.View *self-managed connector* reference
+
+[%collapsible]
+===============
+
+[discrete#es-connectors-postgresql-client-availability-prerequisites]
+==== Availability and prerequisites
+
+This connector is available as a *self-managed connector*.
+To use this connector, satisfy all <>.
+
+[discrete#es-connectors-postgresql-create-connector-client]
+==== Create a {service-name} connector
+include::_connectors-create-client.asciidoc[]
+
+[discrete#es-connectors-postgresql-client-usage]
+==== Usage
+
+To use this connector as a *self-managed connector*, see <>.
+
+[TIP]
+====
+Users must set `track_commit_timestamp` to `on`.
+To do this, run `ALTER SYSTEM SET track_commit_timestamp = on;` in PostgreSQL server.
+====
+
+For additional operations, see <>.
+
+[NOTE]
+====
+For an end-to-end example of the self-managed connector workflow, see <>.
+====
+
+[discrete#es-connectors-postgresql-client-compatibility]
+==== Compatibility
+
+PostgreSQL versions 11 to 15 are compatible with the Elastic connector framework.
+
+[discrete#es-connectors-postgresql-client-configuration]
+==== Configuration
+
+[TIP]
+====
+When using the <>, these fields will initially use the default configuration set in the https://github.com/elastic/connectors-python/blob/{branch}/connectors/sources/postgresql.py[connector source code^].
+
+These configurable fields will be rendered with their respective *labels* in the Kibana UI.
+Once connected, users will be able to update these values in Kibana.
+====
+
+Set the following configuration fields:
+
+`host`::
+The server host address where the PostgreSQL instance is hosted.
+Examples:
++
+* `192.158.1.38`
+* `demo.instance.demo-region.demo.service.com`
+
+`port`::
+The port where the PostgreSQL instance is hosted.
+Examples:
++
+* `5432`
+* `9090`
+
+`username`::
+The username of the PostgreSQL account.
+
+`password`::
+The password of the PostgreSQL account.
+
+`database`::
+Name of the PostgreSQL database.
+Examples:
++
+* `employee_database`
+* `customer_database`
+
+`schema`::
+The schema of the PostgreSQL database.
+
+`tables`::
+A list of tables separated by commas.
+The PostgreSQL connector will fetch data from all tables present in the configured database if the value is `*`.
+Default value is `*`.
+Examples:
++
+* `table_1, table_2`
+* `*`
++
+[WARNING]
+====
+This field can be bypassed when using advanced sync rules.
+====
+
+`ssl_enabled`::
+Whether SSL verification will be enabled.
+Default value is `True`.
+
+`ssl_ca`::
+Content of SSL certificate (if SSL is enabled).
+If SSL is disabled, the `ssl_ca` value will be ignored.
++
+.*Expand* to see an example certificate
+[%collapsible]
+====
+```
+-----BEGIN CERTIFICATE-----
+MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT
+AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV
+BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ
+BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa
+MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO
+b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z
+dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV
+z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW
+CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj
+hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko
+i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t
+njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea
+MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr
+BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2
+MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv
+Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0
+gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4
+CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA
+iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS
+hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk
+xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr
+7RhLQyWn2u00L7/9Omw=
+-----END CERTIFICATE-----
+```
+====
+
+[discrete#es-connectors-postgresql-client-docker]
+==== Deployment using Docker
+
+include::_connectors-docker-instructions.asciidoc[]
+
+[discrete#es-connectors-postgresql-client-documents-syncs]
+==== Documents and syncs
+
+* Tables must be owned by a PostgreSQL user.
+* Tables with no primary key defined are skipped.
+* To fetch the last updated time in PostgreSQL, `track_commit_timestamp` must be set to `on`.
+Otherwise, all data will be indexed in every sync.
+
+[NOTE]
+====
+* Files bigger than 10 MB won't be extracted.
+* Permissions are not synced.
+**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment.
+====
+
+[discrete#es-connectors-postgresql-client-sync-rules]
+==== Sync rules
+
+<> are identical for all connectors and are available by default.
+
+[discrete#es-connectors-postgresql-client-sync-rules-advanced]
+===== Advanced sync rules
+
+[NOTE]
+====
+A <> is required for advanced sync rules to take effect.
+====
+
+Advanced sync rules are defined through a source-specific DSL JSON snippet.
+
+[discrete#es-connectors-postgresql-client-sync-rules-advanced-example-data]
+====== Example data
+
+Here is some example data that will be used in the following examples.
+ +[discrete#es-connectors-postgresql-client-sync-rules-advanced-example-data-1] +======= `employee` table + +[cols="3*", options="header"] +|=== +| emp_id | name | age +| 3 | John | 28 +| 10 | Jane | 35 +| 14 | Alex | 22 +|=== + +[discrete#es-connectors-postgresql-client-sync-rules-advanced-example-2] +======= `customer` table + +[cols="3*", options="header"] +|=== +| c_id | name | age +| 2 | Elm | 24 +| 6 | Pine | 30 +| 9 | Oak | 34 +|=== + +[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples] +====== Advanced sync rules examples + +[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-1] +======== Multiple table queries + +[source,js] +---- +[ + { + "tables": [ + "employee" + ], + "query": "SELECT * FROM employee" + }, + { + "tables": [ + "customer" + ], + "query": "SELECT * FROM customer" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-1-id-columns] +======== Multiple table queries with `id_columns` + +In 8.15.0, we added a new optional `id_columns` field in our advanced sync rules for the PostgreSQL connector. +Use the `id_columns` field to ingest tables which do not have a primary key. Include the names of unique fields so that the connector can use them to generate unique IDs for documents. + +[source,js] +---- +[ + { + "tables": [ + "employee" + ], + "query": "SELECT * FROM employee", + "id_columns": ["emp_id"] + }, + { + "tables": [ + "customer" + ], + "query": "SELECT * FROM customer", + "id_columns": ["c_id"] + } +] +---- +// NOTCONSOLE + +This example uses the `id_columns` field to specify the unique fields `emp_id` and `c_id` for the `employee` and `customer` tables, respectively. + +[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-2] +======== Filtering data with `WHERE` clause + +[source,js] +---- +[ + { + "tables": ["employee"], + "query": "SELECT * FROM employee WHERE emp_id > 5" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-3] +======== `JOIN` operations + +[source,js] +---- +[ + { + "tables": ["employee", "customer"], + "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" + } +] +---- +// NOTCONSOLE + +[WARNING] +==== +When using advanced rules, a query can bypass the configuration field `tables`. +This will happen if the query specifies a table that doesn't appear in the configuration. +This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. +==== + +[discrete#es-connectors-postgresql-client-client-operations-testing] +==== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the PostgreSQL connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=postgresql +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=postgresql DATA_SIZE=small +---- + +[discrete#es-connectors-postgresql-client-known-issues] +==== Known issues + +There are no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-postgresql-client-troubleshooting] +==== Troubleshooting + +See <>. + +[discrete#es-connectors-postgresql-client-security] +==== Security + +See <>. 
+ +// Closing the collapsible section +=============== \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-redis.asciidoc b/docs/reference/connector/docs/connectors-redis.asciidoc new file mode 100644 index 0000000000000..7aad7b0b41497 --- /dev/null +++ b/docs/reference/connector/docs/connectors-redis.asciidoc @@ -0,0 +1,293 @@ +[#es-connectors-redis] +=== Redis connector reference +++++ +Redis +++++ + +// Attributes (AKA variables) used in this file +:service-name: Redis +:service-name-stub: redis + +The Redis connector is built with the Elastic connectors Python framework and is available as a self-managed <>. +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +[discrete#es-connectors-redis-connector-availability-and-prerequisites] +==== Availability and prerequisites + +This connector was introduced in Elastic *8.13.0*, available as a *self-managed* self-managed connector. + +To use this connector, satisfy all <>. +Importantly, you must deploy the connectors service on your own infrastructure. +You have two deployment options: + +* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally. +* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform. + +[NOTE] +==== +This connector is in *technical preview* and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. +==== + +[discrete#es-connectors-redis-connector-usage] +==== Usage + +To set up this connector in the UI, select the *Redis* tile when creating a new connector under *Search -> Connectors*. + +For additional operations, see <>. + +[discrete#es-connectors-redis-connector-docker] +==== Deploy with Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-redis-connector-configuration] +==== Configuration + +`host` (required):: +The IP of your Redis server/cloud. Example: + +* `127.0.0.1` +* `redis-12345.us-east-1.ec2.cloud.redislabs.com` + +`port` (required):: +Port where the Redis server/cloud instance is hosted. Example: + +* `6379` + +`username` (optional):: +Username for your Redis server/cloud. Example: + +* `default` + +`password` (optional):: +Password for your Redis server/cloud instance. Example: + +* `changeme` + +`database` (required):: +List of database index for your Redis server/cloud. * will fetch data from all databases. Example: + +* `0,1,2` +* `*` ++ +[NOTE] +==== +This field is ignored when using advanced sync rules. +==== + +`ssl_enabled`:: +Toggle to use SSL/TLS. Disabled by default. + +`mutual_tls_enabled`:: +Toggle to use secure mutual SSL/TLS. Ensure that your Redis deployment supports mutual SSL/TLS connections. Disabled by default. Depends on `ssl_enabled`. + +`tls_certfile`:: +Specifies the certificate from the Certificate Authority. The value of the certificate is used to validate the certificate presented by the Redis instance. Depends on `mutual_tls_enabled`. + +`tls_keyfile`:: +Specifies the client private key. The value of the key is used to validate the connection in the Redis instance. +Depends on `mutual_tls_enabled`. 
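+
+Before entering these values, it can help to confirm that they actually work against your Redis instance. A quick `redis-cli` sketch (the host, port, and credentials below are the placeholder values from the examples above; drop `--user`/`--pass` if your server has no authentication configured) might look like:
+
+[source,shell]
+----
+# Check connectivity and credentials; a healthy server replies with PONG.
+redis-cli -h 127.0.0.1 -p 6379 --user default --pass changeme PING
+
+# List the database indexes that currently hold keys (db0, db1, ...),
+# which is useful when filling in the `database` field.
+redis-cli -h 127.0.0.1 -p 6379 --user default --pass changeme INFO keyspace
+----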
+
+[discrete#es-connectors-redis-connector-documents-and-syncs]
+==== Documents and syncs
+
+The connector syncs the following objects and entities:
+
+* KEYS and VALUES of every database index
+
+
+[NOTE]
+====
+* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the relevant Elasticsearch index.
+====
+
+[discrete#es-connectors-redis-connector-sync-rules]
+==== Sync rules
+
+<> are identical for all connectors and are available by default.
+
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules]
+==== Advanced Sync Rules
+
+<> are defined through a source-specific DSL JSON snippet.
+
+Use advanced sync rules to filter data at the Redis source, without needing to index all data into Elasticsearch.
+
+They take the following parameters:
+
+1. `database`: Specify the Redis database index as an integer value.
+2. `key_pattern`: Pattern for finding keys in Redis.
+3. `type`: Type of the Redis key.
++
+Supported values:
+
+* `HASH`
+* `LIST`
+* `SET`
+* `STREAM`
+* `STRING`
+* `ZSET`
+
+[NOTE]
+====
+Provide at least one of the following: `key_pattern` or `type`, or both.
+====
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-examples]
+===== Advanced sync rules examples
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-example-1]
+====== Example 1
+
+*Fetch database records where keys start with `alpha`*:
+
+[source,js]
+----
+[
+  {
+    "database": 0,
+    "key_pattern": "alpha*"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-example-2]
+====== Example 2
+
+*Fetch database records with exact match by specifying the full key name:*
+
+[source,js]
+----
+[
+  {
+    "database": 0,
+    "key_pattern": "alpha"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-example-3]
+====== Example 3
+
+*Fetch database records where keys start with `test1`, `test2` or `test3`:*
+
+[source,js]
+----
+[
+  {
+    "database": 0,
+    "key_pattern": "test[123]"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-example-4]
+====== Example 4
+
+*Exclude database records where keys start with `test1`, `test2` or `test3`:*
+
+[source,js]
+----
+[
+  {
+    "database": 0,
+    "key_pattern": "test[^123]"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-example-5]
+====== Example 5
+
+*Fetch all database records:*
+
+[source,js]
+----
+[
+  {
+    "database": 0,
+    "key_pattern": "*"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-example-6]
+====== Example 6
+
+*Fetch all database records where type is `SET`:*
+
+[source,js]
+----
+[
+  {
+    "database": 0,
+    "key_pattern": "*",
+    "type": "SET"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-redis-connector-advanced-sync-rules-example-7]
+====== Example 7
+
+*Fetch database records where type is `SET`*:
+
+[source,js]
+----
+[
+  {
+    "database": 0,
+    "type": "SET"
+  }
+]
+----
+// NOTCONSOLE
+
+[discrete#es-connectors-redis-connector-connector-client-operations]
+==== Connector Client operations
+
+[discrete#es-connectors-redis-connector-end-to-end-testing]
+===== End-to-end Testing
+
+The connector framework enables operators to run functional tests against a real data source, using Docker Compose.
+You don't need a running Elasticsearch instance or Redis source to run this test.
+
+Refer to <> for more details.
+ +To perform E2E testing for the Redis connector, run the following command: + +```shell +$ make ftest NAME=redis +``` +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=redis DATA_SIZE=small +---- + +By default, `DATA_SIZE=MEDIUM`. + +[discrete#es-connectors-redis-connector-known-issues] +==== Known issues + +* The last modified time is unavailable when retrieving keys/values from the Redis database. +As a result, *all objects* are indexed each time an advanced sync rule query is executed. + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-redis-connector-troubleshooting] +==== Troubleshooting + +See <>. + +[discrete#es-connectors-redis-connector-security] +==== Security + +See <>. \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-refs.asciidoc b/docs/reference/connector/docs/connectors-refs.asciidoc new file mode 100644 index 0000000000000..51580410d5bc5 --- /dev/null +++ b/docs/reference/connector/docs/connectors-refs.asciidoc @@ -0,0 +1,64 @@ +[#es-connectors-refs] +== Connectors references + +The following pages contain the reference documentation for each individual connector: + +include::_connectors-list-clients.asciidoc[] + +include::connectors-azure-blob.asciidoc[] + +include::connectors-box.asciidoc[] + +include::connectors-confluence.asciidoc[] + +include::connectors-dropbox.asciidoc[] + +include::connectors-github.asciidoc[] + +include::connectors-gmail.asciidoc[] + +include::connectors-google-cloud.asciidoc[] + +include::connectors-google-drive.asciidoc[] + +include::connectors-graphql.asciidoc[] + +include::connectors-jira.asciidoc[] + +include::connectors-ms-sql.asciidoc[] + +include::connectors-mongodb.asciidoc[] + +include::connectors-mysql.asciidoc[] + +include::connectors-network-drive.asciidoc[] + +include::connectors-notion.asciidoc[] + +include::connectors-onedrive.asciidoc[] + +include::connectors-opentext-documentum.asciidoc[] + +include::connectors-oracle.asciidoc[] + +include::connectors-outlook.asciidoc[] + +include::connectors-postgresql.asciidoc[] + +include::connectors-redis.asciidoc[] + +include::connectors-s3.asciidoc[] + +include::connectors-salesforce.asciidoc[] + +include::connectors-servicenow.asciidoc[] + +include::connectors-sharepoint-online.asciidoc[] + +include::connectors-sharepoint.asciidoc[] + +include::connectors-slack.asciidoc[] + +include::connectors-teams.asciidoc[] + +include::connectors-zoom.asciidoc[] \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-release-notes.asciidoc b/docs/reference/connector/docs/connectors-release-notes.asciidoc new file mode 100644 index 0000000000000..723671b049bf2 --- /dev/null +++ b/docs/reference/connector/docs/connectors-release-notes.asciidoc @@ -0,0 +1,10 @@ +[#es-connectors-release-notes] +== Connector release notes +++++ +Release notes +++++ + +[INFO] +==== +Prior to version 8.16.0, the connector release notes were published as part of the https://www.elastic.co/guide/en/enterprise-search/current/changelog.html[Enterprise Search documentation]. 
+====
diff --git a/docs/reference/connector/docs/connectors-run-from-docker.asciidoc b/docs/reference/connector/docs/connectors-run-from-docker.asciidoc
new file mode 100644
index 0000000000000..1688b945f05bc
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-run-from-docker.asciidoc
@@ -0,0 +1,95 @@
+[#es-connectors-run-from-docker]
+=== Running from a Docker container
+
+[TIP]
+====
+Use our <> to quickly get started with a full Elastic Stack deployment using Connectors.
+====
+
+Instead of running the Connectors Service from source, you can use the official Docker image to run the service in a container.
+
+As a prerequisite, you need to have an Elasticsearch and Kibana instance running.
+From inside your Kibana UI, you will need to <> in the same manner as if you were running the service from source.
+
+When you are ready to run Connectors:
+
+*Step 1: Download sample configuration file*
+
+Download the sample configuration file.
+You can either download it manually or run the following command:
+
+[source,sh]
+----
+curl https://raw.githubusercontent.com/elastic/connectors/main/config.yml.example --output /connectors-config/config.yml
+----
+// NOTCONSOLE
+
+Don't forget to change the `--output` argument value to the path where you want to save the `config.yml` file on your local system.
+Make a note of where you saved this file, as it is required in the `docker run` step below.
+
+*Step 2: Update the configuration file for your self-managed connector*
+
+Update the configuration file with the following settings to match your environment:
+
+* `elasticsearch.host`
+* `elasticsearch.api_key`
+* `connectors`
+
+If you're running the connector service against a Dockerized version of Elasticsearch and Kibana, your config file will look like this:
+
+[source,yaml,subs="attributes"]
+----
+# When connecting to your cloud deployment you should edit the host value
+elasticsearch.host: http://host.docker.internal:9200
+elasticsearch.api_key:
+
+connectors:
+  -
+    connector_id:
+    service_type: {service-name} # sharepoint_online (example)
+    api_key: # Optional. If not provided, the connector will use the elasticsearch.api_key instead
+
+----
+
+*Step 3: Run the Docker image*
+
+Run the Docker image with the Connector Service using the following command:
+
+[source,sh,subs="attributes"]
+----
+# NOTE: change the path in the -v option below to match where your config.yml is located
+docker run \
+-v "/connectors-config:/config" \
+--rm \
+--tty -i \
+--network host \
+docker.elastic.co/enterprise-search/elastic-connectors:{version}.0 \
+/app/bin/elastic-ingest \
+-c /config/config.yml
+----
+
+[TIP]
+====
+For unreleased versions, append the `-SNAPSHOT` suffix to the version number.
+For example, `docker.elastic.co/enterprise-search/elastic-connectors:8.14.0.0-SNAPSHOT`.
+====
+
+Find all available Docker images in the https://www.docker.elastic.co/r/enterprise-search/elastic-connectors[official registry].
+
+[discrete#es-build-connector-finalizes-kibana]
+==== Enter data source details in Kibana
+
+Once the connector service is running, it's time to head back to the Kibana UI to finalize the connector configuration.
+In this step, you need to add the specific connection details for your data source instance, such as the URL and authorization credentials.
+As mentioned above, these details will vary based on the third-party data source you're connecting to.
+ +For example, the PostgreSQL connector requires the following details: + +* *Host* +* *Port* +* *Username* +* *Password* +* *Database* +* *Comma-separated list of tables* + +You're now ready to run a sync. +Select the *Full sync* button in the Kibana UI to start ingesting documents into Elasticsearch. diff --git a/docs/reference/connector/docs/connectors-run-from-source.asciidoc b/docs/reference/connector/docs/connectors-run-from-source.asciidoc new file mode 100644 index 0000000000000..a5d1dc31074f2 --- /dev/null +++ b/docs/reference/connector/docs/connectors-run-from-source.asciidoc @@ -0,0 +1,103 @@ +[#es-connectors-run-from-source] +=== Running from the source code + +The basic way to run connectors is to clone the repository and run the code locally. +This is a good option if you are comfortable with Python and want to iterate quickly. + +[discrete#es-connectors-run-from-source-setup-kibana] +==== Initial setup in Kibana + +Follow the *Connector* workflow in the Kibana UI to select the *Connector* ingestion method. + +Next, complete these steps: + +1. Choose which third-party service you’d like to use by selecting a *data source*. +2. Create and name a new *Elasticsearch index*. +3. Generate a new *API key* and save it somewhere safe. +4. Name your connector and provide an optional description +5. *Convert* managed connector to a self-managed connector (_Only applicable if connector is also available natively_). This action is irreversible. +6. Copy the configuration block from the example shown on the screen. You’ll use this in a later step: ++ +[source,yaml] +---- +# ... +connectors: + - connector_id: + api_key: # Scoped API key for this connector (optional). If not specified, the top-level `elasticsearch.api_key` value is used. + service_type: gmail # example +---- + +[discrete#es-connectors-run-from-source-source-clone] +====== Clone the repository and edit `config.yml` + +Once you've created an index, and entered the access details for your data source, you're ready to deploy the connector service. + +First, you need to clone the `elastic/connectors` repository. + +Follow these steps: + +* Clone or fork the `connectors` repository locally with the following command: `git clone https://github.com/elastic/connectors`. +* Run `make config` to generate your initial `config.yml` file +* Open the `config.yml` configuration file in your editor of choice. +* Replace the values for `host` (your Elasticsearch endpoint), `api_key`, `connector_id`, and `service_type`. ++ +.*Expand* to see an example `config.yml` file +[%collapsible] +==== +Replace the values for `api_key`, `connector_id`, and `service_type` with the values you copied earlier. +[source,yaml] +---- +elasticsearch: + api_key: # Used to write data to .elastic-connectors and .elastic-connectors-sync-jobs + # Any connectors without a specific `api_key` value will default to using this key +connectors: + - connector_id: 1234 + api_key: # Used to write data to the `search-*` index associated with connector 1234 + # You may have multiple connectors in your config file! + - connector_id: 5678 + api_key: # Used to write data to the `search-*` index associated with connector 5678 + - connector_id: abcd # No explicit api key specified, so this connector will use +---- + +[discrete#es-connectors-run-from-source-api-keys] +[NOTE] +===== +**API keys for connectors** + +You can configure multiple connectors in your `config.yml` file. + +The Kibana UI enables you to create API keys that are scoped to a specific index/connector. 
+If you don't create an API key for a specific connector, the top-level `elasticsearch.api_key` or `elasticsearch.username:elasticsearch.password` value is used. + +If these top-level Elasticsearch credentials are not sufficiently privileged to write to individual connector indices, you'll need to create these additional, scoped API keys. + +Use the example above as a guide. +===== +==== + +[discrete#es-connectors-run-from-source-run] +====== Run the connector service + +[NOTE] +==== +You need Python version `3.10` or `3.11` to run the connectors service from source. +==== + +Once you've configured the connector code, you can run the connector service. + +In your terminal or IDE: + +. `cd` into the root of your `connectors` clone/fork. +. Run the following commands to compile and run the connector service: ++ +[source,shell] +---- +make install +make run +---- + +The connector service should now be running. +The UI will let you know that the connector has successfully connected to your Elasticsearch instance. + +As a reminder, here we're working locally. +In a production setup, you'll deploy the connector service to your own infrastructure. diff --git a/docs/reference/connector/docs/connectors-s3.asciidoc b/docs/reference/connector/docs/connectors-s3.asciidoc new file mode 100644 index 0000000000000..b4d08d3884631 --- /dev/null +++ b/docs/reference/connector/docs/connectors-s3.asciidoc @@ -0,0 +1,437 @@ +[#es-connectors-s3] +=== Elastic S3 connector reference +++++ +S3 +++++ +// Attributes used in this file: +:service-name: Amazon S3 +:service-name-stub: s3 + +The _Elastic S3 connector_ is a <> for https://aws.amazon.com/s3/[Amazon S3^] data sources. + + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-s3-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-s3-prerequisites] +===== Availability and prerequisites + +This connector is available natively in Elastic Cloud as of version *8.12.0*. +To use this connector, satisfy all <>. + +[discrete#es-connectors-s3-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-s3-usage] +===== Usage + +To use this managed connector, see <>. + +For additional operations, see <>. + +S3 users will also need to <> + +[discrete#es-connectors-s3-usage-create-iam] +====== Create an IAM identity + +Users need to create an IAM identity to use this connector as a *self-managed connector*. +Refer to https://docs.aws.amazon.com/IAM/latest/UserGuide/getting-set-up.html[the AWS documentation^]. + +The https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html[policy^] associated with the IAM identity must have the following *AWS permissions*: + +* `ListAllMyBuckets` +* `ListBucket` +* `GetBucketLocation` +* `GetObject` + +[discrete#es-connectors-s3-compatibility] +===== Compatibility + +Currently the connector does not support S3-compatible vendors. + +[discrete#es-connectors-s3-configuration] +===== Configuration + +The following configuration fields are required to *set up* the connector: + +AWS Buckets:: +List of S3 bucket names. +`*` will fetch data from all buckets. 
+Examples: ++ +* `testbucket, prodbucket` +* `testbucket` +* `*` + +[NOTE] +==== +This field is ignored when using advanced sync rules. +==== + +AWS Access Key ID:: +Access Key ID for the AWS identity that will be used for bucket access. + +AWS Secret Key:: +Secret Access Key for the AWS identity that will be used for bucket access. + +[discrete#es-connectors-s3-documents-syncs] +===== Documents and syncs + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-s3-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-s3-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +Use advanced sync rules to filter data to be fetched from Amazon S3 buckets. +They take the following parameters: + +1. `bucket`: S3 bucket the rule applies to. +2. `extension` (optional): Lists which file types to sync. Defaults to syncing all types. +3. `prefix` (optional): String of prefix characters. +The connector will fetch file and folder data that matches the string. +Defaults to `""` (syncs all bucket objects). + +[discrete#es-connectors-s3-sync-rules-advanced-examples] +======= Advanced sync rules examples + +*Fetching files and folders recursively by prefix* + +*Example*: Fetch files/folders in `folder1/docs`. + +[source,js] +---- +[ + { + "bucket": "bucket1", + "prefix": "folder1/docs" + } + +] +---- +// NOTCONSOLE + +*Example*: Fetch files/folder starting with `folder1`. + +[source,js] +---- +[ + { + "bucket": "bucket2", + "prefix": "folder1" + } +] +---- +// NOTCONSOLE + +*Fetching files and folders by specifying extensions* + +*Example*: Fetch all objects which start with `abc` and then filter using file extensions. + +[source,js] +---- +[ + { + "bucket": "bucket2", + "prefix": "abc", + "extension": [".txt", ".png"] + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-s3-content-extraction] +===== Content extraction + +See <>. + +[discrete#es-connectors-s3-known-issues] +===== Known issues + +There are no known issues for this connector. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-s3-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-s3-security] +===== Security + +See <>. + +[discrete#es-connectors-s3-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/s3.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-s3-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-s3-client-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. 
+This self-managed connector is compatible with Elastic versions *8.6.0+*. +To use this connector, satisfy all <>. + +[discrete#es-connectors-s3-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-s3-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <>. + +For additional operations, see <>. + +S3 users will also need to <> + +[discrete#es-connectors-s3-client-usage-create-iam] +====== Create an IAM identity + +Users need to create an IAM identity to use this connector as a *self-managed connector*. +Refer to https://docs.aws.amazon.com/IAM/latest/UserGuide/getting-set-up.html[the AWS documentation^]. + +The https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html[policy^] associated with the IAM identity must have the following *AWS permissions*: + +* `ListAllMyBuckets` +* `ListBucket` +* `GetBucketLocation` +* `GetObject` + +[discrete#es-connectors-s3-client-compatibility] +===== Compatibility + +Currently the connector does not support S3-compatible vendors. + +[discrete#es-connectors-s3-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, these fields will use the default configuration set in the https://github.com/elastic/connectors/blob/a5976d20cd8277ae46511f7176662afc889e56ec/connectors/sources/s3.py#L231-L258[connector source code^]. +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to *set up* the connector: + +`buckets`:: +List of S3 bucket names. +`*` will fetch data from all buckets. +Examples: ++ +* `testbucket, prodbucket` +* `testbucket` +* `*` + +[NOTE] +==== +This field is ignored when using advanced sync rules. +==== + +`aws_access_key_id`:: +Access Key ID for the AWS identity that will be used for bucket access. + +`aws_secret_access_key`:: +Secret Access Key for the AWS identity that will be used for bucket access. + +`read_timeout`:: +The `read_timeout` for Amazon S3. +Default value is `90`. + +`connect_timeout`:: +Connection timeout for crawling S3. +Default value is `90`. + +`max_attempts`:: +Maximum retry attempts. +Default value is `5`. + +`page_size`:: +Page size for iterating bucket objects in Amazon S3. +Default value is `100`. + +[discrete#es-connectors-s3-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-s3-client-documents-syncs] +===== Documents and syncs + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. +* Permissions are not synced. +**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-s3-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-s3-client-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +Use advanced sync rules to filter data to be fetched from Amazon S3 buckets. +They take the following parameters: + +1. `bucket`: S3 bucket the rule applies to. +2. `extension` (optional): Lists which file types to sync. 
Defaults to syncing all types. +3. `prefix` (optional): String of prefix characters. +The connector will fetch file and folder data that matches the string. +Defaults to `""` (syncs all bucket objects). + +[discrete#es-connectors-s3-client-sync-rules-advanced-examples] +======= Advanced sync rules examples + +*Fetching files and folders recursively by prefix* + +*Example*: Fetch files/folders in `folder1/docs`. + +[source,js] +---- +[ + { + "bucket": "bucket1", + "prefix": "folder1/docs" + } + +] +---- +// NOTCONSOLE + +*Example*: Fetch files/folder starting with `folder1`. + +[source,js] +---- +[ + { + "bucket": "bucket2", + "prefix": "folder1" + } +] +---- +// NOTCONSOLE + +*Fetching files and folders by specifying extensions* + +*Example*: Fetch all objects which start with `abc` and then filter using file extensions. + +[source,js] +---- +[ + { + "bucket": "bucket2", + "prefix": "abc", + "extension": [".txt", ".png"] + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-s3-client-content-extraction] +===== Content extraction + +See <>. + +[discrete#es-connectors-s3-client-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To execute a functional test for the Amazon S3 *self-managed connector*, run the following command: + +[source,shell] +---- +make ftest NAME=s3 +---- + +By default, this will use a medium-sized dataset. +To make the test faster add the `DATA_SIZE=small` argument: + +[source,shell] +---- +make ftest NAME=s3 DATA_SIZE=small +---- + +[discrete#es-connectors-s3-client-known-issues] +===== Known issues + +There are no known issues for this connector. + +See <> for any issues affecting all connectors. + +[discrete#es-connectors-s3-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-s3-client-security] +===== Security + +See <>. + +[discrete#es-connectors-s3-client-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/s3.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-salesforce.asciidoc b/docs/reference/connector/docs/connectors-salesforce.asciidoc new file mode 100644 index 0000000000000..3676f7663089c --- /dev/null +++ b/docs/reference/connector/docs/connectors-salesforce.asciidoc @@ -0,0 +1,801 @@ +[#es-connectors-salesforce] +=== Elastic Salesforce connector reference +++++ +Salesforce +++++ +// Attributes used in this file +:service-name: Salesforce +:service-name-stub: salesforce + +The _Elastic Salesforce connector_ is a <> for https://www.salesforce.com/[Salesforce^] data sources. + + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-salesforce-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-salesforce-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* in Elastic Cloud since *8.12.0*. +To use this connector, satisfy all <>. 
+ +[discrete#es-connectors-salesforce-compatability] +===== Compatibility + +This connector is compatible with the following: + +* Salesforce +* Salesforce Sandbox + +[discrete#es-connectors-salesforce-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-salesforce-usage] +===== Usage + +To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. + +For additional operations, see <>. + +[NOTE] +==== +You need to create an Salesforce connected app with OAuth2.0 enabled to authenticate with Salesforce. +==== + +[discrete#es-connectors-salesforce-connected-app] +====== Create a Salesforce connected app + +The Salesforce connector authenticates with Salesforce through a *connected app*. +Follow the official Salesforce documentation for https://help.salesforce.com/s/articleView?id=sf.connected_app_client_credentials_setup.htm[Configuring a Connected App for the OAuth 2.0 Client Credentials Flow^]. + +When creating the connected app, in the section titled *API (Enable OAuth Settings)* ensure the following settings are _enabled_: + +* *Enable OAuth Settings* +* *Enable for Device Flow* +** *Callback URL* should be the Salesforce dummy callback URL, `https://test.salesforce.com/services/oauth2/success` +* *Require Secret for Web Server Flow* +* *Require Secret for Refresh Token Flow* +* *Enable Client Credentials Flow* + +All other options should be disabled. +Finally, in the section *Selected OAuth Scopes*, include the following OAuth scopes: + +* *Manage user data via APIs (api)* +* *Perform requests at any time (refresh_token, offline_access)* + +[discrete#es-connectors-salesforce-admin-prerequisites] +===== Salesforce admin requirements + +By default, the Salesforce connector requires global administrator permissions to access Salesforce data. +Expand the section below to learn how to create a custom Salesforce user with minimal permissions. + +.*Create a custom Salesforce user with minimal permissions* +[%collapsible] +========================== +By creating a custom profile with sufficient permissions from the Setup menu, you can remove the system administrator role requirement for fetching data from Salesforce. + +To create a new profile: + +1. From the Salesforce Setup menu, go to *Administration => Users => Profiles*. +2. Create a new profile. +3. Choose `Read Only` or `Standard User` from the *Existing Profile* dropdown. Name the profile and save it. ++ +[TIP] +==== +By default, `Read Only` or `Standard User` users have read permission to access all standard objects. +==== ++ +4. Edit the newly created profile. Under *Object Permissions*, assign at least `Read` access to the standard objects and custom objects you want to ingest into Elasticsearch. +5. Make sure the newly created profile has at least `Read` access for the following standard objects: + +* Account +* Campaign +* Case +* Contact +* EmailMessage +* Lead +* Opportunity +* User ++ +[TIP] +==== +If using <> you'll need to assign `Read` access for that specific object in the profile. +==== ++ +6. Go to *Users => Profiles* and assign the newly created profile to the user. +7. Go to *Connected apps*, select your app and then select *Edit policies*. +Assign the client credentials flow to the user with the custom profile in Salesforce. ++ +Now, the connector can be configured for this user profile to fetch all object records, without needing the system administration role. 
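+
+Before handing the run-as user over to the connector, you can confirm that the connected app's client credentials flow works for that user by requesting a token directly.
+The following is an illustrative sketch only; substitute your own My Domain, client ID, and client secret:
+
+[source,shell]
+----
+curl -X POST "https://<your-domain>.my.salesforce.com/services/oauth2/token" \
+  -d "grant_type=client_credentials" \
+  -d "client_id=<client-id>" \
+  -d "client_secret=<client-secret>"
+----
+
+A JSON response containing an `access_token` indicates the flow is configured correctly for the assigned user.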
+========================== + +[discrete#es-connectors-salesforce-configuration] +===== Configuration + +The following settings are required to set up this connector: + +Domain (required):: +The domain for your Salesforce account. +This is the subdomain that appears in your Salesforce URL. +For example, if your Salesforce URL is `foo.my.salesforce.com`, then your domain would be `foo`. +If you are using Salesforce Sandbox, your URL will contain an extra subdomain and will look similar to `foo.sandbox.my.salesforce.com`. +In this case, your domain would be `foo.sandbox`. + +Client ID (required):: +The Client ID generated by your connected app. +The Salesforce documentation will sometimes also call this a *Consumer Key* + +Client Secret (required):: +The Client Secret generated by your connected app. +The Salesforce documentation will sometimes also call this a *Consumer Secret*. + +Enable document level security:: +Toggle to enable document level security (DLS). +Optional, disabled by default. +Refer to the <> for more information, including how to set various Salesforce permission types. ++ +When enabled: + +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. + +[discrete#es-connectors-salesforce-configuration-credentials] +====== Finding the Client ID and Client Secret + +The Client ID and Client Secret are not automatically shown to you after you create a connected app. +You can find them by taking the following steps: + +* Navigate to *Setup* +* Go to *Platform Tools > Apps > App Manager* +* Click on the triangle next to your app and select *View* +* After the page loads, click on *Manage Consumer Details* + +Your Client ID and Client Secret should now be visible at the top of the page. + +[discrete#es-connectors-salesforce-dls] +===== Document level security (DLS) + +<> enables you to restrict access to documents based on a user'­s permissions. +This feature is available by default for the Salesforce connector and supports both *standard and custom objects*. + +Salesforce allows users to set permissions in the following ways: + +* *Profiles* +* *Permission sets* +* *Permission set Groups* + +For guidance, refer to these https://howtovideos.hubs.vidyard.com/watch/B1bQnMFg2VyZq7V6zXQjPg#:~:text=This%20is%20a%20must%20watch,records%20in%20your%20Salesforce%20organization[video tutorials] about setting Salesforce permissions. + +To ingest any standard or custom objects, users must ensure that at least `Read` permission is granted to that object. +This can be granted using any of the following methods for setting permissions. + +[discrete#es-connectors-salesforce-dls-profiles] +====== Set Permissions using Profiles + +Refer to the https://help.salesforce.com/s/articleView?id=sf.admin_userprofiles.htm&type=5[Salesforce documentation] for setting permissions via Profiles. + +[discrete#es-connectors-salesforce-dls-permission-sets] +====== Set Permissions using Permissions Set + +Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_sets_overview.htm&language=en_US&type=5[Salesforce documentation] for setting permissions via Permissions Sets. + +[discrete#es-connectors-salesforce-dls-permission-set-groups] +====== Set Permissions using Permissions Set group + +Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_set_groups.htm&type=5[Salesforce documentation] for setting permissions via Permissions Set Groups. 
+ +[discrete#es-connectors-salesforce-dls-assign-permissions] +====== Assign Profiles, Permission Set and Permission Set Groups to the User + +Once the permissions are set, assign the Profiles, Permission Set or Permission Set Groups to the user. +Follow these steps in Salesforce: + +1. Navigate to `Administration` under the `Users` section. +2. Select `Users` and choose the user to set the permissions to. +3. Set the `Profile`, `Permission Set` or `Permission Set Groups` created in the earlier steps. + +[discrete#es-connectors-salesforce-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-salesforce-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector. +Advanced sync rules enable filtering of data in Salesforce _before_ indexing into Elasticsearch. + +They take the following parameters: + +. `query` : Salesforce query to filter the documents. +. `language` : Salesforce query language. +Allowed values are *SOQL* and *SOSL*. + +[discrete#es-connectors-salesforce-sync-rules-advanced-fetch-query-language] +======= Fetch documents based on the query and language specified + +**Example**: Fetch documents using SOQL query + +[source,js] +---- +[ + { + "query": "SELECT Id, Name FROM Account", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents using SOSL query. + +[source,js] +---- +[ + { + "query": "FIND {Salesforce} IN ALL FIELDS", + "language": "SOSL" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-salesforce-sync-rules-advanced-fetch-objects] +======= Fetch standard and custom objects using SOQL and SOSL queries + +**Example**: Fetch documents for standard objects via SOQL and SOSL query. + +[source,js] +---- +[ + { + "query": "SELECT Account_Id, Address, Contact_Number FROM Account", + "language": "SOQL" + }, + { + "query": "FIND {Alex Wilber} IN ALL FIELDS RETURNING Contact(LastModifiedDate, Name, Address)", + "language": "SOSL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents for custom objects via SOQL and SOSL query. + +[source,js] +---- +[ + { + "query": "SELECT Connector_Name, Version FROM Connector__c", + "language": "SOQL" + }, + { + "query": "FIND {Salesforce} IN ALL FIELDS RETURNING Connectors__c(Id, Connector_Name, Connector_Version)", + "language": "SOSL" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-salesforce-sync-rules-advanced-fetch-standard-custom-fields] +======= Fetch documents with standard and custom fields + +**Example**: Fetch documents with all standard and custom fields for Account object. + +[source,js] +---- +[ + { + "query": "SELECT FIELDS(ALL) FROM Account", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents with all custom fields for Connector object. + +[source,js] +---- +[ + { + "query": "SELECT FIELDS(CUSTOM) FROM Connector__c", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents with all standard fields for Account object. 
+ +[source,js] +---- +[ + { + "query": "SELECT FIELDS(STANDARD) FROM Account", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-salesforce-documents-syncs] +===== Documents and syncs + +The connector syncs the following Salesforce objects: + +* *Accounts* +* *Campaigns* +* *Cases* +* *Contacts* +* *Content Documents* (files uploaded to Salesforce) +* *Leads* +* *Opportunities* + +The connector will not ingest any objects that it does not have permissions to query. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced by default. You must enable <>. Otherwise, **all documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-salesforce-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-salesforce-content-extraction] +===== Content Extraction + +The connector will retrieve Content Documents from your Salesforce source if they meet the following criteria: + +* Are attached to one or more objects that are synced +* Are of a file type that can be extracted + +This means that the connector will not ingest any Content Documents you have that are _not_ attached to a supported Salesforce object. +See <> for a list of supported object types. + +If a single Content Document is attached to multiple supported objects, only one Elastic document will be created for it. +This document will retain links to every object that it was connected to in the `related_ids` field. + +See <> for more specifics on content extraction. + +[discrete#es-connectors-salesforce-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-salesforce-security] +===== Security + +See <>. + +[discrete#es-connectors-salesforce-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/salesforce.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-salesforce-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-salesforce-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +This self-managed connector is compatible with Elastic versions *8.10.0+*. +To use this connector, satisfy all <>. 
+ +[discrete#es-connectors-salesforce-client-compatability] +===== Compatibility + +This connector is compatible with the following: + +* Salesforce +* Salesforce Sandbox + +[discrete#es-connectors-salesforce-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-salesforce-client-usage] +===== Usage + +To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. + +For additional operations, see <>. + +[NOTE] +==== +You need to create an Salesforce connected app with OAuth2.0 enabled to authenticate with Salesforce. +==== + +[discrete#es-connectors-salesforce-client-connected-app] +====== Create a Salesforce connected app + +The Salesforce connector authenticates with Salesforce through a *connected app*. +Follow the official Salesforce documentation for https://help.salesforce.com/s/articleView?id=sf.connected_app_client_credentials_setup.htm[Configuring a Connected App for the OAuth 2.0 Client Credentials Flow^]. + +When creating the connected app, in the section titled *API (Enable OAuth Settings)* ensure the following settings are _enabled_: + +* *Enable OAuth Settings* +* *Enable for Device Flow* +** *Callback URL* should be the Salesforce dummy callback URL, `https://test.salesforce.com/services/oauth2/success` +* *Require Secret for Web Server Flow* +* *Require Secret for Refresh Token Flow* +* *Enable Client Credentials Flow* + +All other options should be disabled. +Finally, in the section *Selected OAuth Scopes*, include the following OAuth scopes: + +* *Manage user data via APIs (api)* +* *Perform requests at any time (refresh_token, offline_access)* + +[discrete#es-connectors-client-salesforce-admin-prerequisites] +===== Salesforce admin requirements + +By default, the Salesforce connector requires global administrator permissions to access Salesforce data. +Expand the section below to learn how to create a custom Salesforce user with minimal permissions. + +.*Create a custom Salesforce user with minimal permissions* +[%collapsible] +========================== +By creating a custom profile with sufficient permissions from the Setup menu, you can remove the system administrator role requirement for fetching data from Salesforce. + +To create a new profile: + +1. From the Salesforce Setup menu, go to *Administration => Users => Profiles*. +2. Create a new profile. +3. Choose `Read Only` or `Standard User` from the *Existing Profile* dropdown. Name the profile and save it. ++ +[TIP] +==== +By default, `Read Only` or `Standard User` users have read permission to access all standard objects. +==== ++ +4. Edit the newly created profile. Under *Object Permissions*, assign at least `Read` access to the standard objects and custom objects you want to ingest into Elasticsearch. +5. Make sure the newly created profile has at least `Read` access for the following standard objects: + +* Account +* Campaign +* Case +* Contact +* EmailMessage +* Lead +* Opportunity +* User ++ +[TIP] +==== +If using <> you'll need to assign `Read` access for that specific object in the profile. +==== ++ +6. Go to *Users => Profiles* and assign the newly created profile to the user. +7. Go to *Connected apps*, select your app and then select *Edit policies*. +Assign the client credentials flow to the user with the custom profile in Salesforce. ++ +Now, the connector can be configured for this user profile to fetch all object records, without needing the system administration role. 
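+
+To double-check that the custom profile actually grants read access to an object you plan to sync, you can run a small SOQL query against the Salesforce REST API with an access token obtained through the client credentials flow.
+This is a sketch with placeholder values, not part of the connector itself:
+
+[source,shell]
+----
+curl --get "https://<your-domain>.my.salesforce.com/services/data/v58.0/query" \
+  --data-urlencode "q=SELECT Id FROM Account LIMIT 1" \
+  -H "Authorization: Bearer <access-token>"
+----
+
+If the query fails with a permissions-related error, the profile is most likely missing `Read` access on that object.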
+========================== + +[discrete#es-connectors-salesforce-client-docker] +===== Deployment using Docker + +Self-managed connectors are run on your own infrastructure. + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-salesforce-client-configuration] +===== Configuration + +The following settings are required to set up this connector: + +`domain`(required):: +The domain for your Salesforce account. +This is the subdomain that appears in your Salesforce URL. +For example, if your Salesforce URL is `foo.my.salesforce.com`, then your domain would be `foo`. +If you are using Salesforce Sandbox, your URL will contain an extra subdomain and will look similar to `foo.sandbox.my.salesforce.com`. +In this case, your domain would be `foo.sandbox`. + +`client_id`(required):: +The Client ID generated by your connected app. +The Salesforce documentation will sometimes also call this a *Consumer Key* + +`client_secret`(required):: +The Client Secret generated by your connected app. +The Salesforce documentation will sometimes also call this a *Consumer Secret*. + +`use_document_level_security`:: +Toggle to enable document level security (DLS). +Optional, disabled by default. +Refer to the <> for more information, including how to set various Salesforce permission types. ++ +When enabled: + +* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +* Access control syncs will fetch users' access control lists and store them in a separate index. + +[discrete#es-connectors-salesforce-client-configuration-credentials] +====== Finding the Client ID and Client Secret + +The Client ID and Client Secret are not automatically shown to you after you create a connected app. +You can find them by taking the following steps: + +* Navigate to *Setup* +* Go to *Platform Tools > Apps > App Manager* +* Click on the triangle next to your app and select *View* +* After the page loads, click on *Manage Consumer Details* + +Your Client ID and Client Secret should now be visible at the top of the page. + +[discrete#es-connectors-salesforce-client-dls] +===== Document level security (DLS) + +<> enables you to restrict access to documents based on a user'­s permissions. +This feature is available by default for the Salesforce connector and supports both *standard and custom objects*. + +Salesforce allows users to set permissions in the following ways: + +* *Profiles* +* *Permission sets* +* *Permission set Groups* + +For guidance, refer to these https://howtovideos.hubs.vidyard.com/watch/B1bQnMFg2VyZq7V6zXQjPg#:~:text=This%20is%20a%20must%20watch,records%20in%20your%20Salesforce%20organization[video tutorials] about setting Salesforce permissions. + +To ingest any standard or custom objects, users must ensure that at least `Read` permission is granted to that object. +This can be granted using any of the following methods for setting permissions. + +[discrete#es-connectors-salesforce-client-dls-profiles] +====== Set Permissions using Profiles + +Refer to the https://help.salesforce.com/s/articleView?id=sf.admin_userprofiles.htm&type=5[Salesforce documentation] for setting permissions via Profiles. + +[discrete#es-connectors-salesforce-client-dls-permission-sets] +====== Set Permissions using Permissions Set + +Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_sets_overview.htm&language=en_US&type=5[Salesforce documentation] for setting permissions via Permissions Sets. 
+ +[discrete#es-connectors-salesforce-client-dls-permission-set-groups] +====== Set Permissions using Permissions Set group + +Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_set_groups.htm&type=5[Salesforce documentation] for setting permissions via Permissions Set Groups. + +[discrete#es-connectors-salesforce-client-dls-assign-permissions] +====== Assign Profiles, Permission Set and Permission Set Groups to the User + +Once the permissions are set, assign the Profiles, Permission Set or Permission Set Groups to the user. +Follow these steps in Salesforce: + +1. Navigate to `Administration` under the `Users` section. +2. Select `Users` and choose the user to set the permissions to. +3. Set the `Profile`, `Permission Set` or `Permission Set Groups` created in the earlier steps. + +[discrete#es-connectors-salesforce-client-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. + +For more information read <>. + +[discrete#es-connectors-salesforce-client-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector. +Advanced sync rules enable filtering of data in Salesforce _before_ indexing into Elasticsearch. + +They take the following parameters: + +. `query` : Salesforce query to filter the documents. +. `language` : Salesforce query language. +Allowed values are *SOQL* and *SOSL*. + +[discrete#es-connectors-salesforce-client-sync-rules-advanced-fetch-query-language] +======= Fetch documents based on the query and language specified + +**Example**: Fetch documents using SOQL query + +[source,js] +---- +[ + { + "query": "SELECT Id, Name FROM Account", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents using SOSL query. + +[source,js] +---- +[ + { + "query": "FIND {Salesforce} IN ALL FIELDS", + "language": "SOSL" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-salesforce-client-sync-rules-advanced-fetch-objects] +======= Fetch standard and custom objects using SOQL and SOSL queries + +**Example**: Fetch documents for standard objects via SOQL and SOSL query. + +[source,js] +---- +[ + { + "query": "SELECT Account_Id, Address, Contact_Number FROM Account", + "language": "SOQL" + }, + { + "query": "FIND {Alex Wilber} IN ALL FIELDS RETURNING Contact(LastModifiedDate, Name, Address)", + "language": "SOSL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents for custom objects via SOQL and SOSL query. + +[source,js] +---- +[ + { + "query": "SELECT Connector_Name, Version FROM Connector__c", + "language": "SOQL" + }, + { + "query": "FIND {Salesforce} IN ALL FIELDS RETURNING Connectors__c(Id, Connector_Name, Connector_Version)", + "language": "SOSL" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-salesforce-client-sync-rules-advanced-fetch-standard-custom-fields] +======= Fetch documents with standard and custom fields + +**Example**: Fetch documents with all standard and custom fields for Account object. + +[source,js] +---- +[ + { + "query": "SELECT FIELDS(ALL) FROM Account", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents with all custom fields for Connector object. + +[source,js] +---- +[ + { + "query": "SELECT FIELDS(CUSTOM) FROM Connector__c", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +**Example**: Fetch documents with all standard fields for Account object. 
+ +[source,js] +---- +[ + { + "query": "SELECT FIELDS(STANDARD) FROM Account", + "language": "SOQL" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-salesforce-client-documents-syncs] +===== Documents and syncs + +The connector syncs the following Salesforce objects: + +* *Accounts* +* *Campaigns* +* *Cases* +* *Contacts* +* *Content Documents* (files uploaded to Salesforce) +* *Leads* +* *Opportunities* + + +The connector will not ingest any objects that it does not have permissions to query. + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. +* Permissions are not synced by default. You must enable <>. Otherwise, **all documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-salesforce-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-salesforce-client-content-extraction] +===== Content Extraction + +The connector will retrieve Content Documents from your Salesforce source if they meet the following criteria: + +* Are attached to one or more objects that are synced +* Are of a file type that can be extracted + +This means that the connector will not ingest any Content Documents you have that are _not_ attached to a supported Salesforce object. +See <> for a list of supported object types. + +If a single Content Document is attached to multiple supported objects, only one Elastic document will be created for it. +This document will retain links to every object that it was connected to in the `related_ids` field. + +See <> for more specifics on content extraction. + +[discrete#es-connectors-salesforce-client-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-salesforce-client-security] +===== Security + +See <>. + +[discrete#es-connectors-salesforce-client-source] +===== Framework and source + +This connector is built with the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/salesforce.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-scalability.asciidoc b/docs/reference/connector/docs/connectors-scalability.asciidoc new file mode 100644 index 0000000000000..6ebcb58e11487 --- /dev/null +++ b/docs/reference/connector/docs/connectors-scalability.asciidoc @@ -0,0 +1,49 @@ +[#es-connectors-scalability] +=== Connector Scalability +++++ +Scalability +++++ + +[discrete#es-connectors-scalability-redundancy] +==== Redundancy + +Users can create a backup (secondary) server with an identical connector service setup (settings, code, etc..). +If the primary server running the connector service fails, users can start up the connector service on the secondary +server and restart the sync jobs. +Because connector definitions and job status information are all stored in Elasticsearch, there is no risk of data loss +or corruption when switching servers. + +However, note that any in-progress syncs will need to be restarted from scratch, and cannot be resumed where they were +interrupted from. 
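+
+For example, the primary and secondary servers might both run the connector service with an identical `config.yml`, pointing at the same Elasticsearch deployment and the same connector.
+This is a minimal sketch, assuming the standard connectors service configuration layout; adjust the host, credentials, and IDs to your deployment:
+
+[source,yaml]
+----
+# Identical on the primary and the secondary server
+elasticsearch:
+  host: https://my-deployment.es.example.com:443
+  api_key: <api-key>
+
+connectors:
+  - connector_id: <connector-id>
+    service_type: <service-type>
+----
+
+Because connector state lives in Elasticsearch, starting this same configuration on the secondary server is enough to take over; only the interrupted sync jobs need to be restarted.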
+ +[discrete#es-connectors-scalability-failover] +==== Failover + +There is currently no automatic failover or transfer of workload in case of failure. +If the server running the connector service fails, all outstanding connector sync jobs will go into a suspended state. +When the connector service returns (or if a replacement connector service is launched), it will identify any jobs that +need to be cleaned up and automatically restart (from scratch) the suspended jobs. + +[discrete#es-connectors-scalability-balancing] +==== Workload balancing + +There is currently no division/balancing of workload for a single sync job across multiple connector service deployments. +Once a sync job is claimed by a connector service, it will run the job to completion - unless the connector service +instance fails. +In that case, another connector service instance will pick up the suspended job and restart it (from scratch). + +In 8.8.0+, the Connector Service provides concurrency control when there are multiple connector services connected to +the same Elasticsearch cluster, with the following expectations: + +* Multiple sync jobs can be scheduled for a given search index but only 1 sync job can be executed for a search index at any single time. +* Each sync job can only be claimed by 1 connector service. +* Only 1 connector service can perform management tasks at a time, for example: populating service types and configurations, validating filters, etc. + +[discrete#es-connectors-scalability-horizontal] +==== Horizontal Scalability + +Horizontal scaling can work if there are multiple connector services running and are configured to allow concurrent +syncs via their `service.max_concurrent_syncs` settings. + +Hypothetically, multiple Connector Services would naturally load balance to some extent even though we do not currently +have explicit load balancing functionality. diff --git a/docs/reference/connector/docs/connectors-security.asciidoc b/docs/reference/connector/docs/connectors-security.asciidoc new file mode 100644 index 0000000000000..4cbeacf3f28f0 --- /dev/null +++ b/docs/reference/connector/docs/connectors-security.asciidoc @@ -0,0 +1,55 @@ +[#es-connectors-security] +=== Connectors security +++++ +Security +++++ + +This document describes security considerations for <> and <>. + +Elastic Cloud deployments have strong security defaults. +For example, data is encrypted by default, whether at rest or in transit. + +Self-managed deployments require more upfront work to ensure strong security. +Refer to {ref}/secure-cluster.html[Secure the Elastic Stack^] in the Elasticsearch documentation for more information. + +[discrete#es-native-connectors-security-connections] +==== Access to credentials + +Credentials for the data source — such as API keys or username/password pair— are stored in your deployment's `.elastic-connectors` Elasticsearch index. +Therefore, the credentials are visible to all Elastic users with the `read` {ref}/security-privileges.html[indices privilege^] for that index. +By default, the following Elastic users have this privilege: the `elastic` superuser and the `kibana_system` user. +Enterprise Search service account tokens can also read the `.elastic-connectors` index. + +[discrete#es-native-connectors-security-api-key] +==== Access to internally stored API keys + +API keys for Elastic managed connectors are stored in the internal system index `.connector-secrets`. +Access to this index is restricted to authorized API calls only. 
+The cluster privilege `write_connector_secrets` is required to store or update secrets through the API. +Only the Enterprise Search instance has permission to read from this index. + +[discrete#es-native-connectors-security-dls] +===== Document-level security + +Document-level security is available for a subset of connectors. +DLS is available by default for the following connectors: + +include::_connectors-list-dls.asciidoc[] + +Learn more about this feature in <>, including availability and prerequisites. + +[discrete#es-native-connectors-security-deployment] +==== Access to documents + +Data synced from your data source are stored as documents in the Elasticsearch index you created. +This data is visible to all Elastic users with the `read` {ref}/security-privileges.html[indices privilege^] for that index. +Be careful to ensure that access to this index is _at least_ as restrictive as access to the original data source. + +[discrete#es-native-connectors-security-encryption] +==== Encryption + +Elastic Cloud automatically encrypts data at rest. +Data in transit is automatically encrypted using `https`. + +Self-managed deployments must implement encryption at rest. +See {ref}/configuring-stack-security.html[Configure security for the Elastic Stack^] in the Elasticsearch documentation for more information. diff --git a/docs/reference/connector/docs/connectors-self-managed.asciidoc b/docs/reference/connector/docs/connectors-self-managed.asciidoc new file mode 100644 index 0000000000000..e119953019442 --- /dev/null +++ b/docs/reference/connector/docs/connectors-self-managed.asciidoc @@ -0,0 +1,123 @@ +[#es-build-connector] +== Self-managed connectors + +.Naming history +**** +Self-managed connectors were initially known as "connector clients". You might find this term in older documentation. +**** + +Self-managed <> are run on your own infrastructure. +This means they run outside of your Elastic deployment. + +You can run the <> from source or from a Docker container. + +We also have a quickstart option using *Docker Compose*, to spin up all the required services at once: Elasticsearch, Kibana, and the connectors service. +Refer to <> for more information. + +The following connectors are available as self-managed connectors: + +include::_connectors-list-clients.asciidoc[] + +[discrete#es-build-connector-prerequisites] +=== Availability and Elastic prerequisites + +[NOTE] +==== +Self-managed connectors currently don't support Windows. +Use this https://www.elastic.co/support/matrix#matrix_os[compatibility matrix^] to check which operating systems are supported by self-managed connectors. +Find this information under *self-managed connectors* on that page. +==== + +.*Expand* for Elastic prerequisites information +[%collapsible] +==== +Your Elastic deployment must include the following Elastic services: + +* *Elasticsearch* +* *Kibana* + +(A new Elastic Cloud deployment includes these services by default.) + +To run self-managed connectors, your self-deployed connector service version must match your Elasticsearch version. +For example, if you're running Elasticsearch 8.10.1, your connector service should be version 8.10.1.x. +Elastic does not support deployments running mismatched versions (except during upgrades). + +[NOTE] +====== +As of 8.10.0 _new_ self-managed connectors no longer require the Enterprise Search service. +However, if you are upgrading connectors from versions earlier than 8.9, you'll need to run Enterprise Search once to migrate your connectors to the new format. 
+In future releases, you may still need to run Enterprise Search for the purpose of migrations or upgrades. +====== + +You must have access to Kibana and have `write` {ref}/security-privileges.html[indices privileges^] for the `.elastic-connectors` index. + +To use connector clients in a self-managed environment, you must deploy the <>. + +*Support and licensing requirements* + +Depending on how you use self-managed connectors, support and licensing requirements will vary. + +Refer to the following subscriptions pages for details. +Find your connector of interest in the *Elastic Search* section under *Client Integrations*: + +* https://www.elastic.co/subscriptions/[Elastic self-managed subscriptions page] +* https://www.elastic.co/subscriptions/cloud[Elastic Cloud subscriptions page] + +Note the following information regarding support for self-managed connectors: + +* A converted but _unmodified_ managed connector is supported by Elastic. +* A converted but _customized_ managed connector is _not_ supported by Elastic. + +==== + +[discrete#es-build-connector-data-source-prerequisites] +.Data source prerequisites +**** +The first decision you need to make before deploying a connector is which third party service (data source) you want to sync to Elasticsearch. +Note that each data source will have specific prerequisites you'll need to meet to authorize the connector to access its data. +For example, certain data sources may require you to create an OAuth application, or create a service account. + +You'll need to check the individual connector documentation for these details. +**** + +[discrete#es-connectors-deploy-connector-service] +=== Deploy the connector service + +The connector service is a Python application that you must run on your own infrastructure when using self-managed connectors. +The source code is hosted in the https://github.com/elastic/connectors[elastic/connectors^] repository. + +You can run the connector service from source or use Docker: + +* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally. +* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform. +** Refer to our <> for a quick way to spin up all the required services at once. + +[discrete#es-build-connector-example] +=== Tutorials + +* Follow our <> to learn how run the self-managed connector service and a set up a self-managed connector, *using the UI*. +* Follow our <> to learn how to set up a self-managed connector *using the* {ref}/connector-apis.html[*connector APIs*]. + +These examples use the PostgreSQL connector but the basic process is the same for all self-managed connectors. + +[discrete#es-build-connector-testing] +=== Connector testing + +The connector framework enables you to run end-to-end (E2E) tests on your self-managed connectors, against a real data source. + +To avoid tampering with a real Elasticsearch instance, E2E tests run an isolated Elasticsearch instance in Docker. +Configuration values are set in your `docker-compose.yml` file. +Docker Compose manages the setup of the development environment, including both the mock Elastic instance and mock data source. + +E2E tests use *default* configuration values for the connector. +Find instructions about testing in each connector's documentation. + +[discrete#es-build-connector-framework] +=== Connector framework + +The Elastic connector framework enables you to: + +* Customize existing self-managed connectors. 
+* Build your own self-managed connectors. + +Refer to <> for more information. diff --git a/docs/reference/connector/docs/connectors-servicenow.asciidoc b/docs/reference/connector/docs/connectors-servicenow.asciidoc new file mode 100644 index 0000000000000..089a3b405d8a5 --- /dev/null +++ b/docs/reference/connector/docs/connectors-servicenow.asciidoc @@ -0,0 +1,494 @@ +[#es-connectors-servicenow] +=== Elastic ServiceNow connector reference +++++ +ServiceNow +++++ +// Attributes used in this file +:service-name: ServiceNow +:service-name-stub: servicenow + +The _Elastic ServiceNow connector_ is a <> for https://www.servicenow.com[ServiceNow^]. + +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-servicenow-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-servicenow-availability-prerequisites] +===== Availability and prerequisites + +The ServiceNow connector is available natively in Elastic Cloud since 8.10.0. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[discrete#es-connectors-servicenow-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-servicenow-usage] +===== Usage + +To use this connector natively in Elastic Cloud, see <>. + +For additional operations, see <> + +[discrete#es-connectors-servicenow-compatibility] +===== Compatibility + +The ServiceNow connector is compatible with the following versions of ServiceNow: + +* ServiceNow "Tokyo" +* ServiceNow "San Diego" +* ServiceNow "Rome" +* ServiceNow "Utah" +* ServiceNow "Vancouver" +* ServiceNow "Washington" +* ServiceNow "Xanadu" + +[discrete#es-connectors-servicenow-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +ServiceNow URL:: +The host URL of the ServiceNow instance. + +Username:: +The username of the account used for ServiceNow. + +Password:: +The password of the account used for ServiceNow. + +Comma-separated list of services:: +Comma-separated list of services to fetch data from ServiceNow. 
If the value is `*`, the connector will fetch data from the list of basic services provided by ServiceNow: +- link:https://docs.servicenow.com/bundle/utah-platform-administration/page/administer/roles/concept/user.html[User] +- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/incident-management/concept/c_IncidentManagement.html[Incident] +- link:https://docs.servicenow.com/bundle/tokyo-servicenow-platform/page/use/service-catalog-requests/task/t_AddNewRequestItems.html[Requested Item] +- link:https://docs.servicenow.com/bundle/tokyo-customer-service-management/page/product/customer-service-management/task/t_SearchTheKnowledgeBase.html[Knowledge] +- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/change-management/task/t_CreateAChange.html[Change Request] ++ +[NOTE] +==== +If you have configured a custom service, the `*` value will not fetch data from the basic services above by default. In this case you'll need to mention these service names explicitly. +==== +Default value is `*`. Examples: ++ + - `User, Incident, Requested Item, Knowledge, Change Request` + - `*` + +Enable document level security:: +Restrict access to documents based on a user's permissions. +Refer to <> for more details. + +[discrete#es-connectors-servicenow-documents-syncs] +===== Documents and syncs + +All services and records the user has access to will be indexed according to the configurations provided. +The connector syncs the following ServiceNow object types: + +* Records +* Attachments + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced by default. Refer to <> for more details. +==== + +[discrete#es-connectors-servicenow-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-servicenow-dls] +===== Document level security + +<> ensures identities and permissions set in ServiceNow are maintained in Elasticsearch. +This enables you to restrict and personalize read-access users and groups have to documents in this index. +Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + +The ServiceNow connector supports roles for access control lists (ACLs) to enable document level security in {es}. +For default services, connectors use the following roles to find users who have access to documents. + +|=== +| Service | Roles + +| User | `admin` + +| Incident | `admin`, `sn_incident_read`, `ml_report_user`, `ml_admin`, `itil` + +| Requested Item | `admin`, `sn_request_read`, `asset`, `atf_test_designer`, `atf_test_admin` + +| Knowledge | `admin`, `knowledge`, `knowledge_manager`, `knowledge_admin` + +| Change Request | `admin`, `sn_change_read`, `itil` +|=== + +For services other than these defaults, the connector iterates over access controls with `read` operations and finds the respective roles for those services. + +[NOTE] +==== +The ServiceNow connector does not support scripted and conditional permissions. +==== + +[discrete#es-connectors-servicenow-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-servicenow-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Advanced sync rules are defined through a source-specific DSL JSON snippet. 
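+
+Each rule pairs a `service` with a ServiceNow encoded query, and a single array can contain several rules.
+The sketch below simply combines two of the example rules shown in the following sections; the query values are illustrative only:
+
+[source,js]
+----
+[
+  {
+    "service": "Incident",
+    "query": "numberSTARTSWITHINC001"
+  },
+  {
+    "service": "User",
+    "query": "active=False"
+  }
+]
+----
+// NOTCONSOLE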
+ +The following sections provide examples of advanced sync rules for this connector. + +[discrete#es-connectors-servicenow-sync-rules-number-incident-service] +======= Indexing document based on incident number for Incident service + +[source,js] +---- +[ + { + "service": "Incident", + "query": "numberSTARTSWITHINC001" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-servicenow-sync-rules-active-false-user-service] +======= Indexing document based on user activity state for User service + +[source,js] +---- +[ + { + "service": "User", + "query": "active=False" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-servicenow-sync-rules-author-administrator-knowledge-service] +======= Indexing document based on author name for Knowledge service + +[source,js] +---- +[ + { + "service": "Knowledge", + "query": "author.nameSTARTSWITHSystem Administrator" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-servicenow-known-issues] +===== Known issues + +There are no known issues for this connector. +Refer to <> for a list of known issues that impact all connectors. + +[discrete#es-connectors-servicenow-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-servicenow-security] +===== Security + +See <>. + +[discrete#es-connectors-servicenow-content-extraction] +===== Content extraction + +See <>. + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-servicenow-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-servicenow-client-availability-prerequisites] +===== Availability and prerequisites + +The ServiceNow connector was introduced in Elastic version 8.9.0. +This connector is available as a self-managed *self-managed connector*. +To use this connector as a self-managed connector, satisfy all <>. + +[discrete#es-connectors-servicenow-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-servicenow-client-usage] +===== Usage +To use this connector as a *self-managed connector*, use the *Customized connector* workflow. + +For additional operations, see <>. + +[discrete#es-connectors-servicenow-client-compatibility] +===== Compatibility + +The ServiceNow connector is compatible with the following versions of ServiceNow: + +* ServiceNow "Tokyo" +* ServiceNow "San Diego" +* ServiceNow "Rome" +* ServiceNow "Utah" +* ServiceNow "Vancouver" +* ServiceNow "Washington" +* ServiceNow "Xanadu" + +[discrete#es-connectors-servicenow-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/servicenow.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`url`:: +The host url of the ServiceNow instance. + +`username`:: +The username of the account for ServiceNow. + +`password`:: +The password of the account used for ServiceNow. 
+ +`services`:: +Comma-separated list of services to fetch data from ServiceNow. If the value is `*`, the connector will fetch data from the list of basic services provided by ServiceNow: +- link:https://docs.servicenow.com/bundle/utah-platform-administration/page/administer/roles/concept/user.html[User] +- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/incident-management/concept/c_IncidentManagement.html[Incident] +- link:https://docs.servicenow.com/bundle/tokyo-servicenow-platform/page/use/service-catalog-requests/task/t_AddNewRequestItems.html[Requested Item] +- link:https://docs.servicenow.com/bundle/tokyo-customer-service-management/page/product/customer-service-management/task/t_SearchTheKnowledgeBase.html[Knowledge] +- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/change-management/task/t_CreateAChange.html[Change Request] ++ +[NOTE] +==== +If you have configured a custom service, the `*` value will not fetch data from the basic services above by default. In this case you'll need to mention these service names explicitly. +==== +Default value is `*`. Examples: ++ + - `User, Incident, Requested Item, Knowledge, Change Request` + - `*` + +`retry_count`:: +The number of retry attempts after a failed request to ServiceNow. Default value is `3`. + +`concurrent_downloads`:: +The number of concurrent downloads for fetching the attachment content. This speeds up the content extraction of attachments. Defaults to `10`. + +`use_text_extraction_service`:: +Requires a separate deployment of the <>. +Requires that ingest pipeline settings disable text extraction. +Default value is `False`. + +`use_document_level_security`:: +Restrict access to documents based on a user's permissions. +Refer to <> for more details. + +[discrete#es-connectors-servicenow-client-documents-syncs] +===== Documents and syncs + +All services and records the user has access to will be indexed according to the configurations provided. +The connector syncs the following ServiceNow object types: + +* Records +* Attachments + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. Use the <> to handle larger binary files. +* Permissions are not synced by default. You must enable <>. Otherwise, **all documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + +[discrete#es-connectors-servicenow-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-servicenow-client-dls] +===== Document level security + +<> ensures identities and permissions set in ServiceNow are maintained in Elasticsearch. +This enables you to restrict and personalize read-access users and groups have to documents in this index. +Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. + +The ServiceNow connector supports roles for access control lists (ACLs) to enable document level security in {es}. +For default services, connectors use the following roles to find users who have access to documents. 
+ +|=== +| Service | Roles + +| User | `admin` + +| Incident | `admin`, `sn_incident_read`, `ml_report_user`, `ml_admin`, `itil` + +| Requested Item | `admin`, `sn_request_read`, `asset`, `atf_test_designer`, `atf_test_admin` + +| Knowledge | `admin`, `knowledge`, `knowledge_manager`, `knowledge_admin` + +| Change Request | `admin`, `sn_change_read`, `itil` +|=== + +For services other than these defaults, the connector iterates over access controls with `read` operations and finds the respective roles for those services. + +[NOTE] +==== +The ServiceNow connector does not support scripted and conditional permissions. +==== + +[discrete#es-connectors-servicenow-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-servicenow-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-servicenow-client-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +The following sections provide examples of advanced sync rules for this connector. + +[discrete#es-connectors-servicenow-client-sync-rules-number-incident-service] +======= Indexing document based on incident number for Incident service + +[source,js] +---- +[ + { + "service": "Incident", + "query": "numberSTARTSWITHINC001" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-servicenow-client-sync-rules-active-false-user-service] +======= Indexing document based on user activity state for User service + +[source,js] +---- +[ + { + "service": "User", + "query": "active=False" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-servicenow-client-sync-rules-author-administrator-knowledge-service] +======= Indexing document based on author name for Knowledge service + +[source,js] +---- +[ + { + "service": "Knowledge", + "query": "author.nameSTARTSWITHSystem Administrator" + } +] +---- +// NOTCONSOLE + +[discrete#es-connectors-servicenow-client-connector-client-operations-testing] +===== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the ServiceNow connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=servicenow +---- + +Generate performance reports using the following flag: `PERF8=yes`. +Toggle test data set size between SMALL, MEDIUM and LARGE with the argument `DATA_SIZE=`. +By default, it is set to `MEDIUM`. + +Users do not need to have a running Elasticsearch instance or a ServiceNow source to run this test. +Docker Compose manages the complete setup of the development environment. + +[discrete#es-connectors-servicenow-client-known-issues] +===== Known issues + +There are no known issues for this connector. +Refer to <> for a list of known issues that impact all connectors. + +[discrete#es-connectors-servicenow-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-servicenow-client-security] +===== Security + +See <>. + +[discrete#es-connectors-servicenow-client-content-extraction] +===== Content extraction + +See <>. 
+ + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-sharepoint-online.asciidoc b/docs/reference/connector/docs/connectors-sharepoint-online.asciidoc new file mode 100644 index 0000000000000..95ff8223b4d20 --- /dev/null +++ b/docs/reference/connector/docs/connectors-sharepoint-online.asciidoc @@ -0,0 +1,988 @@ +[#es-connectors-sharepoint-online] +=== Elastic SharePoint Online connector reference +++++ +SharePoint Online +++++ +// Attributes used in this file +:service-name: SharePoint Online +:service-name-stub: sharepoint_online + +[TIP] +==== +Looking for the SharePoint *Server* connector? See <>. +==== + +The _Elastic SharePoint Online connector_ is a <> for https://www.microsoft.com/en-ww/microsoft-365/sharepoint/[Microsoft SharePoint Online^]. + +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +.Choose your connector reference +******************************* +Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. +******************************* + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-sharepoint-online-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-sharepoint-online-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a *managed connector* in Elastic versions *8.9.0 and later*. +To use this connector natively in Elastic Cloud, satisfy all <>. + +[NOTE] +==== +This connector requires a subscription. +View the requirements for this feature under the *Elastic Search* section of the https://www.elastic.co/subscriptions[Elastic Stack subscriptions^] page. +==== + +[discrete#es-connectors-sharepoint-online-usage] +===== Usage + +To use this connector as a *managed connector*, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-sharepoint-online-sharepoint-prerequisites] +===== SharePoint prerequisites + +[discrete#es-connectors-sharepoint-online-oauth-app-create] +====== Create SharePoint OAuth app + +Before you can configure the connector, you must create an **OAuth App** in the SharePoint Online platform. +Your connector will authenticate to SharePoint as the registered OAuth application/client. +You'll collect values (`client ID`, `tenant ID`, and `client secret`) during this process that you'll need for the <> in Kibana. + +To get started, first log in to SharePoint Online and access your administrative dashboard. +Ensure you are logged in as the Azure Portal **service account**. + +Follow these steps: + +* Sign in to https://portal.azure.com/ and click on **Azure Active Directory**. +* Locate **App Registrations** and Click **New Registration**. +* Give your app a name - like "Search". +* Leave the *Redirect URIs* blank for now. +* *Register* the application. +* Find and keep the **Application (client) ID** and **Directory (tenant) ID** handy. +* Locate the **Secret** by navigating to **Client credentials: Certificates & Secrets**. 
+* Select **New client secret** +* Pick a name for your client secret. +Select an expiration date. (At this expiration date, you will need to generate a new secret and update your connector configuration.) +** Save the client secret **Secret ID** before leaving this screen. +** Save the client secret **Value** before leaving this screen. +* Set up the permissions the OAuth App will request from the Azure Portal service account. +** Navigate to **API Permissions** and click **Add Permission**. +** Add **application permissions** until the list looks like the following: ++ +``` +Graph API +- Sites.Read.All +- Files.Read.All +- Group.Read.All +- User.Read.All + +Sharepoint +- Sites.Read.All +``` +* **Grant admin consent**, using the `Grant Admin Consent` link from the permissions screen. +* Save the tenant name (i.e. Domain name) of Azure platform. + +[WARNING] +==== +The connector requires application permissions. It does not support delegated permissions (scopes). +==== + +[NOTE] +==== +The connector uses the https://learn.microsoft.com/en-us/sharepoint/dev/apis/sharepoint-rest-graph[Graph API^] (stable https://learn.microsoft.com/en-us/graph/api/overview?view=graph-rest-1.0#other-api-versions[v1.0 API^]) where possible to fetch data from Sharepoint Online. +When entities are not available via the Graph API the connector falls back to using the Sharepoint https://learn.microsoft.com/en-us/sharepoint/dev/sp-add-ins/get-to-know-the-sharepoint-rest-service[REST API^]. +==== + +[discrete#es-connectors-sharepoint-online-oauth-app-permissions] +====== SharePoint permissions + +Refer to the following documentation for setting https://learn.microsoft.com/en-us/sharepoint/dev/solution-guidance/security-apponly-azureacs[SharePoint permissions^]. + +* To set `DisableCustomAppAuthentication` to false, connect to SharePoint using PowerShell and run `set-spotenant -DisableCustomAppAuthentication $false` +* To assign full permissions to the tenant in SharePoint Online, go to the tenant URL in your browser. +The URL follows this pattern: `https:///_layouts/15/appinv.aspx`. +This loads the SharePoint admin center page. +** In the *App ID* box, enter the application ID that you recorded earlier, and then click *Lookup*. +The application name will appear in the Title box. +** In the *App Domain* box, type .onmicrosoft.com +** In the *App's Permission Request XML* box, type the following XML string: ++ +[source, xml] +---- + + + + +---- + +.Graph API permissions +**** +Microsoft recommends using Graph API for all operations with Sharepoint Online. Graph API is well-documented and more efficient at fetching data, which helps avoid throttling. +Refer to https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online[Microsoft's throttling policies^] for more information. + +Here's a summary of why we use these Graph API permissions: + +* *Sites.Read.All* is used to fetch the sites and their metadata +* *Files.Read.All* is used to fetch Site Drives and files in these drives +* *Groups.Read.All* is used to fetch groups for document-level permissions +* *User.Read.All* is used to fetch user information for document-level permissions + +Due to the way the Graph API is designed, these permissions are "all or nothing" - it's currently impossible to limit access to these resources. +**** + +[discrete#es-connectors-sharepoint-online-compatability] +===== Compatibility + +This connector is compatible with SharePoint Online. 
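+
+[TIP]
+====
+Before entering the values collected above into the connector configuration, you can optionally confirm that the app registration works by requesting a Graph API token with the client credentials flow.
+This is a minimal sketch, not part of the connector itself; the tenant ID, client ID, and secret below are placeholders.
+
+[source,python]
+----
+import requests
+
+# Minimal sketch: acquire a Graph API token with the client credentials flow
+# to confirm the OAuth app registration works. All identifiers are placeholders.
+TENANT_ID = "00000000-0000-0000-0000-000000000000"
+CLIENT_ID = "11111111-1111-1111-1111-111111111111"
+CLIENT_SECRET = "your-client-secret-value"
+
+response = requests.post(
+    f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token",
+    data={
+        "grant_type": "client_credentials",
+        "client_id": CLIENT_ID,
+        "client_secret": CLIENT_SECRET,
+        "scope": "https://graph.microsoft.com/.default",
+    },
+)
+response.raise_for_status()
+print("Token acquired; expires in", response.json()["expires_in"], "seconds")
+----
+====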
+ +[discrete#es-connectors-sharepoint-online-configuration] +===== Configuration + +Use the following configuration fields to set up the connector: + +Tenant ID:: +The tenant id for the Azure account hosting the Sharepoint Online instance. + +Tenant Name:: +The tenant name for the Azure account hosting the Sharepoint Online instance. + +Client ID:: +The client id to authenticate with SharePoint Online. + +Secret value:: +The secret value to authenticate with SharePoint Online. + +Comma-separated list of sites:: +List of site collection names or paths to fetch from SharePoint. +When enumerating all sites, these values should be the _names_ of the sites. +Use `*` to include all available sites. +Examples: +* `collection1` +* `collection1,sub-collection` +* `*` ++ +When **not** enumerating all sites, these values should be the _paths_ (URL after `/sites/`) of the sites. +Examples: +* `collection1` +* `collection1,collection1/sub-collection` + +Enumerate all sites?:: +If enabled, the full list of all sites will be fetched from the API, in bulk, and will be filtered down to match the configured list of site names. +If disabled, each path in the configured list of site paths will be fetched individually from the API. +When disabled, `*` is not a valid configuration for `Comma-separated list of sites`. +Enabling this configuration is most useful when syncing large numbers (more than total/200) of sites. +This is because, at high volumes, it is more efficient to fetch sites in bulk. +When syncing fewer sites, disabling this configuration can result in improved performance. +This is because, at low volumes, it is more efficient to only fetch the sites that you need. + +Fetch sub-sites of configured sites?:: +Whether sub-sites of the configured site(s) should be automatically fetched. +This option is only available when not enumerating all sites (see above). + +Enable document level security:: +Toggle to enable <>. +When enabled, full and incremental syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs will fetch users' access control lists and store them in a separate index. ++ +Once enabled, the following granular permissions toggles will be available: + +** *Fetch drive item permissions*: Enable this option to fetch *drive item* specific permissions. +** *Fetch unique page permissions*: Enable this option to fetch unique *page* permissions. If this setting is disabled a page will inherit permissions from its parent site. +** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. +** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. ++ +[NOTE] +==== +If left empty the default value `true` will be used for these granular permissions toggles. +Note that these settings may increase sync times. +==== + +[discrete#es-connectors-sharepoint-online-documents-syncs] +===== Documents and syncs + +The connector syncs the following SharePoint object types: + +* *Sites* (and subsites) +* *Lists* +* *List items* and *attachment content* +* *Document libraries* and *attachment content* (including web pages) + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) 
+* Permissions are not synced by default. Enable <> to sync permissions. +==== + +[TIP] +==== +*Making Sharepoint Site Pages Web Part content searchable* + +If you're using Web Parts on Sharepoint Site Pages and want to make this content searchable, you'll need to consult the https://learn.microsoft.com/en-us/sharepoint/dev/spfx/web-parts/guidance/integrate-web-part-properties-with-sharepoint#specify-web-part-property-value-type/[official documentation^]. + +We recommend setting `isHtmlString` to *True* for all Web Parts that need to be searchable. +==== + +[discrete#es-connectors-sharepoint-online-documents-syncs-limitations] +====== Limitations + +* The connector does not currently sync content from Teams-connected sites. + +[discrete#es-connectors-sharepoint-online-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-sharepoint-online-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +<> for the Sharepoint Online connector enable you to avoid extracting and syncing older data that might no longer be relevant for search. + +Example: + +[source,js] +---- +{ + "skipExtractingDriveItemsOlderThan": 60 +} +---- +// NOTCONSOLE + +This rule will not extract content of any drive items (files in document libraries) that haven't been modified for 60 days or more. + +[discrete#es-connectors-sharepoint-online-sync-rules-limitations] +======= Limitations of sync rules with incremental syncs + +Changing sync rules after Sharepoint Online content has already been indexed can bring unexpected results, when using <>. + +Incremental syncs ensure _updates_ from 3rd-party system, but do not modify existing documents in the index. + +*To avoid these issues, run a full sync after changing sync rules (basic or advanced).* + +Let's take a look at several examples where incremental syncs might lead to inconsistent data on your index. + +[discrete#es-connectors-sharepoint-online-sync-rules-limitations-restrictive-added] +======== Example: Restrictive basic sync rule added after a full sync + +Imagine your Sharepoint Online drive contains the following drive items: + +[source,txt] +---- +/Documents/Report.doc +/Documents/Spreadsheet.xls +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +/Personal/Documents/Sales.xls +---- + +After a sync, all these drive items will be stored on your Elasticsearch index. +Let's add a basic sync rule, filtering files by their path: + +[source,txt] +---- +Exclude WHERE path CONTAINS "Documents" +---- + +These filtering rules will exclude all files with "Documents" in their path, leaving only files in `/Presentations` directory: + +[source,txt] +---- +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +---- + +If no files were changed, incremental sync will not receive information about changes from Sharepoint Online and won't be able to delete any files, leaving the index in the same state it was before the sync. + +After a *full sync*, the index will be updated and files that are excluded by sync rules will be removed. 
+ +[discrete#es-connectors-sharepoint-online-sync-rules-limitations-restrictive-removed] +======== Example: Restrictive basic sync rules removed after a full sync + +Imagine that Sharepoint Online drive has the following drive items: + +[source,txt] +---- +/Documents/Report.doc +/Documents/Spreadsheet.xls +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +/Personal/Documents/Sales.xls +---- + +Before doing a sync, we add a restrictive basic filtering rule: + +[source,txt] +---- +Exclude WHERE path CONTAINS "Documents" +---- + +After a full sync, the index will contain only files in the `/Presentations` directory: + +[source,txt] +---- +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +---- + +Afterwards, we can remove the filtering rule and run an incremental sync. If no changes happened to the files, incremental sync will not mirror these changes in the Elasticsearch index, because Sharepoint Online will not report any changes to the items. +Only a *full sync* will include the items previously ignored by the sync rule. + +[discrete#es-connectors-sharepoint-online-sync-rules-limitations-restrictive-changed] +======== Example: Advanced sync rules edge case + +Advanced sync rules can be applied to limit which documents will have content extracted. +For example, it's possible to set a rule so that documents older than 180 days won't have content extracted. + +However, there is an edge case. +Imagine a document that is 179 days old and its content is extracted and indexed into Elasticsearch. +After 2 days, this document will be 181 days old. +Since this document was already ingested it will not be modified. +Therefore, the content will not be removed from the index, following an incremental sync. + +In this situation, if you want older documents to be removed, you will need to clean the index up manually. +For example, you can manually run an Elasticsearch query that removes drive item content older than 180 days: + +[source, console] +---- +POST INDEX_NAME/_update_by_query?conflicts=proceed +{ + "query": { + "bool": { + "filter": [ + { + "match": { + "object_type": "drive_item" + } + }, + { + "exists": { + "field": "file" + } + }, + { + "range": { + "lastModifiedDateTime": { + "lte": "now-180d" + } + } + } + ] + } + }, + "script": { + "source": "ctx._source.body = ''", + "lang": "painless" + } +} +---- +// TEST[skip:TODO] + +[discrete#es-connectors-sharepoint-online-dls] +===== Document-level security + +Document-level security (DLS) enables you to restrict access to documents based on a user's permissions. +This feature is available by default for this connector. + +Refer to <> on this page for how to enable DLS for this connector. + +[TIP] +==== +Refer to <> to learn how to ingest data from SharePoint Online with DLS enabled, when building a search application. +==== + +[discrete#es-connectors-sharepoint-online-content-extraction] +===== Content extraction + +[discrete#es-connectors-sharepoint-online-content-extraction-pipeline] +====== Default content extraction + +The default content extraction service is powered by the Enterprise Search default ingest pipeline. +(See {ref}/ingest-pipeline-search.html[Ingest pipelines for Search indices].) + +See <>. + +[discrete#es-connectors-sharepoint-online-content-extraction-local] +====== Local content extraction (for large files) + +The SharePoint Online self-managed connector supports large file content extraction (> *100MB*). 
+This requires: + +* A self-managed deployment of the Elastic Text Extraction Service. +* Text extraction to be _disabled_ in the default ingest pipeline settings. + +Refer to <> for more information. + +[discrete#es-connectors-sharepoint-online-known-issues] +===== Known issues + +* *Documents failing to sync due to SharePoint file and folder limits* ++ +SharePoint has limits on the number of files and folders that can be synced. +You might encounter an error like the following written to the body of documents that failed to sync: +`The file size exceeds the allowed limit. CorrelationId: fdb36977-7cb8-4739-992f-49878ada6686, UTC DateTime: 4/21/2022 11:24:22 PM` ++ +Refer to https://support.microsoft.com/en-us/office/download-files-and-folders-from-onedrive-or-sharepoint-5c7397b7-19c7-4893-84fe-d02e8fa5df05#:~:text=Downloads%20are%20subject%20to%20the,zip%20file%20and%2020GB%20overall[SharePoint documentation^] for more information about these limits. ++ +** *Syncing a large number of files* ++ +The connector will fail to download files from folders that contain more than 5000 files. +The List View Threshold (default 5000) is a limit that prevents operations with a high performance impact on the SharePoint Online environment. ++ +*Workaround:* Reduce batch size to avoid this issue. ++ +** *Syncing large files* ++ +SharePoint has file size limits, but these are configurable. ++ +*Workaround:* Increase the file size limit. +Refer to https://learn.microsoft.com/en-us/sharepoint/manage-site-collection-storage-limits#set-automatic-or-manual-site-storage-limits[SharePoint documentation^] for more information. ++ +** *Deleted documents counter is not updated during incremental syncs* ++ +If the configuration `Enumerate All Sites?` is enabled, incremental syncs may not behave as expected. +Drive Item documents that were deleted between incremental syncs may not be detected as deleted. ++ +*Workaround*: Disable `Enumerate All Sites?`, and configure full site paths for all desired sites. + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-sharepoint-online-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-sharepoint-onlinesecurity] +===== Security + +See <>. + +// Closing the collapsible section +=============== + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-sharepoint-online-connector-client-reference] +==== *Self-managed connector* + +.View *self-managed connector* reference + +[%collapsible] +=============== + +[discrete#es-connectors-sharepoint-online-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +To use this connector as a self-managed connector, satisfy all <>. + +[NOTE] +==== +This connector requires a subscription. +View the requirements for this feature under the *Elastic Search* section of the https://www.elastic.co/subscriptions[Elastic Stack subscriptions^] page. +==== + +[discrete#es-connectors-sharepoint-online-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <> +For additional operations, see <>. 
+ +[discrete#es-connectors-sharepoint-online-client-sharepoint-prerequisites] +===== SharePoint prerequisites + +[discrete#es-connectors-sharepoint-online-client-oauth-app-create] +====== Create SharePoint OAuth app + +Before you can configure the connector, you must create an **OAuth App** in the SharePoint Online platform. +Your connector will authenticate to SharePoint as the registered OAuth application/client. +You'll collect values (`client ID`, `tenant ID`, and `client secret`) during this process that you'll need for the <> in Kibana. + +To get started, first log in to SharePoint Online and access your administrative dashboard. +Ensure you are logged in as the Azure Portal **service account**. + +Follow these steps: + +* Sign in to https://portal.azure.com/ and click on **Azure Active Directory**. +* Locate **App Registrations** and Click **New Registration**. +* Give your app a name - like "Search". +* Leave the *Redirect URIs* blank for now. +* *Register* the application. +* Find and keep the **Application (client) ID** and **Directory (tenant) ID** handy. +* Locate the **Secret** by navigating to **Client credentials: Certificates & Secrets**. +* Select **New client secret** +* Pick a name for your client secret. +Select an expiration date. (At this expiration date, you will need to generate a new secret and update your connector configuration.) +** Save the client secret **Secret ID** before leaving this screen. +** Save the client secret **Value** before leaving this screen. +* Set up the permissions the OAuth App will request from the Azure Portal service account. +** Navigate to **API Permissions** and click **Add Permission**. +** Add **application permissions** until the list looks like the following: ++ +``` +Graph API +- Sites.Read.All +- Files.Read.All +- Group.Read.All +- User.Read.All + +Sharepoint +- Sites.Read.All +``` +* **Grant admin consent**, using the `Grant Admin Consent` link from the permissions screen. +* Save the tenant name (i.e. Domain name) of Azure platform. + +[WARNING] +==== +The connector requires application permissions. It does not support delegated permissions (scopes). +==== + +[NOTE] +==== +The connector uses the https://learn.microsoft.com/en-us/sharepoint/dev/apis/sharepoint-rest-graph[Graph API^] (stable https://learn.microsoft.com/en-us/graph/api/overview?view=graph-rest-1.0#other-api-versions[v1.0 API^]) where possible to fetch data from Sharepoint Online. +When entities are not available via the Graph API the connector falls back to using the Sharepoint https://learn.microsoft.com/en-us/sharepoint/dev/sp-add-ins/get-to-know-the-sharepoint-rest-service[REST API^]. +==== + +[discrete#es-connectors-sharepoint-online-client-oauth-app-permissions] +====== SharePoint permissions + +Refer to the following documentation for setting https://learn.microsoft.com/en-us/sharepoint/dev/solution-guidance/security-apponly-azureacs[SharePoint permissions^]. + +* To set `DisableCustomAppAuthentication` to false, connect to SharePoint using PowerShell and run `set-spotenant -DisableCustomAppAuthentication $false` +* To assign full permissions to the tenant in SharePoint Online, go to the tenant URL in your browser. +The URL follows this pattern: `https:///_layouts/15/appinv.aspx`. +This loads the SharePoint admin center page. +** In the *App ID* box, enter the application ID that you recorded earlier, and then click *Lookup*. +The application name will appear in the Title box. 
+** In the *App Domain* box, type .onmicrosoft.com +** In the *App's Permission Request XML* box, type the following XML string: ++ +[source, xml] +---- + + + + +---- + +.Graph API permissions +**** +Microsoft recommends using Graph API for all operations with Sharepoint Online. Graph API is well-documented and more efficient at fetching data, which helps avoid throttling. +Refer to https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online[Microsoft's throttling policies^] for more information. + +Here's a summary of why we use these Graph API permissions: + +* *Sites.Read.All* is used to fetch the sites and their metadata +* *Files.Read.All* is used to fetch Site Drives and files in these drives +* *Groups.Read.All* is used to fetch groups for document-level permissions +* *User.Read.All* is used to fetch user information for document-level permissions + +Due to the way the Graph API is designed, these permissions are "all or nothing" - it's currently impossible to limit access to these resources. +**** + +[discrete#es-connectors-sharepoint-online-client-compatability] +===== Compatibility + +This connector is compatible with SharePoint Online. + +[discrete#es-connectors-sharepoint-online-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/sharepoint_online.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +Use the following configuration fields to set up the connector: + +`tenant_id`:: +The tenant id for the Azure account hosting the Sharepoint Online instance. + +`tenant_name`:: +The tenant name for the Azure account hosting the Sharepoint Online instance. + +`client_id`:: +The client id to authenticate with SharePoint Online. + +`secret_value`:: +The secret value to authenticate with SharePoint Online. + +`site_collections`:: +List of site collection names or paths to fetch from SharePoint. +When enumerating all sites, these values should be the _names_ of the sites. +Use `*` to include all available sites. +Examples: +* `collection1` +* `collection1,sub-collection` +* `*` ++ +When **not** enumerating all sites, these values should be the _paths_ (URL after `/sites/`) of the sites. +Examples: +* `collection1` +* `collection1,collection1/sub-collection` + +`enumerate_all_sites`:: +If enabled, the full list of all sites will be fetched from the API, in bulk, and will be filtered down to match the configured list of site names. +If disabled, each path in the configured list of site paths will be fetched individually from the API. +Enabling this configuration is most useful when syncing large numbers (more than total/200) of sites. +This is because, at high volumes, it is more efficient to fetch sites in bulk. +When syncing fewer sites, disabling this configuration can result in improved performance. +This is because, at low volumes, it is more efficient to only fetch the sites that you need. ++ +[NOTE] +==== +When disabled, `*` is not a valid configuration for `Comma-separated list of sites`. +==== + +`fetch_subsites`:: +Whether sub-sites of the configured site(s) should be automatically fetched. 
+This option is only available when not enumerating all sites (see above). + +`use_text_extraction_service`:: +Toggle to enable local text extraction service for documents. +Requires a separate deployment of the <>. +Requires that ingest pipeline settings disable text extraction. +Default value is `False`. + +`use_document_level_security`:: +Toggle to enable <>. +When enabled, full and incremental syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. +Access control syncs will fetch users' access control lists and store them in a separate index. ++ +Once enabled, the following granular permissions toggles will be available: + +** *Fetch drive item permissions*: Enable this option to fetch *drive item* specific permissions. +** *Fetch unique page permissions*: Enable this option to fetch unique *page* permissions. If this setting is disabled a page will inherit permissions from its parent site. +** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. +** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. ++ +[NOTE] +==== +If left empty the default value `true` will be used for these granular permissions toggles. +Note that these settings may increase sync times. +==== + +[discrete#es-connectors-sharepoint-online-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-sharepoint-online-client-documents-syncs] +===== Documents and syncs + +The connector syncs the following SharePoint object types: + +* *Sites* (and subsites) +* *Lists* +* *List items* and *attachment content* +* *Document libraries* and *attachment content* (including web pages) + +[TIP] +==== +*Making Sharepoint Site Pages Web Part content searchable* + +If you're using Web Parts on Sharepoint Site Pages and want to make this content searchable, you'll need to consult the https://learn.microsoft.com/en-us/sharepoint/dev/spfx/web-parts/guidance/integrate-web-part-properties-with-sharepoint#specify-web-part-property-value-type/[official documentation^]. + +We recommend setting `isHtmlString` to *True* for all Web Parts that need to be searchable. +==== + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. +* Permissions are not synced by default. Enable <> to sync permissions. +==== + +[discrete#es-connectors-sharepoint-online-client-documents-syncs-limitations] +====== Limitations + +* The connector does not currently sync content from Teams-connected sites. + +[discrete#es-connectors-sharepoint-online-client-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. +For more information read <>. + +[discrete#es-connectors-sharepoint-online-client-sync-rules-advanced] +====== Advanced sync rules + +[NOTE] +==== +A <> is required for advanced sync rules to take effect. +==== + +The following section describes *advanced sync rules* for this connector. +Advanced sync rules are defined through a source-specific DSL JSON snippet. + +<> for the Sharepoint Online connector enable you to avoid extracting and syncing older data that might no longer be relevant for search. 
+ +Example: + +[source,js] +---- +{ + "skipExtractingDriveItemsOlderThan": 60 +} + +---- +// NOTCONSOLE + +This rule will not extract content of any drive items (files in document libraries) that haven't been modified for 60 days or more. + +[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations] +======= Limitations of sync rules with incremental syncs + +Changing sync rules after Sharepoint Online content has already been indexed can bring unexpected results, when using <>. + +Incremental syncs ensure _updates_ from 3rd-party system, but do not modify existing documents in the index. + +*To avoid these issues, run a full sync after changing sync rules (basic or advanced).* + +Let's take a look at several examples where incremental syncs might lead to inconsistent data on your index. + +[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations-restrictive-added] +======== Example: Restrictive basic sync rule added after a full sync + +Imagine your Sharepoint Online drive contains the following drive items: + +[source,txt] +---- +/Documents/Report.doc +/Documents/Spreadsheet.xls +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +/Personal/Documents/Sales.xls +---- + +After a sync, all these drive items will be stored on your Elasticsearch index. +Let's add a basic sync rule, filtering files by their path: + +[source,txt] +---- +Exclude WHERE path CONTAINS "Documents" +---- + +These filtering rules will exclude all files with "Documents" in their path, leaving only files in `/Presentations` directory: + +[source,txt] +---- +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +---- + +If no files were changed, incremental sync will not receive information about changes from Sharepoint Online and won't be able to delete any files, leaving the index in the same state it was before the sync. + +After a *full sync*, the index will be updated and files that are excluded by sync rules will be removed. + +[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations-restrictive-removed] +======== Example: Restrictive basic sync rules removed after a full sync + +Imagine that Sharepoint Online drive has the following drive items: + +[source,txt] +---- +/Documents/Report.doc +/Documents/Spreadsheet.xls +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +/Personal/Documents/Sales.xls +---- + +Before doing a sync, we add a restrictive basic filtering rule: + +[source,txt] +---- +Exclude WHERE path CONTAINS "Documents" +---- + +After a full sync, the index will contain only files in the `/Presentations` directory: + +[source,txt] +---- +/Presentations/Q4-2020-Report.pdf +/Presentations/Q4-2020-Report-Data.xls +---- + +Afterwards, we can remove the filtering rule and run an incremental sync. If no changes happened to the files, incremental sync will not mirror these changes in the Elasticsearch index, because Sharepoint Online will not report any changes to the items. +Only a *full sync* will include the items previously ignored by the sync rule. + +[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations-restrictive-changed] +======== Example: Advanced sync rules edge case + +Advanced sync rules can be applied to limit which documents will have content extracted. +For example, it's possible to set a rule so that documents older than 180 days won't have content extracted. + +However, there is an edge case. 
+Imagine a document that is 179 days old and its content is extracted and indexed into Elasticsearch. +After 2 days, this document will be 181 days old. +Since this document was already ingested it will not be modified. +Therefore, the content will not be removed from the index, following an incremental sync. + +In this situation, if you want older documents to be removed, you will need to clean the index up manually. +For example, you can manually run an Elasticsearch query that removes drive item content older than 180 days: + +[source, console] +---- +POST INDEX_NAME/_update_by_query?conflicts=proceed +{ + "query": { + "bool": { + "filter": [ + { + "match": { + "object_type": "drive_item" + } + }, + { + "exists": { + "field": "file" + } + }, + { + "range": { + "lastModifiedDateTime": { + "lte": "now-180d" + } + } + } + ] + } + }, + "script": { + "source": "ctx._source.body = ''", + "lang": "painless" + } +} +---- +// TEST[skip:TODO] + +[discrete#es-connectors-sharepoint-online-client-dls] +===== Document-level security + +Document-level security (DLS) enables you to restrict access to documents based on a user's permissions. +This feature is available by default for this connector. + +Refer to <> on this page for how to enable DLS for this connector. + +[TIP] +==== +Refer to <> to learn how to ingest data from SharePoint Online with DLS enabled, when building a search application. +==== + +[discrete#es-connectors-sharepoint-online-client-content-extraction] +===== Content extraction + +[discrete#es-connectors-sharepoint-online-client-content-extraction-pipeline] +====== Default content extraction + +The default content extraction service is powered by the Enterprise Search default ingest pipeline. +(See {ref}/ingest-pipeline-search.html[Ingest pipelines for Search indices].) + +See <>. + +[discrete#es-connectors-sharepoint-online-client-content-extraction-local] +====== Local content extraction (for large files) + +The SharePoint Online self-managed connector supports large file content extraction (> *100MB*). +This requires: + +* A self-managed deployment of the Elastic Text Extraction Service. +* Text extraction to be _disabled_ in the default ingest pipeline settings. + +Refer to <> for more information. + +[discrete#es-connectors-sharepoint-online-client-testing] +===== End-to-end testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the SharePoint Online connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=sharepoint_online +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=sharepoint_online DATA_SIZE=small +---- + +[discrete#es-connectors-sharepoint-online-client-known-issues] +===== Known issues + +* *Documents failing to sync due to SharePoint file and folder limits* ++ +SharePoint has limits on the number of files and folders that can be synced. +You might encounter an error like the following written to the body of documents that failed to sync: +`The file size exceeds the allowed limit. CorrelationId: fdb36977-7cb8-4739-992f-49878ada6686, UTC DateTime: 4/21/2022 11:24:22 PM` ++ +Refer to https://support.microsoft.com/en-us/office/download-files-and-folders-from-onedrive-or-sharepoint-5c7397b7-19c7-4893-84fe-d02e8fa5df05#:~:text=Downloads%20are%20subject%20to%20the,zip%20file%20and%2020GB%20overall[SharePoint documentation^] for more information about these limits. 
++ +** *Syncing a large number of files* ++ +The connector will fail to download files from folders that contain more than 5000 files. +The List View Threshold (default 5000) is a limit that prevents operations with a high performance impact on the SharePoint Online environment. ++ +*Workaround:* Reduce batch size to avoid this issue. ++ +** *Syncing large files* ++ +SharePoint has file size limits, but these are configurable. ++ +*Workaround:* Increase the file size limit. +Refer to https://learn.microsoft.com/en-us/sharepoint/manage-site-collection-storage-limits#set-automatic-or-manual-site-storage-limits[SharePoint documentation^] for more information. ++ +** *Deleted documents counter is not updated during incremental syncs* ++ +If the configuration `Enumerate All Sites?` is enabled, incremental syncs may not behave as expected. +Drive Item documents that were deleted between incremental syncs may not be detected as deleted. ++ +*Workaround*: Disable `Enumerate All Sites?`, and configure full site paths for all desired sites. + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-sharepoint-online-client-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-sharepoint-online-client-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-sharepoint.asciidoc b/docs/reference/connector/docs/connectors-sharepoint.asciidoc new file mode 100644 index 0000000000000..f5590daa1e701 --- /dev/null +++ b/docs/reference/connector/docs/connectors-sharepoint.asciidoc @@ -0,0 +1,412 @@ +[#es-connectors-sharepoint] +=== Elastic SharePoint Server connector reference +++++ +SharePoint Server +++++ +// Attributes used in this file +:service-name: SharePoint Server +:service-name-stub: sharepoint_server + +The _Elastic SharePoint Server connector_ is a <> for https://www.microsoft.com/en-ww/microsoft-365/sharepoint/[Microsoft SharePoint Server^]. + +This connector is written in Python using the open code {connectors-python}[Elastic connector framework^]. +View the {connectors-python}/connectors/sources/sharepoint_server.py[source code for this connector^]. + +[TIP] +==== +Looking for the SharePoint *Online* connector? See the <>. +==== + + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-sharepoint-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-sharepoint-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a managed service since Elastic *8.15.0*. +To use this connector, satisfy all <>. + +[NOTE] +==== +This connector is in *beta* and is subject to change. +Beta features are subject to change and are not covered by the support SLA of generally available (GA) features. +Elastic plans to promote this feature to GA in a future release. +==== + +[discrete#es-connectors-sharepoint-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-sharepoint-usage] +===== Usage + +See <>. + +For additional operations, see <>. 
+ +[discrete#es-connectors-sharepoint-compatability] +===== Compatibility + +The following SharePoint Server versions are compatible: + +* SharePoint 2013 +* SharePoint 2016 +* SharePoint 2019 + +[discrete#es-connectors-sharepoint-configuration] +===== Configuration + +The following configuration fields are required to set up the connector: + +`username`:: +The username of the account for the SharePoint Server instance. + +`password`:: +The password of the account. + +`host_url`:: +The server host url where the SharePoint Server instance is hosted. Examples: +* `https://192.158.1.38:8080` +* `https://.sharepoint.com` + +`site_collections`:: +Comma-separated list of site collections to fetch from SharePoint Server. Examples: +* `collection1` +* `collection1, collection2` + +`ssl_enabled`:: +Whether SSL verification will be enabled. +Default value is `False`. + +`ssl_ca`:: +Content of SSL certificate needed for SharePoint Server. +Keep this field empty, if `ssl_enabled` is set to `False`. ++ +Example certificate: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +`retry_count`:: +The number of retry attempts after a failed request to the SharePoint Server instance. Default value is `3`. + +`use_document_level_security`:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. Access control syncs fetch users' access control lists and store them in a separate index. ++ +Once enabled, the following granular permissions toggles will be available: ++ +** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. +** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. ++ +[NOTE] +==== +If left empty the default value `true` will be used for these granular permissions toggles. +Note that these settings may increase sync times. +==== + +[discrete#es-connectors-sharepoint-documents-syncs] +===== Documents and syncs + +The connector syncs the following SharePoint object types: + +* Sites and Subsites +* Lists +* List Items and its attachment content +* Document Libraries and its attachment content(include Web Pages) + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. +* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elasticsearch Index. +==== + +[discrete#es-connectors-sharepoint-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-sharepoint-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint _Online_ as the data source, but the same steps apply to every connector. 
+==== + +[discrete#es-connectors-sharepoint-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently filtering is controlled via ingest pipelines. + +[discrete#es-connectors-sharepoint-content-extraction] +===== Content Extraction + +See <>. + +[discrete#es-connectors-sharepoint-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-sharepoint-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-sharepoint-security] +===== Security + +See <>. + +[discrete#es-connectors-sharepoint-source] +===== Framework and source + +This connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/sharepoint_server.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-sharepoint-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-sharepoint-client-availability-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +This self-managed connector is compatible with Elastic versions *8.9.0+*. +To use this connector, satisfy all <>. + +[NOTE] +==== +This connector is in *beta* and is subject to change. +Beta features are subject to change and are not covered by the support SLA of generally available (GA) features. +Elastic plans to promote this feature to GA in a future release. +==== + +[discrete#es-connectors-sharepoint-client-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-sharepoint-client-usage] +===== Usage + +To use this connector as a *self-managed connector*, see <>. + +For additional operations, see <>. + +[discrete#es-connectors-sharepoint-client-compatability] +===== Compatibility + +The following SharePoint Server versions are compatible with the Elastic connector framework: + +* SharePoint 2013 +* SharePoint 2016 +* SharePoint 2019 + +[discrete#es-connectors-sharepoint-client-configuration] +===== Configuration + +[TIP] +==== +When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/sharepoint_server.py[connector source code^]. +These are set in the `get_default_configuration` function definition. + +These configurable fields will be rendered with their respective *labels* in the Kibana UI. +Once connected, you'll be able to update these values in Kibana. +==== + +The following configuration fields are required to set up the connector: + +`username`:: +The username of the account for the SharePoint Server instance. + +`password`:: +The password of the account. + +`host_url`:: +The server host url where the SharePoint Server instance is hosted. 
Examples: +* `https://192.158.1.38:8080` +* `https://.sharepoint.com` + +`site_collections`:: +Comma-separated list of site collections to fetch from SharePoint Server. Examples: +* `collection1` +* `collection1, collection2` + +`ssl_enabled`:: +Whether SSL verification will be enabled. +Default value is `False`. + +`ssl_ca`:: +Content of SSL certificate needed for the SharePoint Server instance. +Keep this field empty, if `ssl_enabled` is set to `False`. ++ +Example certificate: ++ +[source, txt] +---- +-----BEGIN CERTIFICATE----- +MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT +... +7RhLQyWn2u00L7/9Omw= +-----END CERTIFICATE----- +---- + +`retry_count`:: +The number of retry attempts after failed request to the SharePoint Server instance. Default value is `3`. + +`use_document_level_security`:: +Toggle to enable <>. +When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. Access control syncs fetch users' access control lists and store them in a separate index. ++ +Once enabled, the following granular permissions toggles will be available: ++ +** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. +** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. ++ +[NOTE] +==== +If left empty the default value `true` will be used for these granular permissions toggles. +Note that these settings may increase sync times. +==== + +[discrete#es-connectors-sharepoint-client-docker] +===== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-sharepoint-client-documents-syncs] + +===== Documents and syncs + +The connector syncs the following SharePoint object types: + +* Sites and Subsites +* Lists +* List Items and its attachment content +* Document Libraries and its attachment content(include Web Pages) + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. +* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elasticsearch Index. +==== + +[discrete#es-connectors-sharepoint-client-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>, but this feature is currently disabled by default. +Refer to the linked documentation for enabling incremental syncs. + +[discrete#es-connectors-sharepoint-client-document-level-security] +===== Document level security + +Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. +Refer to <> on this page for how to enable DLS for this connector. + +[NOTE] +==== +Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. +The example uses SharePoint Online as the data source, but the same steps apply to every connector. +==== + +[discrete#es-connectors-sharepoint-client-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. +Currently filtering is controlled via ingest pipelines. 
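+
+For example, documents can be dropped at ingest time by adding a `drop` processor to the ingest pipeline attached to the connector's index.
+The following is a minimal sketch using the Python Elasticsearch client; the pipeline name, the `title` field, and the condition are illustrative only.
+
+[source,python]
+----
+from elasticsearch import Elasticsearch
+
+# Minimal sketch: create an ingest pipeline with a drop processor that skips
+# documents whose title contains "Archived". The pipeline name, field name,
+# and condition are examples only; adjust authentication for your deployment.
+es = Elasticsearch("http://localhost:9200")
+
+es.ingest.put_pipeline(
+    id="sharepoint-server-drop-archived",
+    description="Drop documents with 'Archived' in the title",
+    processors=[
+        {
+            "drop": {
+                "if": "ctx.title != null && ctx.title.contains('Archived')"
+            }
+        }
+    ],
+)
+----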
+
+[discrete#es-connectors-sharepoint-client-content-extraction]
+===== Content Extraction
+
+See <>.
+
+[discrete#es-connectors-sharepoint-client-connector-client-operations]
+===== Self-managed connector operations
+
+[discrete#es-connectors-sharepoint-client-testing]
+===== End-to-end testing
+
+The connector framework enables operators to run functional tests against a real data source.
+Refer to <> for more details.
+
+To perform E2E testing for the SharePoint Server connector, run the following command:
+
+[source,shell]
+----
+$ make ftest NAME=sharepoint_server
+----
+
+For faster tests, add the `DATA_SIZE=small` flag:
+
+[source,shell]
+----
+make ftest NAME=sharepoint_server DATA_SIZE=small
+----
+
+[discrete#es-connectors-sharepoint-client-known-issues]
+===== Known issues
+
+There are currently no known issues for this connector.
+Refer to <> for a list of known issues for all connectors.
+
+[discrete#es-connectors-sharepoint-client-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-sharepoint-client-security]
+===== Security
+
+See <>.
+
+[discrete#es-connectors-sharepoint-client-source]
+===== Framework and source
+
+This connector is written in Python using the {connectors-python}[Elastic connector framework^].
+
+View the {connectors-python}/connectors/sources/sharepoint_server.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_).
+
+// Closing the collapsible section
+===============
diff --git a/docs/reference/connector/docs/connectors-slack.asciidoc b/docs/reference/connector/docs/connectors-slack.asciidoc
new file mode 100644
index 0000000000000..059394f28d4ec
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-slack.asciidoc
@@ -0,0 +1,344 @@
+[#es-connectors-slack]
+=== Elastic Slack connector reference
+++++
+Slack
+++++
+// Attributes used in this file
+:service-name: Slack
+:service-name-stub: slack
+
+The Slack connector is written in Python using the {connectors-python}[Elastic connector framework^].
+
+View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_).
+
+// //////// //// //// //// //// //// //// ////////
+// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) ///////
+// //////// //// //// //// //// //// //// ////////
+
+[discrete#es-connectors-slack-native-connector-reference]
+==== *Elastic managed connector reference*
+
+.View *Elastic managed connector* reference
+[%collapsible]
+===============
+
+[discrete#es-connectors-slack-availability]
+===== Availability and prerequisites
+
+This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud.
+
+To use this connector natively in Elastic Cloud, satisfy all <>.
+
+[NOTE]
+====
+This connector is in **technical preview** and is subject to change.
+The design and code is less mature than official GA features and is being provided as-is with no warranties.
+Technical preview features are not subject to the support SLA of official GA features.
+====
+
+[discrete#es-connectors-slack-create-connector-native]
+===== Create a {service-name} connector
+include::_connectors-create-native.asciidoc[]
+
+[discrete#es-connectors-slack-usage]
+===== Usage
+
+To use this connector in the UI, select the *Slack* tile when creating a new connector under *Search -> Connectors*.
+ +If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. + +For additional operations, see <>. + +[NOTE] +==== +You need to create a Slack application to authenticate with Slack. +==== + +[discrete#es-connectors-slack-app] +====== Create a Slack application + +When created you'll receive a credential that the connector uses for authentication. +A new Bot user will also be created. + +[TIP] +==== +The connector will only sync messages from the channels of which the Bot user is a member. +==== + +To create the app, follow these steps: + +1. Go to https://api.slack.com/apps and click "Create New App". +2. Choose "From Scratch". +3. Name the app, and select the workspace you want to sync from. +Depending on the workspace's settings, you may get a warning about requiring admin approval. +That will be handled later. +4. Navigate to "OAuth & Permissions" in the sidebar. +5. Scroll down to the "Scopes" section and add these scopes: +* `channels:history` +* `channels:read` +* `users:read`. ++ +Optionally, you can also add `channels:join` if you want the App Bot to automatically be able to add itself to public channels. +6. Scroll up to "OAuth Tokens for Your Workspace" and install the application. Your workspace may require you to get administrator approval. If so, request approval now and return to the next step once it has been approved. +7. Copy and save the new "Bot User OAuth Token". +This credential will be used when configuring the connector. + +[discrete#es-connectors-slack-configuration] +===== Configuration + +The following settings are required to set up this connector: + +`token`(required) :: +The Bot User OAuth Token generated by creating and installing your Slack App. + +`fetch_last_n_days`(required) :: +The number of days of history to fetch from Slack. +This must be a positive number to fetch a subset of data, going back that many days. +If set to `0`, it will fetch all data since the beginning of the workspace. +The default is 180 days. + +`auto_join_channels`(required) :: +Whether or not the connector should have the App's Bot User automatically invite itself into all public channels. +The connector will only sync messages from the channels of which the Bot user is a member. +By default, the bot will not invite itself to any channels, and must be manually invited to each channel that you wish to sync. +If this setting is enabled, your App must have the `channels.join` scope. + +`sync_users`(required) :: + +Whether or not the connector should index a document for each Slack user. +By default, the connector will create documents only for Channels and Messages. +However, regardless of the value of this setting, the Slack App does need the `users.read` scope and will make requests to enumerate all of the workspace's users. +This allows the messages to be enriched with human-readable usernames, and not rely on unreadable User UIDs. +Therefore, disabling this setting does not result in a speed improvement, but merely results in less overall storage in Elasticsearch. + +[discrete#es-connectors-slack-sync-rules] +===== Sync rules + +_Basic_ sync rules are identical for all connectors and are available by default. + +Advanced sync rules are not available for this connector in the present version. + +For more information read <>. + +[discrete#es-connectors-slack-content-extraction] +===== Content Extraction + +This connector does not currently support processing Slack attachments or other binary files. + +//See <>. 
+ +[discrete#es-connectors-slack-documents-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Channels* +* *Messages* +* *Users* (configurable) + +[NOTE] +==== +* Only public channels and messages from public channels are synced. +* No permissions are synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. +==== + + +[discrete#es-connectors-slack-known-issues] +===== Known issues + +There are currently no known issues for this connector. +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-slack-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-slack-security] +===== Security + +See <>. + + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-slack-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-slack-client-availability] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector* from the *Elastic connector framework*. + +This self-managed connector is compatible with Elastic versions *8.10.0+*. + +To use this connector, satisfy all <>. + +[NOTE] +==== +This connector is in **technical preview** and is subject to change. +The design and code is less mature than official GA features and is being provided as-is with no warranties. +Technical preview features are not subject to the support SLA of official GA features. +==== + +[discrete#es-connectors-slack-client-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-slack-client-usage] +===== Usage + +To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. + +For additional operations, see <>. + +[NOTE] +==== +You need to create a Slack application to authenticate with Slack. +==== + +[discrete#es-connectors-slack-client-app] +====== Create a Slack application + +When created you'll receive a credential that the connector uses for authentication. +A new Bot user will also be created. + +[TIP] +==== +The connector will only sync messages from the channels of which the Bot user is a member. +==== + +To create the app, follow these steps: + +1. Go to https://api.slack.com/apps and click "Create New App". +2. Choose "From Scratch". +3. Name the app, and select the workspace you want to sync from. +Depending on the workspace's settings, you may get a warning about requiring admin approval. +That will be handled later. +4. Navigate to "OAuth & Permissions" in the sidebar. +5. Scroll down to the "Scopes" section and add these scopes: +* `channels:history` +* `channels:read` +* `users:read`. ++ +Optionally, you can also add `channels:join` if you want the App Bot to automatically be able to add itself to public channels. +6. Scroll up to "OAuth Tokens for Your Workspace" and install the application. Your workspace may require you to get administrator approval. If so, request approval now and return to the next step once it has been approved. +7. Copy and save the new "Bot User OAuth Token". +This credential will be used when configuring the connector. 
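+
+Optionally, you can sanity-check the token before configuring the connector by calling Slack's `auth.test` Web API method.
+This is not part of the official setup steps, just a quick way to confirm the Bot User OAuth Token works; replace the placeholder value with the token you copied above:
+
+[source,shell]
+----
+curl -s -H "Authorization: Bearer xoxb-your-bot-token" https://slack.com/api/auth.test
+----
+
+A response containing `"ok": true` means the token is valid; `"ok": false` with an `invalid_auth` error usually indicates the token was copied incorrectly or the app was not installed to the workspace.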
+
+[discrete#es-connectors-slack-client-docker]
+===== Deploy with Docker
+
+include::_connectors-docker-instructions.asciidoc[]
+
+[discrete#es-connectors-slack-client-configuration]
+===== Configuration
+
+The following settings are required to set up this connector:
+
+`token` (required) ::
+The Bot User OAuth Token generated by creating and installing your Slack App.
+
+`fetch_last_n_days` (required) ::
+The number of days of history to fetch from Slack.
+This must be a positive number to fetch a subset of data, going back that many days.
+If set to `0`, it will fetch all data since the beginning of the workspace.
+The default is 180 days.
+
+`auto_join_channels` (required) ::
+Whether the connector should have the App's Bot User automatically invite itself into all public channels.
+The connector will only sync messages from the channels of which the Bot user is a member.
+By default, the bot will not invite itself to any channels, and must be manually invited to each channel that you wish to sync.
+If this setting is enabled, your App must have the `channels:join` scope.
+
+`sync_users` (required) ::
+Whether the connector should index a document for each Slack user.
+By default, the connector will create documents only for Channels and Messages.
+However, regardless of the value of this setting, the Slack App does need the `users:read` scope and will make requests to enumerate all of the workspace's users.
+This allows messages to be enriched with human-readable usernames, rather than relying on opaque user IDs.
+Therefore, disabling this setting does not result in a speed improvement, but merely results in less overall storage in Elasticsearch.
+
+[discrete#es-connectors-slack-client-sync-rules]
+===== Sync rules
+
+_Basic_ sync rules are identical for all connectors and are available by default.
+
+Advanced sync rules are not available for this connector in the present version.
+
+For more information, read <>.
+
+[discrete#es-connectors-slack-client-content-extraction]
+===== Content Extraction
+
+This connector does not currently support processing Slack attachments or other binary files.
+
+//See <>.
+
+[discrete#es-connectors-slack-client-documents-syncs]
+===== Documents and syncs
+
+The connector syncs the following objects and entities:
+
+* *Channels*
+* *Messages*
+* *Users* (configurable)
+
+[NOTE]
+====
+* Only public channels and messages from public channels are synced.
+* No permissions are synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment.
+====
+
+[discrete#es-connectors-slack-client-connector-client-operations]
+===== Self-managed connector operations
+
+[discrete#es-connectors-slack-client-testing]
+===== End-to-end testing
+
+The connector framework enables operators to run functional tests against a real data source.
+Refer to <> for more details.
+
+To perform E2E testing for the Slack connector, run the following command:
+
+[source,shell]
+----
+$ make ftest NAME=slack
+----
+
+For faster tests, add the `DATA_SIZE=small` flag:
+
+[source,shell]
+----
+make ftest NAME=slack DATA_SIZE=small
+----
+
+[discrete#es-connectors-slack-client-known-issues]
+===== Known issues
+
+There are currently no known issues for this connector.
+Refer to <> for a list of known issues for all connectors.
+
+[discrete#es-connectors-slack-client-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-slack-client-security]
+===== Security
+
+See <>.
+ + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/connectors-teams.asciidoc b/docs/reference/connector/docs/connectors-teams.asciidoc new file mode 100644 index 0000000000000..cd7d05d884349 --- /dev/null +++ b/docs/reference/connector/docs/connectors-teams.asciidoc @@ -0,0 +1,359 @@ +[#es-connectors-teams] +=== Elastic Microsoft Teams connector reference +++++ +Teams +++++ +// Attributes used in this file +:service-name: Microsoft Teams +:service-name-stub: microsoft_teams + +The Microsoft Teams connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/microsoft_teams.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-teams-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-microsoft-teams-native-availability-and-prerequisites] +===== Availability and prerequisites + +This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[NOTE] +==== +This connector is in **technical preview** and is subject to change. +The design and code is less mature than official GA features and is being provided as-is with no warranties. +Technical preview features are not subject to the support SLA of official GA features. +==== + +[discrete#es-connectors-teams-create-connector-native] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-microsoft-teams-native-usage] +===== Usage + +To use this connector in the UI, select the *Teams* tile when creating a new connector under *Search -> Connectors*. + +If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. + +For additional operations, see <>. + +[discrete#es-connectors-microsoft-teams-native-connecting-to-microsoft-teams] +===== Connecting to Microsoft Teams + +To connect to Microsoft Teams you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. Follow these steps: + +1. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. +2. Navigate to the *Azure Active Directory* service. +3. Select *App registrations* from the left-hand menu. +4. Click on the *New registration* button to register a new application. +5. Provide a *name* for your app, and _optionally_ select the supported account types (e.g., single tenant, multi-tenant). +6. Click on the *Register* button to create the app registration. +7. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. +8. Scroll down to the *API permissions* section and click on the "Add a permission" button. +9. In the "Request API permissions pane, select "Microsoft Graph" as the API. +10. 
Select the following *permissions*: ++ +* `TeamMember.Read.All` (Delegated) +* `Team.ReadBasic.All` (Delegated) +* `TeamsTab.Read.All` (Delegated) +* `Group.Read.All` (Delegated) +* `ChannelMessage.Read.All` (Delegated) +* `Chat.Read` (Delegated) & `Chat.Read.All` (Application) +* `Chat.ReadBasic` (Delegated) & `Chat.ReadBasic.All` (Application) +* `Files.Read.All` (Delegated and Application) +* `Calendars.Read` (Delegated and Application) ++ +11. Click on the *Add permissions* button to add the selected permissions to your app. +12. Click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. *If you are not an admin, you need to request the admin to grant consent via their Azure Portal*. +13. Under the "Certificates & Secrets" tab, go to *Client Secrets*. +Generate a new client secret and keep a note of the string under the `Value` column. + +After completion, use the following configuration parameters to configure the connector. + +[discrete#es-connectors-microsoft-teams-native-configuration] +===== Configuration + +The following configuration fields are required: + +`client_id` (required):: +Unique identifier for your Azure Application, found on the app's overview page. Example: +* `ab123453-12a2-100a-1123-93fd09d67394` + +`secret_value` (required):: +String value that the application uses to prove its identity when requesting a token, available under the `Certificates & Secrets` tab of your Azure application menu. Example: +* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` + +`tenant_id` (required):: +Unique identifier for your Azure Active Directory instance, found on the app's overview page. Example: +* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` + +`username` (required):: +Username for your Azure Application. Example: +* `dummy@3hmr2@onmicrosoft.com` + +`password` (required):: +Password for your Azure Application. Example: +* `changeme` + +[discrete#es-connectors-microsoft-teams-native-content-extraction] +====== Content Extraction + +Refer to <>. + +[discrete#es-connectors-microsoft-teams-native-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *USER_CHATS_MESSAGE* +* *USER_CHAT_TABS* +* *USER_CHAT_ATTACHMENT* +* *USER_CHAT_MEETING_RECORDING* +* *USER_MEETING* +* *TEAMS* +* *TEAM_CHANNEL* +* *CHANNEL_TAB* +* *CHANNEL_MESSAGE* +* *CHANNEL_MEETING* +* *CHANNEL_ATTACHMENT* +* *CALENDAR_EVENTS* + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-microsoft-teams-native-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-microsoft-teams-native-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-microsoft-teams-native-advanced-sync-rules] +===== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-microsoft-teams-native-known-issues] +===== Known issues + +* Messages in one-on-one chats for _Chat with Self_ users are not fetched via Graph APIs. Therefore, these messages won't be indexed into Elasticsearch. + +Refer to <> for a list of known issues for all connectors. 
+ +[discrete#es-connectors-microsoft-teams-native-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-microsoft-teams-native-security] +===== Security + +See <>. + +// Closing the collapsible section +=============== + + +// //////// //// //// //// //// //// //// //////// +// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-teams-connector-client-reference] +==== *Self-managed connector reference* + +.View *self-managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-microsoft-teams-availability-and-prerequisites] +===== Availability and prerequisites + +This connector is available as a self-managed *self-managed connector*. +To use this connector, satisfy all <>. + +[NOTE] +==== +This connector is in *technical preview* and is subject to change. +The design and code is less mature than official GA features and is being provided as-is with no warranties. +Technical preview features are not subject to the support SLA of official GA features. +==== + +[discrete#es-connectors-teams-client-create-connector-client] +===== Create a {service-name} connector +include::_connectors-create-client.asciidoc[] + +[discrete#es-connectors-microsoft-teams-usage] +===== Usage + +To use this connector as a *self-managed connector*, use the *Microsoft Teams* tile from the connectors list *Customized connector* workflow. + +For additional operations, see <>. + +[discrete#es-connectors-microsoft-teams-connecting-to-microsoft-teams] +===== Connecting to Microsoft Teams + +To connect to Microsoft Teams you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. Follow these steps: + +1. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. +2. Navigate to the *Azure Active Directory* service. +3. Select *App registrations* from the left-hand menu. +4. Click on the *New registration* button to register a new application. +5. Provide a *name* for your app, and _optionally_ select the supported account types (e.g., single tenant, multi-tenant). +6. Click on the *Register* button to create the app registration. +7. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. +8. Scroll down to the *API permissions* section and click on the "Add a permission" button. +9. In the "Request API permissions pane, select "Microsoft Graph" as the API. +10. Select the following *permissions*: ++ +* `TeamMember.Read.All` (Delegated) +* `Team.ReadBasic.All` (Delegated) +* `TeamsTab.Read.All` (Delegated) +* `Group.Read.All` (Delegated) +* `ChannelMessage.Read.All` (Delegated) +* `Chat.Read` (Delegated) & `Chat.Read.All` (Application) +* `Chat.ReadBasic` (Delegated) & `Chat.ReadBasic.All` (Application) +* `Files.Read.All` (Delegated and Application) +* `Calendars.Read` (Delegated and Application) ++ +11. Click on the *Add permissions* button to add the selected permissions to your app. +12. Click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. *If you are not an admin, you need to request the admin to grant consent via their Azure Portal*. +13. Under the "Certificates & Secrets" tab, go to *Client Secrets*. 
+Generate a new client secret and keep a note of the string under the `Value` column. + +After completion, use the following configuration parameters to configure the connector. + +[discrete#es-connectors-microsoft-teams-configuration] +===== Configuration + +The following configuration fields are required: + +`client_id` (required):: +Unique identifier for your Azure Application, found on the app's overview page. Example: +* `ab123453-12a2-100a-1123-93fd09d67394` + +`secret_value` (required):: +String value that the application uses to prove its identity when requesting a token, available under the `Certificates & Secrets` tab of your Azure application menu. Example: +* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` + +`tenant_id` (required):: +Unique identifier for your Azure Active Directory instance, found on the app's overview page. Example: +* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` + +`username` (required):: +Username for your Azure Application. Example: +* `dummy@3hmr2@onmicrosoft.com` + +`password` (required):: +Password for your Azure Application. Example: +* `changeme` + +[discrete#es-connectors-microsoft-teams-client-docker] +====== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-microsoft-teams-content-extraction] +====== Content Extraction + +Refer to <>. + +[discrete#es-connectors-microsoft-teams-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *USER_CHATS_MESSAGE* +* *USER_CHAT_TABS* +* *USER_CHAT_ATTACHMENT* +* *USER_CHAT_MEETING_RECORDING* +* *USER_MEETING* +* *TEAMS* +* *TEAM_CHANNEL* +* *CHANNEL_TAB* +* *CHANNEL_MESSAGE* +* *CHANNEL_MEETING* +* *CHANNEL_ATTACHMENT* +* *CALENDAR_EVENTS* + +[NOTE] +==== +* Files bigger than 10 MB won't be extracted. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-microsoft-teams-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-microsoft-teams-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-microsoft-teams-advanced-sync-rules] +===== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-microsoft-teams-end-to-end-testing] +===== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. + +To perform E2E testing for the Teams connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=microsoft_teams +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=microsoft_teams DATA_SIZE=small +---- + +[discrete#es-connectors-microsoft-teams-known-issues] +===== Known issues + +* Messages in one-on-one chats for _Chat with Self_ users are not fetched via Graph APIs. Therefore, these messages won't be indexed into Elasticsearch. + +Refer to <> for a list of known issues for all connectors. + +[discrete#es-connectors-microsoft-teams-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-microsoft-teams-security] +===== Security + +See <>. 
+
+
+// Closing the collapsible section
+===============
diff --git a/docs/reference/connector/docs/connectors-troubleshooting.asciidoc b/docs/reference/connector/docs/connectors-troubleshooting.asciidoc
new file mode 100644
index 0000000000000..798e4e13a253d
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-troubleshooting.asciidoc
@@ -0,0 +1,14 @@
+[#es-connectors-troubleshooting]
+=== Troubleshooting connectors
+++++
+Troubleshooting
+++++
+
+Use the following actions to help diagnose and resolve issues with <> and <>:
+
+* <>.
+* <>.
+* <>.
+* Edit your index configuration: <>, <>.
+
+You can also request help or support.
diff --git a/docs/reference/connector/docs/connectors-usage.asciidoc b/docs/reference/connector/docs/connectors-usage.asciidoc
new file mode 100644
index 0000000000000..97fe7d92e945a
--- /dev/null
+++ b/docs/reference/connector/docs/connectors-usage.asciidoc
@@ -0,0 +1,225 @@
+[#es-connectors-usage]
+== Connectors UI in {kib}
+
+This document describes operations available to <> and <>, using the UI.
+
+In the Kibana UI, go to *Search > Content > Connectors* to view a summary of all your connectors and sync jobs, and to create new connectors.
+
+[TIP]
+====
+In 8.12 we introduced a set of {ref}/connector-apis.html[Connector APIs] to create and manage Elastic connectors and sync jobs, along with a https://github.com/elastic/connectors/blob/main/docs/CLI.md[CLI tool].
+Use these tools if you'd like to work with connectors and sync jobs programmatically, without using the UI.
+====
+
+[discrete#es-connectors-usage-index-create]
+=== Create and configure connectors
+
+Your connector writes data to an {es} index.
+
+To create <> or self-managed <>, use the buttons under *Search > Content > Connectors*.
+Once you've chosen the data source type you'd like to sync, you'll be prompted to create an {es} index.
+
+[discrete#es-connectors-usage-indices]
+=== Manage connector indices
+
+View and manage all Elasticsearch indices managed by connectors.
+
+In the {kib} UI, navigate to *Search > Content > Connectors* to view a list of connector indices and their attributes, including connector type, health, and ingestion status.
+
+Within this interface, you can choose to view the details for each existing index or delete an index.
+Or, you can <>.
+
+These operations require access to Kibana and additional index privileges.
+
+[discrete#es-connectors-usage-index-create-configure-existing-index]
+=== Customize connector index mappings and settings
+
+{es} stores your data as documents in an index. Each index is made up of a set of fields and each field has a type (such as `keyword`, `boolean`, or `date`).
+
+*Mapping* is the process of defining how a document, and the fields it contains, are stored and indexed.
+Connectors use {ref}/dynamic-field-mapping.html[dynamic mapping] to automatically create mappings based on the data fetched from the source.
+
+Index *settings* are configurations that can be adjusted on a per-index basis. They control things like the index's performance, the resources it uses, and how it should handle operations.
+
+When you create an index with a connector, the index is created with _default_ search-optimized field template mappings and index settings. Mappings for specific fields are then dynamically created based on the data fetched from the source.
+ +You can inspect your index mappings in the following ways: + +* *In the {kib} UI*: Navigate to *Search > Content > Indices > _YOUR-INDEX_ > Index Mappings* +* *By API*: Use the {ref}/indices-get-mapping.html[Get mapping API] + +You can manually *edit* the mappings and settings via the {es} APIs: + +* Use the {ref}/indices-put-mapping.html[Put mapping API] to update index mappings. +* Use the {ref}/indices-update-settings.html[Update index settings API] to update index settings. + +It's important to note that these updates are more complex when the index already contains data. + +Refer to the following sections for more information. + +[discrete#es-connectors-usage-index-create-configure-existing-index-no-data] +==== Customize mappings and settings before syncing data + +Updating mappings and settings is simpler when your index has no data. +If you create and attach a _new_ index while setting up a connector, you can customize the mappings and settings before syncing data, using the APIs mentioned earlier. + +[discrete#es-connectors-usage-index-create-configure-existing-index-have-data] +==== Customize mappings and settings after syncing data + +Once data has been added to {es} using dynamic mappings, you can't directly update existing field mappings. +If you've already synced data into an index and want to change the mappings, you'll need to {ref}/docs-reindex.html[reindex your data]. + +The workflow for these updates is as follows: + +. {ref}/indices-create-index.html[Create] a new index with the desired mappings and settings. +. {ref}/docs-reindex.html[Reindex] your data from the old index into this new index. +. Delete the old index. +. (Optional) Use an {ref}/aliases.html[alias], if you want to retain the old index name. +. Attach your connector to the new index or alias. + +[discrete#es-connectors-usage-syncs-recurring] +=== Manage recurring syncs + +After creating an index to be managed by a connector, you can configure automatic, recurring syncs. + +In the {kib} UI, navigate to *Search > Content > Connectors*. + +Choose the index to configure, and then choose the *Scheduling* tab. + +Within this interface, you can enable or disable scheduled: + +. Full content syncs +. Incremental content syncs (if supported) +. Access control syncs (if supported) + +When enabled, you can additionally manage the sync schedule. + +This operation requires access to Kibana and the `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index. + +Alternatively, you can <>. + +After you enable recurring syncs or sync once, the first sync will begin. +(There may be a short delay before the connector service begins the first sync.) +You may want to <> to see the status or errors, or <>. + +[discrete#es-connectors-usage-syncs-manual] +=== Sync once + +After creating the index to be managed by a connector, you can request a single sync at any time. + +In the {kib} UI, navigate to *Search > Content > Elasticsearch indices*. + +Then choose the index to sync. + +Regardless of which tab is active, the *Sync* button is always visible in the top right. +Choose this button to reveal sync options: + +. Full content +. Incremental content (if supported) +. Access control (if supported) + +Choose one of the options to request a sync. +(There may be a short delay before the connector service begins the sync.) + +This operation requires access to Kibana and the `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index. 
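+
+If you prefer to trigger a one-off sync programmatically rather than through the UI, you can create a sync job with the {ref}/connector-apis.html[Connector APIs].
+The following is a minimal sketch, assuming a connector whose ID is `my-connector-id` (a placeholder value); check the Connector API reference for the exact request options supported by your version:
+
+[source,console]
+----
+POST _connector/_sync_job
+{
+  "id": "my-connector-id",
+  "job_type": "full"
+}
+----
+// TEST[skip:TODO]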
+
+[discrete#es-connectors-usage-syncs-cancel]
+=== Cancel sync
+
+After a sync has started, you can cancel the sync before it completes.
+
+In the {kib} UI, navigate to *Search > Content > Elasticsearch indices*.
+
+Then choose the index with the running sync.
+
+Regardless of which tab is active, the *Sync* button is always visible in the top right.
+Choose this button to reveal sync options, and choose *Cancel Syncs* to cancel active syncs.
+This will cancel the running job and mark all _pending_ and _suspended_ jobs as canceled as well.
+(There may be a short delay before the connector service cancels the syncs.)
+
+This operation requires access to Kibana and the `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` and `.elastic-connectors-sync-jobs` indices.
+
+[discrete#es-connectors-usage-index-view]
+=== View status
+
+View the index details to see a variety of information that communicates the status of the index and connector.
+
+In the {kib} UI, navigate to *Search > Content > Elasticsearch indices*.
+
+Then choose the index to view.
+
+The *Overview* tab presents a variety of information, including:
+
+* General information about the connector index, for example: name, description, ingestion type, connector type, and language analyzer.
+* Any errors affecting the connector or sync process.
+* The current ingestion status (see below for possible values).
+* The current document count.
+
+Possible values of ingestion status:
+
+* Incomplete - A connector that is not configured yet.
+* Configured - A connector that is configured.
+* Connected - A connector that can successfully connect to a data source.
+* Error - A connector that failed to connect to the data source.
+* Connector failure - A connector that has not seen any update for more than 30 minutes.
+* Sync failure - A connector that failed in the last sync job.
+
+This tab also displays the recent sync history, including sync status (see below for possible values).
+
+Possible values of sync status:
+
+* Sync pending - The initial job status, the job is pending to be picked up.
+* Sync in progress - The job is running.
+* Canceling sync - Cancellation of the job has been requested.
+* Sync canceled - The job was canceled.
+* Sync suspended - The job was suspended due to service shutdown, and it can be resumed when the service restarts.
+* Sync complete - The job completed successfully.
+* Sync failure - The job failed.
+
+For each sync, choose the `view` button to display the job details, including:
+
+* The job ID
+* Document stats, including: number of documents added/deleted, total number of documents, and volume of documents added
+* Event logs
+* Sync rules that were active when the sync was requested
+* Pipelines that were active when the sync was requested
+
+This operation requires access to Kibana and the `read` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index.
+
+[discrete#es-connectors-usage-documents]
+=== View documents
+
+View the documents the connector has synced from the data source.
+Additionally, view the index mappings to determine the current document schema.
+
+In the {kib} UI, navigate to *Search > Content > Elasticsearch indices*.
+
+Then choose the index to view.
+
+Choose the *Documents* tab to view the synced documents.
+Choose the *Index Mappings* tab to view the index mappings that were created by the connector.
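+
+You can also spot-check the synced documents directly with the {es} search API.
+A minimal sketch, assuming your connector index is named `search-sharepoint` (a placeholder name):
+
+[source,console]
+----
+GET search-sharepoint/_search
+{
+  "size": 5
+}
+----
+// TEST[skip:TODO]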
+ +When setting up a new connector, ensure you are getting the documents and fields you were expecting from the data source. +If not, see <> for help. + +These operations require access to Kibana and the `read` and `manage` {ref}/security-privileges.html[indices privileges^] for the index containing the documents. + +See <> for security details. + +[discrete#es-connectors-usage-sync-rules] +=== Manage sync rules + +Use <> to limit which documents are fetched from the data source, or limit which fetched documents are stored in Elastic. + +In the {kib} UI, navigate to *Search > Content > Elasticsearch indices*. + +Then choose the index to manage and choose the *Sync rules* tab. + +[discrete#es-connectors-usage-pipelines] +=== Manage ingest pipelines + +Use {ref}/ingest-pipeline-search.html[ingest pipelines] to transform fetched data before it is stored in Elastic. + +In the {kib} UI, navigate to *Search > Content > Elasticsearch indices*. + +Then choose the index to manage and choose the *Pipelines* tab. diff --git a/docs/reference/connector/docs/connectors-use-cases.asciidoc b/docs/reference/connector/docs/connectors-use-cases.asciidoc new file mode 100644 index 0000000000000..0fd6e81a8e483 --- /dev/null +++ b/docs/reference/connector/docs/connectors-use-cases.asciidoc @@ -0,0 +1,11 @@ +[#es-connectors-use-cases] +== Connectors use cases +++++ +Use cases +++++ + +Learn how to use connectors for your use case. + +* <> + +include::connectors-architecture.asciidoc[] \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-zoom.asciidoc b/docs/reference/connector/docs/connectors-zoom.asciidoc new file mode 100644 index 0000000000000..d01b9c2be0368 --- /dev/null +++ b/docs/reference/connector/docs/connectors-zoom.asciidoc @@ -0,0 +1,356 @@ +[#es-connectors-zoom] +=== Elastic Zoom connector reference +++++ +Zoom +++++ +// Attributes used in this file +:service-name: Zoom +:service-name-stub: zoom + +The Zoom connector is written in Python using the {connectors-python}[Elastic connector framework^]. + +View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). + + +// //////// //// //// //// //// //// //// //////// +// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// +// //////// //// //// //// //// //// //// //////// + +[discrete#es-connectors-zoom-native-connector-reference] +==== *Elastic managed connector reference* + +.View *Elastic managed connector* reference +[%collapsible] +=============== + +[discrete#es-connectors-zoom-connector-availability-and-prerequisites] +===== Availability and prerequisites + +This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud. + +To use this connector natively in Elastic Cloud, satisfy all <>. + +[NOTE] +==== +This connector is in **technical preview** and is subject to change. +The design and code is less mature than official GA features and is being provided as-is with no warranties. +Technical preview features are not subject to the support SLA of official GA features. +==== + +[discrete#es-connectors-zoom-create-native-connector] +===== Create a {service-name} connector +include::_connectors-create-native.asciidoc[] + +[discrete#es-connectors-zoom-connector-usage] +===== Usage + +To use this connector in the UI, select the *Zoom* tile when creating a new connector under *Search -> Connectors*. 
+ +If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. + +For additional operations, see <>. + +[discrete#es-connectors-zoom-connector-connecting-to-zoom] +===== Connecting to Zoom + +To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s-oauth/[create an Server-to-Server OAuth application] that can access resources. Follow these steps: + +1. Go to the https://marketplace.zoom.us/[Zoom App Marketplace] and sign in with your Zoom account. +2. Navigate to the "Develop" service. +3. Select "Build App" from the dropdown menu. +4. Click on the "Server-to-Server OAuth" button to register a new application. +5. Provide a name for your app. +6. Click on the "Create" button to create the app registration. +7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later. +8. Navigate to the "Scopes" section and click on the "Add Scopes" button. +9. The following scopes need to be added to the app. ++ +[source,bash] +---- +user:read:admin +meeting:read:admin +chat_channel:read:admin +recording:read:admin +chat_message:read:admin +report:read:admin +---- + +10. Click on the "Done" button to add the selected scopes to your app. +11. Navigate to the "Activation" section and input the necessary information to activate the app. + +After completion, use the following configuration parameters to configure the connector. + +[discrete#es-connectors-zoom-connector-configuration] +===== Configuration + +The following configuration fields are required: + +`Zoom application Account ID`:: (required) +"Account ID" is a unique identifier associated with a specific Zoom account within the Zoom platform, found on the app's overview page. Example: + +* `KVx-aQssTOutOAGrDfgMaA` + +`Zoom application Client ID`:: (required) +"Client ID" refers to a unique identifier associated with an application that integrates with the Zoom platform, found on the app's overview page. Example: + +* `49Z69_rnRiaF4JYyfHusw` + +`Zoom application Client Secret`:: (required) +The "Client Secret" refers to a confidential piece of information generated when developers register an application on the Zoom Developer Portal for integration with the Zoom platform, found on the app's overview page. Example: + +* `eieiUJRsiH543P5NbYadavczjkqgdRTw` + +`Recording Age Limit (Months)`:: (required) +How far back in time to request recordings from Zoom. Recordings older than this will not be indexed. This configuration parameter allows you to define a time limit, measured in months, for which recordings will be indexed. + +`Fetch past meeting details`:: +Retrieve more information about previous meetings, including their details and participants. Default value is `False`. Enable this option to fetch past meeting details. This setting can increase sync time. + +[discrete#es-connectors-zoom-connector-content-extraction] +====== Content Extraction + +Refer to <>. + +[discrete#es-connectors-zoom-connector-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Users* +* *Live Meetings* +* *Upcoming Meetings* +* *Past Meetings* +* *Recordings* +* *Channels* +* *Chat Messages* +* *Chat Files* + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) +* Permissions are not synced. 
*All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment.
+====
+
+[discrete#es-connectors-zoom-connector-sync-types]
+====== Sync types
+
+<> are supported by default for all connectors.
+
+This connector also supports <>.
+
+[discrete#es-connectors-zoom-connector-sync-rules]
+===== Sync rules
+
+<> are identical for all connectors and are available by default.
+
+[discrete#es-connectors-zoom-connector-advanced-sync-rules]
+===== Advanced Sync Rules
+
+Advanced sync rules are not available for this connector in the present version.
+
+[discrete#es-connectors-zoom-connector-known-issues]
+===== Known issues
+
+* *Meetings*: Users can only index meetings that are less than a month old.
+* *Chat Messages & Files*: Users can only index chats and files that are less than 6 months old.
+
+Refer to <> for a list of known issues for _all_ connectors.
+
+[discrete#es-connectors-zoom-connector-troubleshooting]
+===== Troubleshooting
+
+See <>.
+
+[discrete#es-connectors-zoom-connector-security]
+===== Security
+
+See <>.
+
+
+// Closing the collapsible section
+===============
+
+
+// //////// //// //// //// //// //// //// ////////
+// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) ///////
+// //////// //// //// //// //// //// //// ////////
+
+[discrete#es-connectors-zoom-connector-client-reference]
+==== *Self-managed connector reference*
+
+.View *self-managed connector* reference
+[%collapsible]
+===============
+
+[discrete#es-connectors-zoom-client-connector-availability-and-prerequisites]
+===== Availability and prerequisites
+
+This connector is available as a *self-managed connector*. To use this connector, satisfy all <>.
+
+[NOTE]
+====
+This connector is in *technical preview* and is subject to change.
+The design and code is less mature than official GA features and is being provided as-is with no warranties.
+Technical preview features are not subject to the support SLA of official GA features.
+====
+
+[discrete#es-connectors-zoom-client-create-connector-client]
+===== Create a {service-name} connector
+include::_connectors-create-client.asciidoc[]
+
+[discrete#es-connectors-zoom-client-connector-usage]
+===== Usage
+
+To use this connector in the UI, select the *Zoom* tile when creating a new connector under *Search -> Connectors*.
+
+If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs].
+
+For additional operations, see <>.
+
+[discrete#es-connectors-zoom-client-connector-connecting-to-zoom]
+===== Connecting to Zoom
+
+To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s-oauth/[create a Server-to-Server OAuth application] that can access resources. Follow these steps:
+
+1. Go to the https://marketplace.zoom.us/[Zoom App Marketplace] and sign in with your Zoom account.
+2. Navigate to the "Develop" service.
+3. Select "Build App" from the dropdown menu.
+4. Click on the "Server-to-Server OAuth" button to register a new application.
+5. Provide a name for your app.
+6. Click on the "Create" button to create the app registration.
+7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
+8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
+9. The following scopes need to be added to the app.
++ +[source,bash] +---- +user:read:admin +meeting:read:admin +chat_channel:read:admin +recording:read:admin +chat_message:read:admin +report:read:admin +---- + +10. Click on the "Done" button to add the selected scopes to your app. +11. Navigate to the "Activation" section and input the necessary information to activate the app. + +After completion, use the following configuration parameters to configure the connector. + +[discrete#es-connectors-zoom-client-connector-configuration] +===== Configuration + +The following configuration fields are required: + +`Zoom application Account ID`:: (required) +"Account ID" is a unique identifier associated with a specific Zoom account within the Zoom platform, found on the app's overview page. Example: + +* `KVx-aQssTOutOAGrDfgMaA` + +`Zoom application Client ID`:: (required) +"Client ID" refers to a unique identifier associated with an application that integrates with the Zoom platform, found on the app's overview page. Example: + +* `49Z69_rnRiaF4JYyfHusw` + +`Zoom application Client Secret`:: (required) +The "Client Secret" refers to a confidential piece of information generated when developers register an application on the Zoom Developer Portal for integration with the Zoom platform, found on the app's overview page. Example: + +* `eieiUJRsiH543P5NbYadavczjkqgdRTw` + +`Recording Age Limit (Months)`:: (required) +How far back in time to request recordings from Zoom. Recordings older than this will not be indexed. This configuration parameter allows you to define a time limit, measured in months, for which recordings will be indexed. + +`Fetch past meeting details`:: +Retrieve more information about previous meetings, including their details and participants. Default value is `False`. Enable this option to fetch past meeting details. This setting can increase sync time. + +[discrete#es-connectors-zoom-client-client-docker] +====== Deployment using Docker + +include::_connectors-docker-instructions.asciidoc[] + +[discrete#es-connectors-zoom-client-connector-content-extraction] +====== Content Extraction + +Refer to <>. + +[discrete#es-connectors-zoom-client-connector-documents-and-syncs] +===== Documents and syncs + +The connector syncs the following objects and entities: + +* *Users* +* *Live Meetings* +* *Upcoming Meetings* +* *Past Meetings* +* *Recordings* +* *Channels* +* *Chat Messages* +* *Chat Files* + +[NOTE] +==== +* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. +* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. +==== + +[discrete#es-connectors-zoom-client-connector-sync-types] +====== Sync types + +<> are supported by default for all connectors. + +This connector also supports <>. + +[discrete#es-connectors-zoom-client-connector-sync-rules] +===== Sync rules + +<> are identical for all connectors and are available by default. + +[discrete#es-connectors-zoom-client-connector-advanced-sync-rules] +===== Advanced Sync Rules + +Advanced sync rules are not available for this connector in the present version. + +[discrete#es-connectors-zoom-client-connector-connector-client-operations] +===== Connector Client operations + +[discrete#es-connectors-zoom-client-connector-end-to-end-testing] +====== End-to-end Testing + +The connector framework enables operators to run functional tests against a real data source. +Refer to <> for more details. 
+ +To perform E2E testing for the Zoom connector, run the following command: + +[source,shell] +---- +$ make ftest NAME=zoom +---- + +For faster tests, add the `DATA_SIZE=small` flag: + +[source,shell] +---- +make ftest NAME=zoom DATA_SIZE=small +---- + +[discrete#es-connectors-zoom-client-connector-known-issues] +===== Known issues + +* *Meetings*: Users can only index meetings that are less than a month old. +* *Chat Messages & Files*:Users can only index chats and files that are less than 6 months old. + +Refer to <> for a list of known issues for _all_ connectors. + +[discrete#es-connectors-zoom-client-connector-troubleshooting] +===== Troubleshooting + +See <>. + +[discrete#es-connectors-zoom-client-connector-security] +===== Security + +See <>. + + +// Closing the collapsible section +=============== diff --git a/docs/reference/connector/docs/dls-e2e-guide.asciidoc b/docs/reference/connector/docs/dls-e2e-guide.asciidoc new file mode 100644 index 0000000000000..7f07fddd575c9 --- /dev/null +++ b/docs/reference/connector/docs/dls-e2e-guide.asciidoc @@ -0,0 +1,439 @@ +[#es-dls-e2e-guide] +=== Leverage document-level security from connectors in Search Applications +++++ +DLS in Search Applications +++++ + +This guide explains how to ensure document-level security (DLS) for documents ingested by <>, when building a search application. + +In this example we will: + +* Set up the SharePoint Online connector to ingest data from SharePoint Online +* Set up a *Search Application* using the Elasticsearch index created by the SharePoint Online connector +* Create Elasticsearch *API keys* with DLS and workflow restrictions to query your Search Application +* Build a search experience where authenticated users can search over the data ingested by connectors + +[discrete#es-dls-e2e-guide-connector-setup] +==== Set up connector to sync data with access control + +You can run SharePoint Online connector in Elastic Cloud (native) or on a self-managed deployment (self-managed connector). +Refer to <> to learn how to set up the SharePoint Online connector and enable DLS. + + +To run the self-managed connector, you'll need to run the *connectors service* in addition to your Elastic deployment. +Refer to <> for details on how to set up a self-managed connector and run the connectors service. + +[TIP] +==== +This guide assumes you already have an Elastic deployment, that satisfies the <> for running the connectors service. +If you don't have an Elastic deployment, sign up for a https://cloud.elastic.co/registration[free Elastic Cloud trial^]. +==== + +[NOTE] +==== +We use the SharePoint Online connector in this concrete example. +Refer to <> for a list of connectors that support DLS. +==== + +[discrete#es-dls-e2e-guide-sharepoint-data-overview] +==== Elasticsearch indices overview + +When the SharePoint Online connector is set up and you've started syncing content, the connector will create two separate Elasticsearch indices: + +* A *content* index that holds the searchable data in SharePoint Online. +We'll use this index to create our search application. +* An *access control* index that includes access control data for each user that has access to SharePoint Online. +It will be named `.search-acl-filter-`, where `` is the index name you chose. +For example, an index named `search-sharepoint` would have the ACL filter index `.search-acl-filter-search-sharepoint`. +We'll use this index to create Elasticsearch API keys that control access to the content index. 
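+
+For example, if your connector index is named `search-sharepoint`, you can inspect the access control entry for a single user with a standard document GET request (the user ID shown here is the example identity used later in this guide):
+
+[source,console]
+----
+GET .search-acl-filter-search-sharepoint/_doc/john@example.co
+----
+// TEST[skip:TODO]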
+ +[discrete#es-dls-e2e-guide-search-application-create] +==== Create a Search Application + +To build our search experience for our SharePoint Online data, we need to create a Search Application. + +Follow these steps to create a Search Application in the Kibana UI: + +. Navigate to *Search > Search Applications*. +. Select *Create*. +. *Name* the Search Application. +. Select the *index* used by the SharePoint Online connector. +. Select *Create*. + +Alternatively, you can use the {ref}/put-search-application.html[Put Search Application] API. + +[discrete#es-dls-e2e-guide-elasticsearch-api-keys-setup] +==== Create Elasticsearch API keys + +Next we need to create Elasticsearch API keys to restrict queries to the search application. +These restrictions will ensure that users can only query documents they have access to. +To create this API key, we will leverage information in the access control index created by the connector. + +The access control index will contain documents similar to this example: + +[source,js] +---- +{ + "_index": ".search-acl-filter-search-sharepoint", + "_id": "john@example.co", + "_version": 1, + "_seq_no": 0, + "_primary_term": 1, + "found": true, + "_source": { + "identity": { + "email": "john@example.co", + "access_control": [ + "john@example.co", + "Engineering Members" + ] + }, + "query": { + "template": { + "params": { + "access_control": [ + "john@example.co", + "Engineering Members" + ] + }, + "source": """ + { + "bool": { + "should": [ + { + "bool": { + "must_not": { + "exists": { + "field": "_allow_access_control" + } + } + } + }, + { + "terms": { + "_allow_access_control.enum": {{#toJson}}access_control{{/toJson}} + } + } + ] + } + } + """ + } + } + } +} +---- +// NOTCONSOLE + +This document contains the Elasticsearch query that describes which documents the user `john@example.com` has access to. +The access control information is stored in the `access_control` field. +In this case the user has access only to documents that contain `"john@example.co"` or `"Engineering Members"` in the `_allow_access_control` field. + +The `query` field contains the DLS query we will use to create an Elasticsearch API key. +That key will ensure queries are restricted to the documents `john@example.com` has access to. + +To create the API key, we will use the {ref}/security-api-create-api-key.html[Create API Key] API. 
+The API call will look like this: + +[source,console] +---- +POST /_security/api_key +{ + "name": "john-api-key", + "expiration": "1d", + "role_descriptors": { + "sharepoint-online-role": { + "index": [ + { + "names": [ + "sharepoint-search-application" + ], + "privileges": [ + "read" + ], + "query": { + "template": { + "params": { + "access_control": [ + "john@example.co", + "Engineering Members" + ] + }, + "source": """ + { + "bool": { + "should": [ + { + "bool": { + "must_not": { + "exists": { + "field": "_allow_access_control" + } + } + } + }, + { + "terms": { + "_allow_access_control.enum": {{#toJson}}access_control{{/toJson}} + } + } + ] + } + } + """ + } + } + } + ], + "restriction": { + "workflows": [ + "search_application_query" + ] + } + } + } +} +---- +// TEST[skip:TODO] + +The response will look like this: + +[source,js] +---- +{ + "id": "0rCD3i-MjKsw4g9BpRIBa", + "name": "john-api-key", + "expiration": 1687881715555, + "api_key": "zTxre9L6TcmRIgd2NgLCRg", + "encoded": "Qk05dy1JZ0JhRDNyNGpLQ3MwUmk6elRzdGU5QjZUY21SSWdkMldnQ1RMZw==" +} +---- +// NOTCONSOLE + +The `api_key` field contains the API key that can be used to query the Search Application with the appropriate DLS restrictions. + +[discrete#es-dls-e2e-guide-elasticsearch-querying-multiple-indices] +===== Querying multiple indices + +This section describes how to generate an API key to query a search application that contains multiple indices with documents ingested by connectors with DLS. + +A user might have multiple identities that define which documents they are allowed to read. +In this case we want to create a single Elasticsearch API key that can be used to query only the documents this user has access to. + +Let's assume we want to create an API key that combines the following user identities: + +[source,js] +---- +GET .search-acl-filter-source1 +{ + "_id": "example.user@example.com", + "identity": { + "username": "example username", + "email": "example.user@example.com" + }, + "query": { + "template": { + "params": { + "access_control": [ + "example.user@example.com", + "source1-user-group"] + } + }, + "source": "..." + } +} +---- +// NOTCONSOLE + +[source,js] +---- +GET .search-acl-filter-source2 +{ + "_id": "example.user@example.com", + "identity": { + "username": "example username", + "email": "example.user@example.com" + }, + "query": { + "template": { + "params": { + "access_control": [ + "example.user@example.com", + "source2-user-group"] + } + }, + "source": "..." + } +} +---- +// NOTCONSOLE + +`.search-acl-filter-source1` and `.search-acl-filter-source2` define the access control identities for `source1` and `source2`. 
+
+The following script exemplifies how to generate the Elasticsearch API key that combines multiple user identities:
+
+[source,js]
+----
+require("dotenv").config();
+const axios = require("axios");
+
+// Elasticsearch URL and creds retrieved from environment variables
+const ELASTICSEARCH_URL = process.env.ELASTICSEARCH_URL;
+const ELASTICSEARCH_USER = process.env.ELASTICSEARCH_USER;
+const ELASTICSEARCH_PASSWORD = process.env.ELASTICSEARCH_PASSWORD;
+
+const config = {
+  auth: {
+    username: ELASTICSEARCH_USER,
+    password: ELASTICSEARCH_PASSWORD,
+  },
+  headers: {
+    "Content-Type": "application/json",
+  },
+};
+
+async function createApiKey({
+  searchApplication,
+  userId,
+  indices = "",
+  metadata,
+  expiration = "1d"
+}) {
+  try {
+    // Split the comma-separated list of index names into an array
+    const indexNames = indices.split(",");
+
+    let combinedQuery = { bool: { should: [] } };
+
+    for (const index of indexNames) {
+      const aclsIndex = `.search-acl-filter-${index}`;
+      const response = await axios.get(
+        `${ELASTICSEARCH_URL}/${aclsIndex}/_doc/${userId}`,
+        config
+      );
+      combinedQuery.bool.should.push({
+        bool: {
+          must: [
+            {
+              term: {
+                "_index": index,
+              },
+            },
+            response.data._source.query.source,
+          ],
+        },
+      });
+    }
+
+    if (!metadata || Object.keys(metadata).length === 0) {
+      metadata = { created_by: "create-api-key" };
+    }
+
+    const apiKeyBody = {
+      name: userId,
+      expiration,
+      role_descriptors: {
+        [`${searchApplication}-role`]: {
+          index: [
+            {
+              names: [searchApplication],
+              privileges: ["read"],
+              query: combinedQuery,
+            },
+          ],
+          restriction: {
+            workflows: ["search_application_query"],
+          },
+        },
+      },
+      metadata,
+    };
+
+    const apiKeyResponse = await axios.post(
+      `${ELASTICSEARCH_URL}/_security/api_key`,
+      apiKeyBody,
+      config
+    );
+
+    console.log(apiKeyResponse.data);
+    return apiKeyResponse.data.encoded;
+  } catch (error) {
+    console.log(error);
+  }
+}
+
+// example usage:
+createApiKey({
+  searchApplication: "my-search-app",
+  userId: "example.user@example.com",
+  indices: "source1,source2",
+  expiration: "1d",
+  metadata: {
+    application: "my-search-app",
+    namespace: "dev",
+    foo: "bar",
+  },
+}).then((encodedKey) => console.log(encodedKey));
+
+----
+// NOTCONSOLE
+
+NOTE: The example combines multiple identities into a single role descriptor. This is because an Elasticsearch API key can use role restrictions only if it has a *single role descriptor*.
+
+[discrete#es-dls-e2e-guide-elasticsearch-api-keys-frontend-implementation]
+==== Implementation in your frontend application
+
+If you're building a frontend application, use the `encoded` field to pass the API key to the frontend.
+Your app can then use the API key to query the search application.
+The workflow will look something like this:
+
+1. User signs in to your application.
+2. Your application generates an Elasticsearch API key using the {ref}/security-api-create-api-key.html[Create API Key] API.
+3. The `encoded` field is returned to the frontend application.
+4. When the user searches for documents, the frontend application passes the `encoded` field to your search application's {ref}/search-application-search.html[`_search` endpoint].
+
+For example, you might use the https://github.com/elastic/search-application-client[Search Application client^] to make the actual queries using the API key:
++
+[source,js]
+----
+const client = SearchApplicationClient(applicationName, endpoint, apiKey, params);
+----
+// NOTCONSOLE
+
+Here's what this workflow looks like in a sequence diagram:
+
+[.screenshot]
+image::images/dls-api-key-workflow.png[DLS API key and search application client workflow]
+
+[TIP]
+====
+When creating an Elasticsearch API key for querying Search Applications, you must include the `search_application_query` restriction. This will ensure the API key can only access the Search Application Search API.
+====
+
+[TIP]
+====
+We recommend always setting an `expiration` time when creating an Elasticsearch API key. When `expiration` is not set, the Elasticsearch API key will never expire.
+====
+
+[discrete#es-dls-e2e-guide-workflow-guidance]
+==== Workflow guidance
+
+We recommend relying on the connector access control sync to automate and keep documents in sync with changes to the original content source's user permissions.
+
+In this workflow you will need to handle the generation of the Elasticsearch API key in the backend of your application, in response to browser sign-ins.
+
+Once the key is generated, the backend will also need to return that key to the client (browser) to be used in subsequent search requests to your search application.
+
+The API key can be invalidated using the {ref}/security-api-invalidate-api-key.html[Invalidate API Key API].
+Additionally, if the user's permissions change, you'll need to update or recreate the Elasticsearch API key.
+
+[discrete#es-dls-e2e-guide-next-steps]
+==== Next steps
+
+Learn how to use the Search Application client to query your Search Application.
+See {ref}/search-application-client.html[Search Applications client].
+
+[discrete#es-dls-e2e-guide-learn-more]
+==== Learn more
+
+* <>
+* <>
+* {ref}/search-application-overview.html[Search Applications]
diff --git a/docs/reference/connector/docs/dls-overview.asciidoc b/docs/reference/connector/docs/dls-overview.asciidoc
new file mode 100644
index 0000000000000..ec6bb43d955c7
--- /dev/null
+++ b/docs/reference/connector/docs/dls-overview.asciidoc
@@ -0,0 +1,345 @@
+[#es-dls-overview]
+=== How DLS works
+
+Document level security (DLS) enables you to control access to content at the document level.
+Access to each document in an index can be managed independently, based on the identities (such as usernames, emails, or groups) that are allowed to view it.
+
+This feature works with the help of special access control documents that are indexed by a connector into a hidden Elasticsearch index, associated with the standard content index.
+If your content documents have access control fields that match the criteria defined in your access control documents, Elasticsearch will apply DLS to the documents synced by the connector.
+
+[discrete#es-dls-overview-core-concepts]
+==== Core concepts
+
+At a very high level, there are two essential components that enable document level security with connectors:
+
+* *Access control documents*: These documents define the access control policy for documents from your third party source.
+They live in a hidden index named with the following pattern: `.search-acl-filter-`.
+See <> for more details and an example.
+* *Content documents with access control fields*: The documents that contain the synced content from your third party source must have *access control fields* that match the criteria defined in your access control documents. +These documents live in an index named with the following pattern: `search-`. +** If a content document does not have access control fields, there will be no restrictions on who can view it. +** If the access control field is present but _empty_, no identities will have access and the document will be effectively invisible. ++ +See <> for more details. + +[discrete#es-dls-overview-procedure] +==== Enabling DLS + +To enable DLS, you need to perform the following steps: + +. First *enable DLS* for your connector as part of the connector configuration. +. Run an *Access control* sync. +. This creates a hidden access control index prefixed with `.search-acl-filter-`. For example, if you named your connector index `search-sharepoint`, the access control index would be named `.search-acl-filter-search-sharepoint`. +. The <> on the hidden index define which identities are allowed to view documents with access control fields. +. The access control document uses a search template to define how to filter search results based on identities. +. Schedule recurring *Access control* syncs to update the access control documents in the hidden index. + +Note the following details about content documents and syncs: + +. Remember that for DLS to work, your *content documents* must have access control fields that match the criteria defined in your access control documents. +<> contain the actual content your users will search for. +If a content document does not have access control fields, there will be no restrictions on who can view it. +. When a user searches for content, the access control documents determine which content the user is allowed to view. +. At _search_ time documents without the `_allow_access_control` field or with allowed values in `_allow_access_control.enum` will be returned in the search results. The logic for determining whether a document has access control enabled is based on the presence or values of the `_allow_access_control*` fields. +. Run *Content* syncs to sync your third party data source to Elasticsearch. +A specific field (or fields) within these documents correlates with the query parameters in the access control documents enabling document-level security (DLS). + +[NOTE] +==== +You must enable DLS for your connector _before_ running the first content sync. +If you have already run a content sync, you'll need to delete all documents on the index, enable DLS, and run a new content sync. +==== + +[discrete#es-dls-overview-index] +==== DLS at index time + +[discrete#es-dls-overview-access-control-documents] +===== Access control documents + +These documents define the access control policy for the data indexed into Elasticsearch. +An example of an access control document is as follows: + +[source,js] +---- +{ + "_id": "example.user@example.com", + "identity": { + "username": "example username", + "email": "example.user@example.com" + }, + "query": { + "template": { + "params": { + "access_control": [ + "example.user@example.com", + "example group", + "example username"] + } + }, + "source": "..." + } +} +---- +// NOTCONSOLE + +In this example, the identity object specifies the identity of the user that this document pertains to. +The `query` object then uses a template to list the parameters that form the access control policy for this identity. 
+It also contains the query `source`, which will specify a query to fetch all content documents the identity has access to. +The `_id` could be, for example, the email address or the username of a user. +The exact content and structure of `identity` depends on the corresponding implementation. + +[discrete#es-dls-overview-content-documents] +===== Content documents + +Content documents contain the actual data from your 3rd party source. +A specific field (or fields) within these documents correlates with the query parameters in the access control documents enabling document-level security (DLS). +Please note, the field names used to implement DLS may vary across different connectors. +In the following example we'll use the field `_allow_access_control` for specifying the access control for a user identity. + +[source,js] +---- +{ + "_id": "some-unique-id", + "key-1": "value-1", + "key-2": "value-2", + "key-3": "value-3", + "_allow_access_control": [ + "example.user@example.com", + "example group", + "example username" + ] +} +---- +// NOTCONSOLE + +[discrete#es-dls-overview-sync-type-comparison] +===== Access control sync vs content sync + +The ingestion of documents into an Elasticsearch index is known as a sync. +DLS is managed using two types of syncs: + +* *Content sync*: Ingests content into an index that starts with `search-`. + +* *Access control sync*: Separate, additional sync which ingests access control documents into index that starts with `.search-acl-filter-`. + +During a sync, the connector ingests the documents into the relevant index based on their type (content or access control). +The access control documents determine the access control policy for the content documents. + +By leveraging DLS, you can ensure that your Elasticsearch data is securely accessible to the right users or groups, based on the permissions defined in the access control documents. + +[discrete#es-dls-overview-search-time] +==== DLS at search time + +[discrete#es-dls-overview-search-time-identity-allowed] +===== When is an identity allowed to see a content document + +A user can view a document if at least one access control element in their access control document matches an item within the document's `_allow_access_control` field. + +[discrete#es-dls-overview-search-time-example] +====== Example +This section illustrates when a user has access to certain documents depending on the access control. + +One access control document: +[source,js] +---- +{ + "_id": "example.user@example.com", + "identity": { + "username": "example username", + "email": "example.user@example.com" + }, + "query": { + "template": { + "params": { + "access_control": [ + "example.user@example.com", + "example group", + "example username"] + } + }, + "source": "..." + } +} +---- +// NOTCONSOLE + +Let's see which of the following example documents these permissions can access, and why. +[source,js] +---- +{ + "_id": "some-unique-id-1", + "_allow_access_control": [ + "example.user@example.com", + "example group", + "example username" + ] +} +---- +// NOTCONSOLE + +The user `example username` will have access to this document as he's part of the corresponding group and his username and email address are also explicitly part of `_allow_access_control`. + +[source,js] +---- +{ + "_id": "some-unique-id-2", + "_allow_access_control": [ + "example group" + ] +} +---- +// NOTCONSOLE + +The user `example username` will also have access to this document as they are part of the `example group`. 
+
+[source,js]
+----
+{
+  "_id": "some-unique-id-3",
+  "_allow_access_control": [
+    "another.user@example.com"
+  ]
+}
+----
+// NOTCONSOLE
+
+The user `example username` won't have access to this document because their email does not match `another.user@example.com`.
+
+[source,js]
+----
+{
+  "_id": "some-unique-id-4",
+  "_allow_access_control": []
+}
+----
+// NOTCONSOLE
+
+No one will have access to this document as the `_allow_access_control` field is empty.
+
+[discrete#es-dls-overview-multiple-connectors]
+===== Querying multiple indices
+
+This section illustrates how to define an Elasticsearch API key that has restricted read access to multiple indices that have DLS enabled.
+
+A user might have multiple identities that define which documents they are allowed to read.
+We can define an Elasticsearch API key with a role descriptor for each index the user has access to.
+
+[discrete#es-dls-overview-multiple-connectors-example]
+====== Example
+
+Let's assume we want to create an API key that combines the following user identities:
+
+[source,js]
+----
+GET .search-acl-filter-source1
+{
+  "_id": "example.user@example.com",
+  "identity": {
+    "username": "example username",
+    "email": "example.user@example.com"
+  },
+  "query": {
+    "template": {
+      "params": {
+        "access_control": [
+          "example.user@example.com",
+          "source1-user-group"]
+      }
+    },
+    "source": "..."
+  }
+}
+----
+// NOTCONSOLE
+
+[source,js]
+----
+GET .search-acl-filter-source2
+{
+  "_id": "example.user@example.com",
+  "identity": {
+    "username": "example username",
+    "email": "example.user@example.com"
+  },
+  "query": {
+    "template": {
+      "params": {
+        "access_control": [
+          "example.user@example.com",
+          "source2-user-group"]
+      }
+    },
+    "source": "..."
+  }
+}
----
+// NOTCONSOLE
+
+`.search-acl-filter-source1` and `.search-acl-filter-source2` define the access control identities for `source1` and `source2`.
+
+You can create an Elasticsearch API key using an API call like this:
+
+[source,console]
+----
+POST /_security/api_key
+{
+  "name": "my-api-key",
+  "role_descriptors": {
+    "role-source1": {
+      "indices": [
+        {
+          "names": ["source1"],
+          "privileges": ["read"],
+          "query": {
+            "template": {
+              "params": {
+                "access_control": [
+                  "example.user@example.com",
+                  "source1-user-group"]
+              }
+            },
+            "source": "..."
+          }
+        }
+      ]
+    },
+    "role-source2": {
+      "indices": [
+        {
+          "names": ["source2"],
+          "privileges": ["read"],
+          "query": {
+            "template": {
+              "params": {
+                "access_control": [
+                  "example.user@example.com",
+                  "source2-user-group"]
+              }
+            },
+            "source": "..."
+          }
+        }
+      ]
+    }
+  }
+}
+
+----
+// TEST[skip:TODO]
+
+[discrete#es-dls-overview-multiple-connectors-workflow-guidance]
+====== Workflow guidance
+
+We recommend relying on the connector access control sync to automate and keep documents in sync with changes to the original content source's user permissions.
+
+Consider setting an `expiration` time when creating an Elasticsearch API key. When `expiration` is not set, the Elasticsearch API key will never expire.
+
+The API key can be invalidated using the {ref}/security-api-invalidate-api-key.html[Invalidate API Key API].
+Additionally, if the user's permission changes, you'll need to update or recreate the Elasticsearch API key.
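+
+For example, a request like the following is a minimal sketch of how you might invalidate the `my-api-key` key created above by name:
+
+[source,console]
+----
+DELETE /_security/api_key
+{
+  "name": "my-api-key"
+}
+----
+// TEST[skip:TODO]
+
+Depending on your workflow, you can also invalidate keys by `ids`, `username`, or `realm_name` instead of by name.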
+ +[discrete#es-dls-overview-search-time-learn-more] +===== Learn more + +* <> +* {ref}/document-level-security.html[Elasticsearch Document Level Security^] + diff --git a/docs/reference/connector/docs/dls.asciidoc b/docs/reference/connector/docs/dls.asciidoc new file mode 100644 index 0000000000000..2e8871ea9eb87 --- /dev/null +++ b/docs/reference/connector/docs/dls.asciidoc @@ -0,0 +1,39 @@ +[#es-dls] +== Document level security + +Document level security (DLS) enables you to restrict access to documents in your Elasticsearch indices according to user and group permissions. +This ensures search results only return authorized information for users, based on their permissions. + +[discrete#es-dls-availability-prerequisites] +=== Availability & prerequisites + +Support for DLS in Elastic connectors was introduced in version *8.9.0*. + +[NOTE] +==== +This feature is in *beta* and is subject to change. +The design and code is less mature than official GA features and is being provided as-is with no warranties. +Beta features are not subject to the support SLA of official GA features. +==== + +This feature is not available for all Elastic subscription levels. +Refer to the subscriptions pages for https://www.elastic.co/subscriptions/cloud[Elastic Cloud^] and https://www.elastic.co/subscriptions[Elastic Stack^]. + +DLS is available by default when using the following Elastic connectors: + +include::_connectors-list-dls.asciidoc[] + +Note that our standalone products (App Search and Workplace Search) do not use this feature. +Workplace Search has its own permissions management system. + +[discrete#es-dls-learn-more] +=== Learn more + +DLS documentation: + +* <> +* <> +* <> + +include::dls-overview.asciidoc[] +include::dls-e2e-guide.asciidoc[] diff --git a/docs/reference/connector/docs/images/analytics-collections-dashboard.png b/docs/reference/connector/docs/images/analytics-collections-dashboard.png new file mode 100644 index 0000000000000..b99fc07bc5fab Binary files /dev/null and b/docs/reference/connector/docs/images/analytics-collections-dashboard.png differ diff --git a/docs/reference/connector/docs/images/analytics-explorer-dashboard.png b/docs/reference/connector/docs/images/analytics-explorer-dashboard.png new file mode 100644 index 0000000000000..922763585d67f Binary files /dev/null and b/docs/reference/connector/docs/images/analytics-explorer-dashboard.png differ diff --git a/docs/reference/connector/docs/images/analytics-overview-dashboard.png b/docs/reference/connector/docs/images/analytics-overview-dashboard.png new file mode 100644 index 0000000000000..c088cd3994d1e Binary files /dev/null and b/docs/reference/connector/docs/images/analytics-overview-dashboard.png differ diff --git a/docs/reference/connector/docs/images/app-search-audit-log-table.png b/docs/reference/connector/docs/images/app-search-audit-log-table.png new file mode 100644 index 0000000000000..ccf9147bdb6e8 Binary files /dev/null and b/docs/reference/connector/docs/images/app-search-audit-log-table.png differ diff --git a/docs/reference/connector/docs/images/app-search-kibana-ui.png b/docs/reference/connector/docs/images/app-search-kibana-ui.png new file mode 100644 index 0000000000000..0e6b09b7f1bba Binary files /dev/null and b/docs/reference/connector/docs/images/app-search-kibana-ui.png differ diff --git a/docs/reference/connector/docs/images/app-search-settings.png b/docs/reference/connector/docs/images/app-search-settings.png new file mode 100644 index 0000000000000..9c8c31c81a6c5 Binary files /dev/null and 
b/docs/reference/connector/docs/images/app-search-settings.png differ diff --git a/docs/reference/connector/docs/images/app-search-standalone-ui.png b/docs/reference/connector/docs/images/app-search-standalone-ui.png new file mode 100644 index 0000000000000..f496d831b70ad Binary files /dev/null and b/docs/reference/connector/docs/images/app-search-standalone-ui.png differ diff --git a/docs/reference/connector/docs/images/basic-rule-example.png b/docs/reference/connector/docs/images/basic-rule-example.png new file mode 100644 index 0000000000000..aa1d79bb6f274 Binary files /dev/null and b/docs/reference/connector/docs/images/basic-rule-example.png differ diff --git a/docs/reference/connector/docs/images/blog-elastic-crawler-1.png b/docs/reference/connector/docs/images/blog-elastic-crawler-1.png new file mode 100644 index 0000000000000..e2e5593c3e102 Binary files /dev/null and b/docs/reference/connector/docs/images/blog-elastic-crawler-1.png differ diff --git a/docs/reference/connector/docs/images/blog-elastic-crawler-2.jpg b/docs/reference/connector/docs/images/blog-elastic-crawler-2.jpg new file mode 100644 index 0000000000000..3bc45743afbd0 Binary files /dev/null and b/docs/reference/connector/docs/images/blog-elastic-crawler-2.jpg differ diff --git a/docs/reference/connector/docs/images/blog-elastic-crawler-3.jpg b/docs/reference/connector/docs/images/blog-elastic-crawler-3.jpg new file mode 100644 index 0000000000000..6f7f4fe5c4b6d Binary files /dev/null and b/docs/reference/connector/docs/images/blog-elastic-crawler-3.jpg differ diff --git a/docs/reference/connector/docs/images/build-a-connector-workflow.png b/docs/reference/connector/docs/images/build-a-connector-workflow.png new file mode 100644 index 0000000000000..eb51863358e9a Binary files /dev/null and b/docs/reference/connector/docs/images/build-a-connector-workflow.png differ diff --git a/docs/reference/connector/docs/images/combine-engines-indices-content-sources.png b/docs/reference/connector/docs/images/combine-engines-indices-content-sources.png new file mode 100644 index 0000000000000..072f4cefff01b Binary files /dev/null and b/docs/reference/connector/docs/images/combine-engines-indices-content-sources.png differ diff --git a/docs/reference/connector/docs/images/connectors-overview.png b/docs/reference/connector/docs/images/connectors-overview.png new file mode 100644 index 0000000000000..4d0edfeb6adae Binary files /dev/null and b/docs/reference/connector/docs/images/connectors-overview.png differ diff --git a/docs/reference/connector/docs/images/convert-connector.png b/docs/reference/connector/docs/images/convert-connector.png new file mode 100644 index 0000000000000..f07886d12d7fb Binary files /dev/null and b/docs/reference/connector/docs/images/convert-connector.png differ diff --git a/docs/reference/connector/docs/images/crawler-crawl-rules.png b/docs/reference/connector/docs/images/crawler-crawl-rules.png new file mode 100644 index 0000000000000..69c97418189d3 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-crawl-rules.png differ diff --git a/docs/reference/connector/docs/images/crawler-events-logs-viewer-by-url.png b/docs/reference/connector/docs/images/crawler-events-logs-viewer-by-url.png new file mode 100644 index 0000000000000..2f05747d49398 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-events-logs-viewer-by-url.png differ diff --git a/docs/reference/connector/docs/images/crawler-events-logs-viewer.png 
b/docs/reference/connector/docs/images/crawler-events-logs-viewer.png new file mode 100644 index 0000000000000..758b94e808661 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-events-logs-viewer.png differ diff --git a/docs/reference/connector/docs/images/crawler-extraction-rules-content-field.png b/docs/reference/connector/docs/images/crawler-extraction-rules-content-field.png new file mode 100644 index 0000000000000..2b7b9f3d41cd9 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-extraction-rules-content-field.png differ diff --git a/docs/reference/connector/docs/images/crawler-extraction-rules-url-filters.png b/docs/reference/connector/docs/images/crawler-extraction-rules-url-filters.png new file mode 100644 index 0000000000000..11be61bcce8fa Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-extraction-rules-url-filters.png differ diff --git a/docs/reference/connector/docs/images/crawler-extraction-rules.png b/docs/reference/connector/docs/images/crawler-extraction-rules.png new file mode 100644 index 0000000000000..175f18e2eaf66 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-extraction-rules.png differ diff --git a/docs/reference/connector/docs/images/crawler-iteration-cycle.png b/docs/reference/connector/docs/images/crawler-iteration-cycle.png new file mode 100644 index 0000000000000..f013bd2ed0dcd Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-iteration-cycle.png differ diff --git a/docs/reference/connector/docs/images/crawler-proxy-schematic.png b/docs/reference/connector/docs/images/crawler-proxy-schematic.png new file mode 100644 index 0000000000000..524182d2f6643 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-proxy-schematic.png differ diff --git a/docs/reference/connector/docs/images/crawler-proxy-validation.png b/docs/reference/connector/docs/images/crawler-proxy-validation.png new file mode 100644 index 0000000000000..61f268f83f209 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-proxy-validation.png differ diff --git a/docs/reference/connector/docs/images/crawler-scheduling-multiple-crawls.png b/docs/reference/connector/docs/images/crawler-scheduling-multiple-crawls.png new file mode 100644 index 0000000000000..fcddae8dd1d04 Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-scheduling-multiple-crawls.png differ diff --git a/docs/reference/connector/docs/images/crawler-scheduling.png b/docs/reference/connector/docs/images/crawler-scheduling.png new file mode 100644 index 0000000000000..f67a0d6b5fb5d Binary files /dev/null and b/docs/reference/connector/docs/images/crawler-scheduling.png differ diff --git a/docs/reference/connector/docs/images/discover-data-view-analytics.png b/docs/reference/connector/docs/images/discover-data-view-analytics.png new file mode 100644 index 0000000000000..676ed40098e99 Binary files /dev/null and b/docs/reference/connector/docs/images/discover-data-view-analytics.png differ diff --git a/docs/reference/connector/docs/images/discover-lens-analytics.png b/docs/reference/connector/docs/images/discover-lens-analytics.png new file mode 100644 index 0000000000000..89701eca60bad Binary files /dev/null and b/docs/reference/connector/docs/images/discover-lens-analytics.png differ diff --git a/docs/reference/connector/docs/images/dls-api-key-workflow.png b/docs/reference/connector/docs/images/dls-api-key-workflow.png new file mode 100644 index 
0000000000000..a3bae143aa57e Binary files /dev/null and b/docs/reference/connector/docs/images/dls-api-key-workflow.png differ diff --git a/docs/reference/connector/docs/images/document-enrichment-add-inference-pipeline.png b/docs/reference/connector/docs/images/document-enrichment-add-inference-pipeline.png new file mode 100644 index 0000000000000..ddcf42e24ab83 Binary files /dev/null and b/docs/reference/connector/docs/images/document-enrichment-add-inference-pipeline.png differ diff --git a/docs/reference/connector/docs/images/document-enrichment-diagram.png b/docs/reference/connector/docs/images/document-enrichment-diagram.png new file mode 100644 index 0000000000000..89ae1d45e24d4 Binary files /dev/null and b/docs/reference/connector/docs/images/document-enrichment-diagram.png differ diff --git a/docs/reference/connector/docs/images/elser-deploy-model.png b/docs/reference/connector/docs/images/elser-deploy-model.png new file mode 100644 index 0000000000000..46f5e8cc7229a Binary files /dev/null and b/docs/reference/connector/docs/images/elser-deploy-model.png differ diff --git a/docs/reference/connector/docs/images/elser-model-deployment.png b/docs/reference/connector/docs/images/elser-model-deployment.png new file mode 100644 index 0000000000000..1bcae4c85a5e1 Binary files /dev/null and b/docs/reference/connector/docs/images/elser-model-deployment.png differ diff --git a/docs/reference/connector/docs/images/elser-model-started.png b/docs/reference/connector/docs/images/elser-model-started.png new file mode 100644 index 0000000000000..c533f7b5123fb Binary files /dev/null and b/docs/reference/connector/docs/images/elser-model-started.png differ diff --git a/docs/reference/connector/docs/images/elser-pipeline-model-selection.png b/docs/reference/connector/docs/images/elser-pipeline-model-selection.png new file mode 100644 index 0000000000000..986071e77b36a Binary files /dev/null and b/docs/reference/connector/docs/images/elser-pipeline-model-selection.png differ diff --git a/docs/reference/connector/docs/images/elser-start-model.png b/docs/reference/connector/docs/images/elser-start-model.png new file mode 100644 index 0000000000000..81cdfa0eb58a0 Binary files /dev/null and b/docs/reference/connector/docs/images/elser-start-model.png differ diff --git a/docs/reference/connector/docs/images/enable-rbac-app-search.png b/docs/reference/connector/docs/images/enable-rbac-app-search.png new file mode 100644 index 0000000000000..11ef21d55f07f Binary files /dev/null and b/docs/reference/connector/docs/images/enable-rbac-app-search.png differ diff --git a/docs/reference/connector/docs/images/enable-rbac-workplace-search.png b/docs/reference/connector/docs/images/enable-rbac-workplace-search.png new file mode 100644 index 0000000000000..45205d23cddfd Binary files /dev/null and b/docs/reference/connector/docs/images/enable-rbac-workplace-search.png differ diff --git a/docs/reference/connector/docs/images/filtering-general-diagram.png b/docs/reference/connector/docs/images/filtering-general-diagram.png new file mode 100644 index 0000000000000..ce1a9367d7b57 Binary files /dev/null and b/docs/reference/connector/docs/images/filtering-general-diagram.png differ diff --git a/docs/reference/connector/docs/images/filtering-rules-zero-state.png b/docs/reference/connector/docs/images/filtering-rules-zero-state.png new file mode 100644 index 0000000000000..fa43c4cc5a71c Binary files /dev/null and b/docs/reference/connector/docs/images/filtering-rules-zero-state.png differ diff --git 
a/docs/reference/connector/docs/images/hybrid-architecture.png b/docs/reference/connector/docs/images/hybrid-architecture.png new file mode 100644 index 0000000000000..81d19179db3e2 Binary files /dev/null and b/docs/reference/connector/docs/images/hybrid-architecture.png differ diff --git a/docs/reference/connector/docs/images/kibana-setup-guide.png b/docs/reference/connector/docs/images/kibana-setup-guide.png new file mode 100644 index 0000000000000..2797472933102 Binary files /dev/null and b/docs/reference/connector/docs/images/kibana-setup-guide.png differ diff --git a/docs/reference/connector/docs/images/kibana-ui.png b/docs/reference/connector/docs/images/kibana-ui.png new file mode 100644 index 0000000000000..4371f3a1052aa Binary files /dev/null and b/docs/reference/connector/docs/images/kibana-ui.png differ diff --git a/docs/reference/connector/docs/images/ldap-login.png b/docs/reference/connector/docs/images/ldap-login.png new file mode 100644 index 0000000000000..b7dd2b9fce5fb Binary files /dev/null and b/docs/reference/connector/docs/images/ldap-login.png differ diff --git a/docs/reference/connector/docs/images/mongodb-connector-config.png b/docs/reference/connector/docs/images/mongodb-connector-config.png new file mode 100644 index 0000000000000..2c4d2e2158908 Binary files /dev/null and b/docs/reference/connector/docs/images/mongodb-connector-config.png differ diff --git a/docs/reference/connector/docs/images/mongodb-load-sample-data.png b/docs/reference/connector/docs/images/mongodb-load-sample-data.png new file mode 100644 index 0000000000000..f7bc9c4192b02 Binary files /dev/null and b/docs/reference/connector/docs/images/mongodb-load-sample-data.png differ diff --git a/docs/reference/connector/docs/images/mongodb-sample-document.png b/docs/reference/connector/docs/images/mongodb-sample-document.png new file mode 100644 index 0000000000000..f462c41ad751c Binary files /dev/null and b/docs/reference/connector/docs/images/mongodb-sample-document.png differ diff --git a/docs/reference/connector/docs/images/oidc-login.png b/docs/reference/connector/docs/images/oidc-login.png new file mode 100644 index 0000000000000..37753acc8a0f6 Binary files /dev/null and b/docs/reference/connector/docs/images/oidc-login.png differ diff --git a/docs/reference/connector/docs/images/pipeline-copy-customize.png b/docs/reference/connector/docs/images/pipeline-copy-customize.png new file mode 100644 index 0000000000000..1f2bf99aa4f16 Binary files /dev/null and b/docs/reference/connector/docs/images/pipeline-copy-customize.png differ diff --git a/docs/reference/connector/docs/images/pipelines-extraction-sync-rules.png b/docs/reference/connector/docs/images/pipelines-extraction-sync-rules.png new file mode 100644 index 0000000000000..1a491a6bf95bb Binary files /dev/null and b/docs/reference/connector/docs/images/pipelines-extraction-sync-rules.png differ diff --git a/docs/reference/connector/docs/images/pki-login-screen.png b/docs/reference/connector/docs/images/pki-login-screen.png new file mode 100644 index 0000000000000..9fec19564adb3 Binary files /dev/null and b/docs/reference/connector/docs/images/pki-login-screen.png differ diff --git a/docs/reference/connector/docs/images/saml-login.png b/docs/reference/connector/docs/images/saml-login.png new file mode 100644 index 0000000000000..f8d5771363efc Binary files /dev/null and b/docs/reference/connector/docs/images/saml-login.png differ diff --git a/docs/reference/connector/docs/images/search-applications-create.png 
b/docs/reference/connector/docs/images/search-applications-create.png new file mode 100644 index 0000000000000..cce31b985ad82 Binary files /dev/null and b/docs/reference/connector/docs/images/search-applications-create.png differ diff --git a/docs/reference/connector/docs/images/search-applications-docs-explorer.png b/docs/reference/connector/docs/images/search-applications-docs-explorer.png new file mode 100644 index 0000000000000..d9b2cfa05f986 Binary files /dev/null and b/docs/reference/connector/docs/images/search-applications-docs-explorer.png differ diff --git a/docs/reference/connector/docs/images/search-applications-unified-search.png b/docs/reference/connector/docs/images/search-applications-unified-search.png new file mode 100644 index 0000000000000..2eca235b2d968 Binary files /dev/null and b/docs/reference/connector/docs/images/search-applications-unified-search.png differ diff --git a/docs/reference/connector/docs/images/select-ingestion-method.png b/docs/reference/connector/docs/images/select-ingestion-method.png new file mode 100644 index 0000000000000..29dc3630e1237 Binary files /dev/null and b/docs/reference/connector/docs/images/select-ingestion-method.png differ diff --git a/docs/reference/connector/docs/images/self-managed-architecture.png b/docs/reference/connector/docs/images/self-managed-architecture.png new file mode 100644 index 0000000000000..c06248acf7d81 Binary files /dev/null and b/docs/reference/connector/docs/images/self-managed-architecture.png differ diff --git a/docs/reference/connector/docs/images/simple-rule-equals.png b/docs/reference/connector/docs/images/simple-rule-equals.png new file mode 100644 index 0000000000000..5dd5e43427ea5 Binary files /dev/null and b/docs/reference/connector/docs/images/simple-rule-equals.png differ diff --git a/docs/reference/connector/docs/images/simple-rule-greater.png b/docs/reference/connector/docs/images/simple-rule-greater.png new file mode 100644 index 0000000000000..5dd78897c6154 Binary files /dev/null and b/docs/reference/connector/docs/images/simple-rule-greater.png differ diff --git a/docs/reference/connector/docs/images/simple-rule-regex.png b/docs/reference/connector/docs/images/simple-rule-regex.png new file mode 100644 index 0000000000000..adb8be4965464 Binary files /dev/null and b/docs/reference/connector/docs/images/simple-rule-regex.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-advanced-rules-appeared.png b/docs/reference/connector/docs/images/sync-rules-advanced-rules-appeared.png new file mode 100644 index 0000000000000..e3a49b40212d8 Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-advanced-rules-appeared.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-applied-rules-during-sync.png b/docs/reference/connector/docs/images/sync-rules-applied-rules-during-sync.png new file mode 100644 index 0000000000000..7cb713277a77b Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-applied-rules-during-sync.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-detail-view-button.png b/docs/reference/connector/docs/images/sync-rules-detail-view-button.png new file mode 100644 index 0000000000000..f24daea0d4351 Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-detail-view-button.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-draft-new-rules.png b/docs/reference/connector/docs/images/sync-rules-draft-new-rules.png new file mode 100644 index 
0000000000000..c7386659b2b09 Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-draft-new-rules.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-extract-all-at-once.png b/docs/reference/connector/docs/images/sync-rules-extract-all-at-once.png new file mode 100644 index 0000000000000..4b2235bf07967 Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-extract-all-at-once.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-new-rule-applied.png b/docs/reference/connector/docs/images/sync-rules-new-rule-applied.png new file mode 100644 index 0000000000000..936b7b98b7cda Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-new-rule-applied.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-pagination.png b/docs/reference/connector/docs/images/sync-rules-pagination.png new file mode 100644 index 0000000000000..3cb52f134081c Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-pagination.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-paste-aggregation-pipeline.png b/docs/reference/connector/docs/images/sync-rules-paste-aggregation-pipeline.png new file mode 100644 index 0000000000000..697759beeb2b9 Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-paste-aggregation-pipeline.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-rules-fulfilling-properties.png b/docs/reference/connector/docs/images/sync-rules-rules-fulfilling-properties.png new file mode 100644 index 0000000000000..3245d4ca7a2ae Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-rules-fulfilling-properties.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-save-and-validate-draft.png b/docs/reference/connector/docs/images/sync-rules-save-and-validate-draft.png new file mode 100644 index 0000000000000..15c0808159a18 Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-save-and-validate-draft.png differ diff --git a/docs/reference/connector/docs/images/sync-rules-time-dimension.png b/docs/reference/connector/docs/images/sync-rules-time-dimension.png new file mode 100644 index 0000000000000..003dd4bef1f0b Binary files /dev/null and b/docs/reference/connector/docs/images/sync-rules-time-dimension.png differ diff --git a/docs/reference/connector/docs/images/use-a-connector-workflow.png b/docs/reference/connector/docs/images/use-a-connector-workflow.png new file mode 100644 index 0000000000000..eb51863358e9a Binary files /dev/null and b/docs/reference/connector/docs/images/use-a-connector-workflow.png differ diff --git a/docs/reference/connector/docs/images/workplace-search-create-role-mapping.png b/docs/reference/connector/docs/images/workplace-search-create-role-mapping.png new file mode 100644 index 0000000000000..92c22b80ea30d Binary files /dev/null and b/docs/reference/connector/docs/images/workplace-search-create-role-mapping.png differ diff --git a/docs/reference/connector/docs/images/workplace-search-kibana-ui.png b/docs/reference/connector/docs/images/workplace-search-kibana-ui.png new file mode 100644 index 0000000000000..ea5f4fcfca4df Binary files /dev/null and b/docs/reference/connector/docs/images/workplace-search-kibana-ui.png differ diff --git a/docs/reference/connector/docs/images/workplace-search-standalone-ui.png b/docs/reference/connector/docs/images/workplace-search-standalone-ui.png new file mode 100644 index 
0000000000000..b3a1e12e63fa3 Binary files /dev/null and b/docs/reference/connector/docs/images/workplace-search-standalone-ui.png differ diff --git a/docs/reference/connector/docs/index.asciidoc b/docs/reference/connector/docs/index.asciidoc
new file mode 100644
index 0000000000000..481e124a1a117
--- /dev/null
+++ b/docs/reference/connector/docs/index.asciidoc
@@ -0,0 +1,130 @@
+[#es-connectors]
+= Ingest content with Elastic connectors
+++++
+Connectors
+++++
+
+.Connectors documentation history
+****
+Please note that the connectors documentation lived in the https://www.elastic.co/guide/en/enterprise-search/8.15/connectors-references.html[Enterprise Search documentation] prior to version 8.16.0.
+****
+
+A _connector_ is a type of https://www.elastic.co/integrations/data-integrations[Elastic integration^] that syncs content from an original data source to an *Elasticsearch index*.
+Connectors enable you to create _searchable_, read-only replicas of your data sources.
+
+[IMPORTANT]
+====
+These connectors are focused on general content, which is non-timestamped data.
+Refer to https://www.elastic.co/guide/en/cloud/current/ec-cloud-ingest-data.html[add data to {es}] if you're interested in ingesting timestamped data.
+====
+
+Connectors extract the original files, records, or objects and transform them into Elasticsearch documents.
+
+Many connectors are available out-of-the-box on Elastic Cloud.
+You can also access the source code for these (and additional) connectors, and run them on your own infrastructure.
+
+* **Managed connectors** are available directly within your Elastic Cloud deployment.
+* **Self-managed connectors** run on your own infrastructure.
+
+[discrete#es-connectors-native]
+== Elastic managed connectors {ess-icon}
+
+_Managed connectors_ are available directly within your Elastic Cloud deployment, as a managed service.
+No additional infrastructure is required.
+
+Please note that Elastic managed connectors were previously known as "native connectors".
+
+Refer to <> for details on how to configure and use Elastic managed connectors.
+
+.*Expand* for list of available Elastic managed connectors
+[%collapsible]
+====
+include::_connectors-list-native.asciidoc[]
+====
+
+[discrete#es-connectors-build]
+== Self-managed connectors
+
+Self-managed connectors enable you to run connectors locally on your own infrastructure.
+This means you can try out new connectors before they are available natively within Elastic Cloud, and/or customize existing connectors.
+
+Please note that self-managed connectors were previously known as "connector clients".
+
+Refer to <> for details on how to deploy self-managed connectors.
+
+.*Expand* for list of available self-managed connectors
+[%collapsible]
+====
+include::_connectors-list-clients.asciidoc[]
+====
+
+[discrete#es-connectors-overview-framework]
+== Connector framework
+
+All Elastic connectors are built using our Python connector framework.
+The source code is available in the {connectors-python}[`elastic/connectors`] repository on GitHub.
+
+The connector framework is available for developers to customize existing self-managed connectors or build their own connectors.
+Refer to <> for details.
+
+[discrete#es-connectors-overview-diagram]
+== Connectors overview diagram
+
+The following diagram provides a high-level overview of the Elastic connectors offering and some key facts.
+ +image::connectors-overview.png[align="center",width="100%"] + +[discrete#es-connectors-overview-available-connectors] +== Available connectors and feature support + +include::_connectors-overview-table.asciidoc[] + + +:connectors-branch: {branch} + +ifeval::['{branch}' == 'master'] +:connectors-branch: main +endif::[] + +:connectors-python: https://github.com/elastic/connectors/tree/{connectors-branch} +:connectors-ruby: https://github.com/elastic/connectors-ruby/tree/{connectors-branch} + +include::connectors-refs.asciidoc[] + + +include::connectors-self-managed.asciidoc[] +include::connectors-run-from-docker.asciidoc[] +include::connectors-run-from-source.asciidoc[] +include::connectors-docker-compose-quickstart.asciidoc[] +include::postgresql-connector-client-tutorial.asciidoc[] + + +include::connectors-managed-service.asciidoc[] +include::connectors-hosted-tutorial-mongo.asciidoc[] + +include::connectors-framework.asciidoc[] + +include::connectors-usage.asciidoc[] + +include::connectors-APIs.asciidoc[] +include::connectors-API-tutorial.asciidoc[] +include::connectors-content-syncs.asciidoc[] +include::connectors-filter-extract-transform.asciidoc[] +include::connectors-content-extraction.asciidoc[] +include::sync-rules.asciidoc[] + +include::dls.asciidoc[] + + +include::connectors-management.asciidoc[] +include::connectors-scalability.asciidoc[] +include::connectors-security.asciidoc[] +include::connectors-troubleshooting.asciidoc[] +include::connectors-logs.asciidoc[] + +include::connectors-use-cases.asciidoc[] + + +include::connectors-release-notes.asciidoc[] +include::connectors-known-issues.asciidoc[] + diff --git a/docs/reference/connector/docs/postgresql-connector-client-tutorial.asciidoc b/docs/reference/connector/docs/postgresql-connector-client-tutorial.asciidoc new file mode 100644 index 0000000000000..cf8aac9c689ca --- /dev/null +++ b/docs/reference/connector/docs/postgresql-connector-client-tutorial.asciidoc @@ -0,0 +1,234 @@ +[#es-postgresql-connector-client-tutorial] +=== PostgreSQL self-managed connector tutorial +++++ +Tutorial +++++ + +This tutorial walks you through the process of creating a self-managed connector for a PostgreSQL data source. +You'll be using the <> workflow in the Kibana UI. +This means you'll be deploying the connector on your own infrastructure. +Refer to the <> for more information about this connector. + +You'll use the {connectors-python}[connector framework^] to create the connector. +In this exercise, you'll be working in both the terminal (or your IDE) and the Kibana UI. + +If you want to deploy a self-managed connector for another data source, use this tutorial as a blueprint. +Refer to the list of available <>. + +[TIP] +==== +Want to get started quickly testing a self-managed connector using Docker Compose? +Refer to this https://github.com/elastic/connectors/tree/main/scripts/stack#readme[README] in the `elastic/connectors` repo for more information. +==== + +[discrete#es-postgresql-connector-client-tutorial-prerequisites] +==== Prerequisites + +[discrete#es-postgresql-connector-client-tutorial-prerequisites-elastic] +===== Elastic prerequisites + +First, ensure you satisfy the <> for self-managed connectors. + +[discrete#es-postgresql-connector-client-tutorial-postgresql-prerequisites] +===== PostgreSQL prerequisites + +You need: + +* PostgreSQL version 11+. +* Tables must be owned by a PostgreSQL user. +* Database `superuser` privileges are required to index all database tables. 
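+
+If you want to verify these prerequisites before continuing, a quick check from `psql` might look like the following.
+This is an illustrative sketch only; it assumes your tables live in the `public` schema, so adjust the schema name for your environment:
+
+[source,sql]
+----
+-- Confirm the server version is 11 or later
+SELECT version();
+
+-- Confirm who owns the tables you plan to index
+SELECT tablename, tableowner FROM pg_tables WHERE schemaname = 'public';
+----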
+
+[TIP]
+====
+You should enable recording of the commit time of PostgreSQL transactions.
+Otherwise, _all_ data will be indexed in every sync.
+By default, `track_commit_timestamp` is `off`.
+
+Enable this by running the following SQL command on the PostgreSQL server:
+
+[source,sql]
+----
+ALTER SYSTEM SET track_commit_timestamp = on;
+----
+
+Then restart the PostgreSQL server.
+====
+
+[discrete#es-postgresql-connector-client-tutorial-steps]
+==== Steps
+
+To complete this tutorial, you'll need to complete the following steps:
+
+. <>
+. <>
+. <>
+. <>
+
+[discrete#es-postgresql-connector-client-tutorial-create-index]
+==== Create an Elasticsearch index
+
+Elastic connectors enable you to create searchable, read-only replicas of your data sources in Elasticsearch.
+The first step in setting up your self-managed connector is to create an index.
+
+In the {kibana-ref}[Kibana^] UI, go to *Search > Content > Elasticsearch indices*.
+
+Create a new connector index:
+
+. Under *Select an ingestion method* choose *Connector*.
+. Choose *PostgreSQL* from the list of connectors.
+. Name your index and optionally change the language analyzer to match the human language of your data source.
+(The index name you provide is automatically prefixed with `search-`.)
+. Save your changes.
+
+The index is created and ready to configure.
+
+[discrete#es-postgresql-connector-client-tutorial-gather-elastic-details]
+.Gather Elastic details
+****
+Before you can configure the connector, you need to gather some details about your Elastic deployment:
+
+* *Elasticsearch endpoint*.
+** If you're an Elastic Cloud user, find your deployment’s Elasticsearch endpoint in the Cloud UI under *Cloud > Deployments > > Elasticsearch*.
+** If you're running your Elastic deployment and the connector service in Docker, the default Elasticsearch endpoint is `http://host.docker.internal:9200`.
+* *API key.*
+You'll need this key to configure the connector.
+Use an existing key or create a new one.
+* *Connector ID*.
+Your unique connector ID is automatically generated when you create the connector.
+Find this in the Kibana UI.
+****
+
+[discrete#es-postgresql-connector-client-tutorial-setup-connector]
+==== Set up the connector
+
+Once you've created an index, you can set up the connector.
+You will be guided through this process in the UI.
+
+. *Edit the name and description for the connector.*
+This will help your team identify the connector.
+. *Clone and edit the connector service code.*
+For this example, we'll use the {connectors-python}[Python framework^].
+Follow these steps:
+** Clone or fork that repository locally with the following command: `git clone https://github.com/elastic/connectors`.
+** Open the `config.yml` configuration file in your editor of choice.
+** Replace the values for `host`, `api_key`, and `connector_id` with the values you gathered <>.
+Use the `service_type` value `postgresql` for this connector.
++
+.*Expand* to see an example `config.yml` file
+[%collapsible]
+====
+Replace the values for `host`, `api_key`, and `connector_id` with your own values.
+Use the `service_type` value `postgresql` for this connector.
+[source,yaml]
+----
+elasticsearch:
+  host: > # Your Elasticsearch endpoint
+  api_key: '' # Your top-level Elasticsearch API key
+...
+connectors:
+  -
+    connector_id: ""
+    api_key: "'" # Your scoped connector index API key (optional). If not provided, the top-level API key is used.
+    service_type: "postgresql"
+
+
+
+# Self-managed connector settings
+connector_id: '' # Your connector ID
+service_type: 'postgresql' # The service type for your connector
+
+sources:
+  # mongodb: connectors.sources.mongo:MongoDataSource
+  # s3: connectors.sources.s3:S3DataSource
+  # dir: connectors.sources.directory:DirectoryDataSource
+  # mysql: connectors.sources.mysql:MySqlDataSource
+  # network_drive: connectors.sources.network_drive:NASDataSource
+  # google_cloud_storage: connectors.sources.google_cloud_storage:GoogleCloudStorageDataSource
+  # azure_blob_storage: connectors.sources.azure_blob_storage:AzureBlobStorageDataSource
+  postgresql: connectors.sources.postgresql:PostgreSQLDataSource
+  # oracle: connectors.sources.oracle:OracleDataSource
+  # sharepoint: connectors.sources.sharepoint:SharepointDataSource
+  # mssql: connectors.sources.mssql:MSSQLDataSource
+  # jira: connectors.sources.jira:JiraDataSource
+----
+====
+
+[discrete#es-postgresql-connector-client-tutorial-run-connector-service]
+==== Run the connector service
+
+Now that you've configured the connector code, you can run the connector service.
+
+In your terminal or IDE:
+
+. `cd` into the root of your `connectors` clone/fork.
+. Run the following command: `make run`.
+
+The connector service should now be running.
+The UI will let you know that the connector has successfully connected to Elasticsearch.
+
+Here we're working locally.
+In production setups, you'll deploy the connector service to your own infrastructure.
+If you prefer to use Docker, refer to the {connectors-python}/docs/DOCKER.md[repo docs^] for instructions.
+
+[discrete#es-postgresql-connector-client-tutorial-sync-data-source]
+==== Sync your PostgreSQL data source
+
+[discrete#es-postgresql-connector-client-tutorial-sync-data-source-details]
+===== Enter your PostgreSQL data source details
+
+Once you've configured the connector, you can use it to index your data source.
+
+You can now enter your PostgreSQL instance details in the Kibana UI.
+
+Enter the following information:
+
+* *Host*.
+Server host address for your PostgreSQL instance.
+* *Port*.
+Port number for your PostgreSQL instance.
+* *Username*.
+Username of the PostgreSQL account.
+* *Password*.
+Password for that user.
+* *Database*.
+Name of the PostgreSQL database.
+* *Comma-separated list of tables*.
+`*` will fetch data from all tables in the configured database.
+
+Once you've entered all these details, select *Save configuration*.
+
+[discrete#es-postgresql-connector-client-tutorial-sync-data-source-launch-sync]
+===== Launch a sync
+
+If you navigate to the *Overview* tab in the Kibana UI, you can see the connector's _ingestion status_.
+This should now have changed to *Configured*.
+
+It's time to launch a sync by selecting the *Sync* button.
+
+If you navigate to the terminal window where you're running the connector service, you should see output like the following:
+
+[source,shell]
+----
+[FMWK][13:22:26][INFO] Fetcher
+[FMWK][13:22:26][INFO] Fetcher
+[FMWK][13:22:26][INFO] Fetcher
+...
+[FMWK][23:22:28][INFO] [oRXQwYYBLhXTs-qYpJ9i] Sync done: 3864 indexed, 0 deleted.
+(27 seconds)
+----
+
+This confirms the connector has fetched records from your PostgreSQL table(s) and transformed them into documents in your Elasticsearch index.
+
+Verify your Elasticsearch documents in the *Documents* tab in the Kibana UI.
+
+If you're happy with the results, set a recurring sync schedule in the *Scheduling* tab.
+This will ensure your _searchable_ data in Elasticsearch is always up to date with changes to your PostgreSQL data source. + +[discrete#es-postgresql-connector-client-tutorial-learn-more] +==== Learn more + +* <> +* {connectors-python}[Elastic connector framework repository^] +* <> +* <> +* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/sync-rules.asciidoc b/docs/reference/connector/docs/sync-rules.asciidoc new file mode 100644 index 0000000000000..9b2a77be7db03 --- /dev/null +++ b/docs/reference/connector/docs/sync-rules.asciidoc @@ -0,0 +1,327 @@ +[#es-sync-rules] +=== Connector sync rules +++++ +Sync rules +++++ + +Use connector sync rules to help control which documents are synced between the third-party data source and Elasticsearch. +Define sync rules in the Kibana UI for each connector index, under the `Sync rules` tab for the index. + +Sync rules apply to <> and <>. + +[discrete#es-sync-rules-availability-prerequisites] +==== Availability and prerequisites + +In Elastic versions *8.8.0 and later* all connectors have support for _basic_ sync rules. + +Some connectors support _advanced_ sync rules. +Learn more in the <>. + +[discrete#es-sync-rules-types] +==== Types of sync rule + +There are two types of sync rule: + +* **Basic sync rules** - these rules are represented in a table-like view. +Basic sync rules are identical for all connectors. +* **Advanced sync rules** - these rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. +Advanced sync rules are defined through a _source-specific_ DSL JSON snippet. + +[.screenshot] +image::images/filtering-rules-zero-state.png[Sync rules tab] + +[discrete#es-sync-rules-general-filtering] +==== General data filtering concepts + +Before discussing sync rules, it's important to establish a basic understanding of _data filtering_ concepts. +The following diagram shows that data filtering can occur in several different processes/locations. + +[.screenshot] +image::images/filtering-general-diagram.png[Filtering] + +In this documentation we will focus on remote and integration filtering. +Sync rules can be used to modify both of these. + +[discrete#es-sync-rules-general-filtering-remote] +===== Remote filtering + +Data might be filtered at its source. +We call this *remote filtering*, as the filtering process is external to Elastic. + +[discrete#es-sync-rules-general-filtering-integration] +===== Integration filtering + +*Integration filtering* acts as a bridge between the original data source and Elasticsearch. +Filtering that takes place in connectors is an example of integration filtering. + +[discrete#es-sync-rules-general-filtering-pipeline] +===== Pipeline filtering + +Finally, Elasticsearch can filter data right _before persistence_ using {ref}/ingest-pipeline-search.html[ingest pipelines]. +We will not focus on ingest pipeline filtering in this guide. + +[NOTE] +==== +Currently, basic sync rules are the only way to control _integration filtering_ for connectors. +Remember that remote filtering extends far beyond the scope of connectors alone. +For best results, collaborate with the owners and maintainers of your data source. +Ensure the source data is well-organized and optimized for the query types made by the connectors. +==== + +[discrete#es-sync-rules-overview] +==== Sync rules overview + +In most cases, your data lake will contain far more data than you want to expose to end users. 
+For example, you may want to search a product catalog, but not include vendor contact information, even if the two are co-located for business purposes.
+
+The optimal time to filter data is _early_ in the data pipeline.
+There are two main reasons:
+
+* *Performance*:
+It's more efficient to send a query to the backing data source than to obtain all the data and then filter it in the connector.
+It's faster to send a smaller dataset over a network and to process it on the connector side.
+* *Security*:
+Query-time filtering is applied on the data source side, so the data is not sent over the network and into the connector, which limits the exposure of your data.
+
+In a perfect world, all filtering would be done as remote filtering.
+
+In practice, however, this is not always possible.
+Some sources do not allow robust remote filtering.
+Others do, but require special setup (building indexes on specific fields, tweaking settings, etc.) that may require attention from other stakeholders in your organization.
+
+With this in mind, sync rules were designed to modify both remote filtering and integration filtering.
+Your goal should be to do as much remote filtering as possible, but integration filtering is a perfectly viable fallback.
+By definition, remote filtering is applied before the data is obtained from a third-party source.
+Integration filtering is applied after the data is obtained from a third-party source, but before it is ingested into the Elasticsearch index.
+
+[NOTE]
+====
+All sync rules are applied to a given document _before_ any {ref}/ingest-pipeline-search.html[ingest pipelines] are run on that document.
+Therefore, you can use ingest pipelines for any processing that must occur _after_ integration filtering has occurred.
+====
+
+[NOTE]
+====
+If a sync rule is added, edited, or removed, it will only take effect after the next full sync.
+====
+
+[discrete#es-sync-rules-basic]
+==== Basic sync rules
+
+Each basic sync rule can have one of two "policies": `include` or `exclude`.
+`Include` rules are used to include the documents that "match" the specified condition.
+`Exclude` rules are used to exclude the documents that "match" the specified condition.
+
+A "match" is determined based on a condition defined by a combination of "field", "rule", and "value".
+
+The `Field` column should be used to define which field on a given document should be considered.
+
+The following rules are available in the `Rule` column:
+
+* `equals` - The field value is equal to the specified value.
+* `starts_with` - The field value starts with the specified (string) value.
+* `ends_with` - The field value ends with the specified (string) value.
+* `contains` - The field value includes the specified (string) value.
+* `regex` - The field value matches the specified https://en.wikipedia.org/wiki/Regular_expression[regular expression^].
+* `>` - The field value is greater than the specified value.
+* `<` - The field value is less than the specified value.
+
+Finally, the `Value` column is dependent on:
+
+* the data type in the specified "field"
+* which "rule" was selected.
+
+For example, a value of `[A-Z]{2}` might make sense for a `regex` rule, but much less so for a `>` rule.
+Similarly, you probably wouldn't have a value of `espresso` when operating on an `ip_address` field, but perhaps you would for a `beverage` field.
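+
+Conceptually, a single basic sync rule combines a policy, a field, a rule, and a value.
+For illustration only, such a rule can be thought of as a small JSON object like the one below; the exact representation stored by the connector may differ, and the `status` field and `archived` value are hypothetical:
+
+[source,js]
+----
+{
+  "policy": "exclude",
+  "field": "status",
+  "rule": "equals",
+  "value": "archived"
+}
+----
+// NOTCONSOLE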
+ +[discrete#es-sync-rules-basic-examples] +===== Basic sync rules examples + +[discrete#es-sync-rules-basic-examples-1] +====== Example 1 + +Exclude all documents that have an `ID` field with the value greater than 1000. + +[.screenshot] +image::images/simple-rule-greater.png[Simple greater than rule] + +[discrete#es-sync-rules-basic-examples-2] +====== Example 2 + +Exclude all documents that have a `state` field that matches a specified regex. + +[.screenshot] +image::images/simple-rule-regex.png[Simple regex rule] + +[discrete#es-sync-rules-performance-implications] +===== Performance implications + +- If you're relying solely on basic sync rules in the integration filtering phase the connector will fetch *all* the data from the data source +- For data sources without automatic pagination, or similar optimizations, fetching all the data can lead to memory issues. +For example, loading datasets which are too big to fit in memory at once. + +[NOTE] +==== +The native MongoDB connector provided by Elastic uses pagination and therefore has optimized performance. +Keep in mind that custom community-built self-managed connectors may not have these performance optimizations. +==== + +The following diagrams illustrate the concept of pagination. +A huge data set may not fit into a connector instance's memory. +Splitting data into smaller chunks reduces the risk of out-of-memory errors. + +This diagram illustrates an entire dataset being extracted at once: +[.screenshot] +image::images/sync-rules-extract-all-at-once.png[Extract whole dataset at once] + +By comparison, this diagram illustrates a paginated dataset: + +[.screenshot] +image::images/sync-rules-pagination.png[Pagination] + +[discrete#es-sync-rules-advanced] +==== Advanced sync rules + +[IMPORTANT] +==== +Advanced sync rules overwrite any remote filtering query that could have been inferred from the basic sync rules. +If an advanced sync rule is defined, any defined basic sync rules will be used exclusively for integration filtering. +==== + +Advanced sync rules are only used in remote filtering. +You can think of advanced sync rules as a language-agnostic way to represent queries to the data source. +Therefore, these rules are highly *source-specific*. + +The following connectors support advanced sync rules: + +include::_connectors-list-advanced-rules.asciidoc[] + +Each connector supporting advanced sync rules provides its own DSL to specify rules. +Refer to the documentation for <> for details. + +[discrete#es-interplay-basic-rules-advanced-rules] +==== Combining basic and advanced sync rules + +You can also use basic sync rules and advanced sync rules together to filter a data set. + +The following diagram provides an overview of the order in which advanced sync rules, basic sync rules, and pipeline filtering, are applied to your documents: + +[.screenshot] +image::images/sync-rules-time-dimension.png[Sync Rules: What is applied when?] + +[discrete#es-example-interplay-basic-rules-advanced-rules] +===== Example + +In the following example we want to filter a data set containing apartments to only contain apartments with specific properties. +We'll use basic and advanced sync rules throughout the example. 
+
+A sample apartment looks like this in the `.json` format:
+
+[source, js]
+----
+{
+  "id": 1234,
+  "bedrooms": 3,
+  "price": 1500,
+  "address": {
+    "street": "Street 123",
+    "government_area": "Area",
+    "country_information": {
+      "country_code": "PT",
+      "country": "Portugal"
+    }
+  }
+}
+----
+// NOTCONSOLE
+
+The target data set should fulfill the following conditions:
+
+. Every apartment should have at least *3 bedrooms*
+. The apartments should not be more expensive than *1500 per month*
+. The apartment with id '1234' should be included without considering the first two conditions
+. Each apartment should be located in either 'Portugal' or 'Spain'
+
+The first three conditions can be handled by basic sync rules, but we'll need to use advanced sync rules for number 4.
+
+[discrete#es-example-interplay-basic-rules]
+====== Basic sync rules examples
+
+To create a new basic sync rule, navigate to the 'Sync Rules' tab and select *Draft new sync rules*:
+
+[.screenshot]
+image::images/sync-rules-draft-new-rules.png[Draft new rules]
+
+Afterwards, press the 'Save and validate draft' button to validate these rules.
+Note that when saved, the rules will be in _draft_ state. They won't be executed in the next sync unless they are _applied_.
+
+[.screenshot]
+image::images/sync-rules-save-and-validate-draft.png[Save and validate draft]
+
+After a successful validation, you can apply your rules so they'll be executed in the next sync.
+
+The following conditions can be covered by basic sync rules:
+
+1. The apartment with id '1234' should be included without considering the first two conditions
+2. Every apartment should have at least three bedrooms
+3. The apartments should not be more expensive than 1500 per month
+
+[.screenshot]
+image::images/sync-rules-rules-fulfilling-properties.png[Rules fulfilling the conditions]
+
+[NOTE]
+====
+Remember that order matters for basic sync rules.
+You may get different results for a different ordering.
+====
+
+[discrete#es-example-interplay-advanced-rules]
+====== Advanced sync rules example
+
+We want to include only apartments located in Portugal or Spain.
+We need to use advanced sync rules here, because we're dealing with deeply nested objects.
+
+Let's assume that the apartment data is stored inside a MongoDB instance.
+Among other things, the advanced sync rules for MongoDB support https://www.mongodb.com/docs/manual/core/aggregation-pipeline/[aggregation pipelines^].
+An aggregation pipeline to select only apartments located in Portugal or Spain looks like this:
+
+[source, js]
+----
+[
+  {
+    "$match": {
+      "$or": [
+        {
+          "address.country_information.country": "Portugal"
+        },
+        {
+          "address.country_information.country": "Spain"
+        }
+      ]
+    }
+  }
+]
+----
+// NOTCONSOLE
+
+To create these advanced sync rules, navigate to the sync rules creation dialog and select the 'Advanced rules' tab.
+You can now paste your aggregation pipeline into the input field under `aggregate.pipeline`:
+
+[.screenshot]
+image::images/sync-rules-paste-aggregation-pipeline.png[Paste aggregation pipeline]
+
+Once validated, apply these rules.
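+For reference, given the `aggregate.pipeline` structure mentioned above, the complete advanced rules object would look roughly like the sketch below.
+This is an illustration only; refer to the MongoDB connector reference for the exact format:
+
+[source, js]
+----
+{
+  "aggregate": {
+    "pipeline": [
+      {
+        "$match": {
+          "$or": [
+            { "address.country_information.country": "Portugal" },
+            { "address.country_information.country": "Spain" }
+          ]
+        }
+      }
+    ]
+  }
+}
+----
+// NOTCONSOLE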
+The following screenshot shows the applied sync rules, which will be executed in the next sync: + +[.screenshot] +image::images/sync-rules-advanced-rules-appeared.png[Advanced sync rules appeared] + +After a successful sync you can expand the sync details to see which rules were applied: + +[.screenshot] +image::images/sync-rules-applied-rules-during-sync.png[Applied rules during sync] + +[WARNING] +==== +Active sync rules can become invalid when changed outside of the UI. +Sync jobs with invalid rules will fail. +One workaround is to revalidate the draft rules and override the invalid active rules. +==== diff --git a/docs/reference/ilm/apis/explain.asciidoc b/docs/reference/ilm/apis/explain.asciidoc index a1ddde8c9f2d9..31c6ae9e82ec7 100644 --- a/docs/reference/ilm/apis/explain.asciidoc +++ b/docs/reference/ilm/apis/explain.asciidoc @@ -303,6 +303,12 @@ the case. "index_uuid": "H7lF9n36Rzqa-KfKcnGQMg", "index": "test-000057" }, + "previous_step_info": { <5> + "type": "cluster_block_exception", + "reason": "index [test-000057/H7lF9n36Rzqa-KfKcnGQMg] blocked by: [FORBIDDEN/5/index read-only (api)", + "index_uuid": "H7lF9n36Rzqa-KfKcnGQMg", + "index": "test-000057" + }, "phase_execution": { "policy": "my_lifecycle3", "phase_definition": { @@ -329,3 +335,4 @@ is true, {ilm-init} will retry the failed step automatically. <3> Shows the number of attempted automatic retries to execute the failed step. <4> What went wrong +<5> Contains a copy of the `step_info` field (when it exists) of the last attempted or executed step for diagnostic purposes, since the `step_info` is overwritten during each new attempt. diff --git a/docs/reference/index.asciidoc b/docs/reference/index.asciidoc index 24dbee8c2983b..7e207146e38e3 100644 --- a/docs/reference/index.asciidoc +++ b/docs/reference/index.asciidoc @@ -40,6 +40,8 @@ include::aggregations.asciidoc[] include::geospatial-analysis.asciidoc[] +include::connector/docs/index.asciidoc[] + include::eql/eql.asciidoc[] include::esql/index.asciidoc[] diff --git a/docs/reference/ingest.asciidoc b/docs/reference/ingest.asciidoc index e2b4cf6fa10db..8b63f9acc5a21 100644 --- a/docs/reference/ingest.asciidoc +++ b/docs/reference/ingest.asciidoc @@ -450,7 +450,7 @@ configuration. See {fleet-guide}/install-standalone-elastic-agent.html[Install s [[pipelines-in-enterprise-search]] === Pipelines for search indices -When you create Elasticsearch indices for search use cases, for example, using the {enterprise-search-ref}/crawler.html[web crawler^] or {enterprise-search-ref}/connectors.html[connectors^], these indices are automatically set up with specific ingest pipelines. +When you create Elasticsearch indices for search use cases, for example, using the {enterprise-search-ref}/crawler.html[web crawler^] or <>, these indices are automatically set up with specific ingest pipelines. These processors help optimize your content for search. See <> for more information. diff --git a/docs/reference/ingest/processors/redact.asciidoc b/docs/reference/ingest/processors/redact.asciidoc index 6706106e92655..9b8ac1e15d1a8 100644 --- a/docs/reference/ingest/processors/redact.asciidoc +++ b/docs/reference/ingest/processors/redact.asciidoc @@ -39,6 +39,7 @@ patterns. Legacy Grok patterns are not supported. 
| `ignore_missing` | no | `true` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document include::common-options.asciidoc[] | `skip_if_unlicensed` | no | `false` | If `true` and the current license does not support running redact processors, then the processor quietly exits without modifying the document +| `trace_redact` | no | `false` | If `true` then ingest metadata `_ingest._redact._is_redacted` is set to `true` if the document has been redacted |====== In this example the predefined `IP` Grok pattern is used to match diff --git a/docs/reference/ingest/search-ingest-pipelines.asciidoc b/docs/reference/ingest/search-ingest-pipelines.asciidoc index f37e07f632810..e414dacaab964 100644 --- a/docs/reference/ingest/search-ingest-pipelines.asciidoc +++ b/docs/reference/ingest/search-ingest-pipelines.asciidoc @@ -37,7 +37,8 @@ For example: It can be a lot of work to set up and manage production-ready pipelines from scratch. Considerations such as error handling, conditional execution, sequencing, versioning, and modularization must all be taken into account. -To this end, when you create indices for search use cases, (including {enterprise-search-ref}/crawler.html[Elastic web crawler], {enterprise-search-ref}/connectors.html[Elastic connector], and API indices), each index already has a pipeline set up with several processors that optimize your content for search. +To this end, when you create indices for search use cases, (including {enterprise-search-ref}/crawler.html[Elastic web crawler], <>. +, and API indices), each index already has a pipeline set up with several processors that optimize your content for search. This pipeline is called `ent-search-generic-ingestion`. While it is a "managed" pipeline (meaning it should not be tampered with), you can view its details via the Kibana UI or the Elasticsearch API. @@ -45,7 +46,8 @@ You can also <>. +, you can opt out (or back in) per index, and your choices are saved. For API indices, you can opt out (or back in) by including specific fields in your documents. <>. diff --git a/docs/reference/mapping/params/subobjects.asciidoc b/docs/reference/mapping/params/subobjects.asciidoc index 63e8e3c2db3fe..b0a5d3817c332 100644 --- a/docs/reference/mapping/params/subobjects.asciidoc +++ b/docs/reference/mapping/params/subobjects.asciidoc @@ -10,7 +10,7 @@ where for instance a field `metrics.time` holds a value too, which is common whe A document holding a value for both `metrics.time.max` and `metrics.time` gets rejected given that `time` would need to be a leaf field to hold a value as well as an object to hold the `max` sub-field. -The `subobjects: false` setting, which can be applied only to the top-level mapping definition and +The `subobjects` setting, which can be applied only to the top-level mapping definition and to <> fields, disables the ability for an object to hold further subobjects and makes it possible to store documents where field names contain dots and share common prefixes. From the example above, if the object container `metrics` has `subobjects` set to `false`, it can hold values for both `time` and `time.max` directly @@ -109,138 +109,26 @@ PUT my-index-000001/_doc/metric_1 <1> The entire mapping is configured to not support objects. <2> The document does not support objects -Setting `subobjects: false` disallows the definition of <> and <> sub-fields, which -can be too restrictive in cases where it's desirable to have <> objects or sub-objects with specific -behavior (e.g. 
with `enabled:false`). In this case, it's possible to set `subobjects: auto`, which -<> whenever possible and falls back to creating an object mapper otherwise (instead of -rejecting the mapping as `subobjects: false` does). For instance: - -[source,console] --------------------------------------------------- -PUT my-index-000002 -{ - "mappings": { - "properties": { - "metrics": { - "type": "object", - "subobjects": "auto", <1> - "properties": { - "inner": { - "type": "object", - "enabled": false - }, - "nested": { - "type": "nested" - } - } - } - } - } -} - -PUT my-index-000002/_doc/metric_1 -{ - "metrics.time" : 100, <2> - "metrics.time.min" : 10, - "metrics.time.max" : 900 -} - -PUT my-index-000002/_doc/metric_2 -{ - "metrics" : { <3> - "time" : 100, - "time.min" : 10, - "time.max" : 900, - "inner": { - "foo": "bar", - "path.to.some.field": "baz" - }, - "nested": [ - { "id": 10 }, - { "id": 1 } - ] - } -} - -GET my-index-000002/_mapping --------------------------------------------------- - -[source,console-result] --------------------------------------------------- -{ - "my-index-000002" : { - "mappings" : { - "properties" : { - "metrics" : { - "subobjects" : auto, - "properties" : { - "inner": { <4> - "type": "object", - "enabled": false - }, - "nested": { - "type": "nested", - "properties" : { - "id" : { - "type" : "long" - } - } - }, - "time" : { - "type" : "long" - }, - "time.min" : { - "type" : "long" - }, - "time.max" : { - "type" : "long" - } - } - } - } - } - } -} --------------------------------------------------- - -<1> The `metrics` field can only hold statically defined objects, namely `inner` and `nested`. -<2> Sample document holding flat paths -<3> Sample document holding an object (configured with sub-objects) and its leaf sub-fields -<4> The resulting mapping where dots in field names (`time.min`, `time_max`), as well as the -statically-defined sub-objects `inner` and `nested`, were preserved - The `subobjects` setting for existing fields and the top-level mapping definition cannot be updated. -[[auto-flattening]] ==== Auto-flattening object mappings -It is generally recommended to define the properties of an object that is configured with `subobjects: false` or -`subobjects: auto` with dotted field names (as shown in the first example). However, it is also possible to define -these properties as sub-objects in the mappings. In that case, the mapping will be automatically flattened before -it is stored. This makes it easier to re-use existing mappings without having to re-write them. - -Note that auto-flattening does not apply if any of the following <> are set -on object mappings that are defined under an object configured with `subobjects: false` or `subobjects: auto`: +It is generally recommended to define the properties of an object that is configured with `subobjects: false` with dotted field names +(as shown in the first example). +However, it is also possible to define these properties as sub-objects in the mappings. +In that case, the mapping will be automatically flattened before it is stored. +This makes it easier to re-use existing mappings without having to re-write them. -* The <> mapping parameter is `false`. -* The <> mapping parameter contradicts the implicit or explicit value of the parent. -For example, when `dynamic` is set to `false` in the root of the mapping, object mappers that set `dynamic` to `true` -can't be auto-flattened. -* The <> mapping parameter is set to `auto` or `true` explicitly. 
+Note that auto-flattening will not work when certain <> are set +on object mappings that are defined under an object configured with `subobjects: false`: -If such a sub-object is detected, the behavior depends on the `subobjects` value: - -* `subobjects: false` is not compatible, so a mapping error is returned during mapping construction. -* `subobjects: auto` reverts to adding the object to the mapping, bypassing auto-flattening for it. Still, any -intermediate objects will be auto-flattened if applicable (i.e. the object name gets directly attached under the parent -object with `subobjects: auto`). Auto-flattening can be applied within sub-objects, if they are configured with -`subobjects: auto` too. - -Auto-flattening example with `subobjects: false`: +* The <> mapping parameter must not be `false`. +* The <> mapping parameter must not contradict the implicit or explicit value of the parent. For example, when `dynamic` is set to `false` in the root of the mapping, object mappers that set `dynamic` to `true` can't be auto-flattened. +* The <> mapping parameter must not be set to `true` explicitly. [source,console] -------------------------------------------------- -PUT my-index-000003 +PUT my-index-000002 { "mappings": { "properties": { @@ -259,13 +147,13 @@ PUT my-index-000003 } } } -GET my-index-000003/_mapping +GET my-index-000002/_mapping -------------------------------------------------- [source,console-result] -------------------------------------------------- { - "my-index-000003" : { + "my-index-000002" : { "mappings" : { "properties" : { "metrics" : { @@ -287,85 +175,5 @@ GET my-index-000003/_mapping <1> The metrics object can contain further object mappings that will be auto-flattened. Object mappings at this level must not set certain mapping parameters as explained above. -<2> This field will be auto-flattened to `time.min` before the mapping is stored. -<3> The auto-flattened `time.min` field can be inspected by looking at the index mapping. - -Auto-flattening example with `subobjects: auto`: - -[source,console] --------------------------------------------------- -PUT my-index-000004 -{ - "mappings": { - "properties": { - "metrics": { - "subobjects": "auto", - "properties": { - "time": { - "type": "object", <1> - "properties": { - "min": { "type": "long" } <2> - } - }, - "to": { - "type": "object", - "properties": { - "inner_metrics": { <3> - "type": "object", - "subobjects": "auto", - "properties": { - "time": { - "type": "object", - "properties": { - "max": { "type": "long" } <4> - } - } - } - } - } - } - } - } - } - } -} -GET my-index-000004/_mapping --------------------------------------------------- - -[source,console-result] --------------------------------------------------- -{ - "my-index-000004" : { - "mappings" : { - "properties" : { - "metrics" : { - "subobjects" : "auto", - "properties" : { - "time.min" : { <5> - "type" : "long" - }, - "to.inner_metrics" : { <6> - "subobjects" : "auto", - "properties" : { - "time.max" : { <7> - "type" : "long" - } - } - } - } - } - } - } - } -} --------------------------------------------------- - -<1> The metrics object can contain further object mappings that may be auto-flattened, depending on their mapping -parameters as explained above. -<2> This field will be auto-flattened to `time.min` before the mapping is stored. -<3> This object has param `subobjects: auto` so it can't be auto-flattened. Its parent does qualify for auto-flattening, -so it becomes `to.inner_metrics` before the mapping is stored. 
-<4> This field will be auto-flattened to `time.max` before the mapping is stored. -<5> The auto-flattened `time.min` field can be inspected by looking at the index mapping. -<6> The inner object `to.inner_metrics` can be inspected by looking at the index mapping. -<7> The auto-flattened `time.max` field can be inspected by looking at the index mapping. +<2> This field will be auto-flattened to `"time.min"` before the mapping is stored. +<3> The auto-flattened `"time.min"` field can be inspected by looking at the index mapping. diff --git a/docs/reference/query-dsl/semantic-query.asciidoc b/docs/reference/query-dsl/semantic-query.asciidoc index 22b5e6c5e6aad..f3f6aca3fd07a 100644 --- a/docs/reference/query-dsl/semantic-query.asciidoc +++ b/docs/reference/query-dsl/semantic-query.asciidoc @@ -25,7 +25,7 @@ GET my-index-000001/_search } } ------------------------------------------------------------ -// TEST[skip:TBD] +// TEST[skip: Requires inference endpoints] [discrete] @@ -40,9 +40,209 @@ The `semantic_text` field to perform the query on. (Required, string) The query text to be searched for on the field. +`inner_hits`:: +(Optional, object) +Retrieves the specific passages that match the query. +See <> for more information. ++ +.Properties of `inner_hits` +[%collapsible%open] +==== +`from`:: +(Optional, integer) +The offset from the first matching passage to fetch. +Used to paginate through the passages. +Defaults to `0`. + +`size`:: +(Optional, integer) +The maximum number of matching passages to return. +Defaults to `3`. +==== Refer to <> to learn more about semantic search using `semantic_text` and `semantic` query. +[discrete] +[[semantic-query-passage-ranking]] +==== Passage ranking with the `semantic` query +The `inner_hits` parameter can be used for _passage ranking_, which allows you to determine which passages in the document best match the query. +For example, if you have a document that covers varying topics: + +[source,console] +------------------------------------------------------------ +POST my-index/_doc/lake_tahoe +{ + "inference_field": [ + "Lake Tahoe is the largest alpine lake in North America", + "When hiking in the area, please be on alert for bears" + ] +} +------------------------------------------------------------ +// TEST[skip: Requires inference endpoints] + +You can use passage ranking to find the passage that best matches your query: + +[source,console] +------------------------------------------------------------ +GET my-index/_search +{ + "query": { + "semantic": { + "field": "inference_field", + "query": "mountain lake", + "inner_hits": { } + } + } +} +------------------------------------------------------------ +// TEST[skip: Requires inference endpoints] + +[source,console-result] +------------------------------------------------------------ +{ + "took": 67, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 10.844536, + "hits": [ + { + "_index": "my-index", + "_id": "lake_tahoe", + "_score": 10.844536, + "_source": { + ... 
+ }, + "inner_hits": { <1> + "inference_field": { + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 10.844536, + "hits": [ + { + "_index": "my-index", + "_id": "lake_tahoe", + "_nested": { + "field": "inference_field.inference.chunks", + "offset": 0 + }, + "_score": 10.844536, + "_source": { + "text": "Lake Tahoe is the largest alpine lake in North America" + } + }, + { + "_index": "my-index", + "_id": "lake_tahoe", + "_nested": { + "field": "inference_field.inference.chunks", + "offset": 1 + }, + "_score": 3.2726858, + "_source": { + "text": "When hiking in the area, please be on alert for bears" + } + } + ] + } + } + } + } + ] + } +} +------------------------------------------------------------ +<1> Ranked passages will be returned using the <>, with `` set to the `semantic_text` field name. + +By default, the top three matching passages will be returned. +You can use the `size` parameter to control the number of passages returned and the `from` parameter to page through the matching passages: + +[source,console] +------------------------------------------------------------ +GET my-index/_search +{ + "query": { + "semantic": { + "field": "inference_field", + "query": "mountain lake", + "inner_hits": { + "from": 1, + "size": 1 + } + } + } +} +------------------------------------------------------------ +// TEST[skip: Requires inference endpoints] + +[source,console-result] +------------------------------------------------------------ +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 10.844536, + "hits": [ + { + "_index": "my-index", + "_id": "lake_tahoe", + "_score": 10.844536, + "_source": { + ... + }, + "inner_hits": { + "inference_field": { + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 10.844536, + "hits": [ + { + "_index": "my-index", + "_id": "lake_tahoe", + "_nested": { + "field": "inference_field.inference.chunks", + "offset": 1 + }, + "_score": 3.2726858, + "_source": { + "text": "When hiking in the area, please be on alert for bears" + } + } + ] + } + } + } + } + ] + } +} +------------------------------------------------------------ + [discrete] [[hybrid-search-semantic]] ==== Hybrid search with the `semantic` query @@ -79,7 +279,7 @@ POST my-index/_search } } ------------------------------------------------------------ -// TEST[skip:TBD] +// TEST[skip: Requires inference endpoints] You can also use semantic_text as part of <> to make ranking relevant results easier: @@ -116,12 +316,12 @@ GET my-index/_search } } ------------------------------------------------------------ -// TEST[skip:TBD] +// TEST[skip: Requires inference endpoints] [discrete] [[advanced-search]] -=== Advanced search on `semantic_text` fields +==== Advanced search on `semantic_text` fields The `semantic` query uses default settings for searching on `semantic_text` fields for ease of use. If you want to fine-tune a search on a `semantic_text` field, you need to know the task type used by the `inference_id` configured in `semantic_text`. 
@@ -135,7 +335,7 @@ on a `semantic_text` field, it is not supported to use the `semantic_query` on a [discrete] [[search-sparse-inference]] -==== Search with `sparse_embedding` inference +===== Search with `sparse_embedding` inference When the {infer} endpoint uses a `sparse_embedding` model, you can use a <> on a <> field in the following way: @@ -157,14 +357,14 @@ GET test-index/_search } } ------------------------------------------------------------ -// TEST[skip:TBD] +// TEST[skip: Requires inference endpoints] You can customize the `sparse_vector` query to include specific settings, like <>. [discrete] [[search-text-inferece]] -==== Search with `text_embedding` inference +===== Search with `text_embedding` inference When the {infer} endpoint uses a `text_embedding` model, you can use a <> on a `semantic_text` field in the following way: @@ -190,6 +390,6 @@ GET test-index/_search } } ------------------------------------------------------------ -// TEST[skip:TBD] +// TEST[skip: Requires inference endpoints] You can customize the `knn` query to include specific settings, like `num_candidates` and `k`. diff --git a/docs/reference/snapshot-restore/repository-s3.asciidoc b/docs/reference/snapshot-restore/repository-s3.asciidoc index 3a9c12caebad9..a75a1a3ce1042 100644 --- a/docs/reference/snapshot-restore/repository-s3.asciidoc +++ b/docs/reference/snapshot-restore/repository-s3.asciidoc @@ -378,7 +378,7 @@ If you use a Glacier storage class, or another unsupported storage class, or object expiry, then you may permanently lose access to your repository contents. -You may use the `intellligent_tiering` storage class to automatically manage +You may use the `intelligent_tiering` storage class to automatically manage the class of objects, but you must not enable the optional Archive Access or Deep Archive Access tiers. If you use these tiers then you may permanently lose access to your repository contents. diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 8eb50964f4d5b..4b0802c79a340 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -5,6 +5,9 @@ Painless examples ++++ + +IMPORTANT: The examples that use the `scripted_metric` aggregation are not supported on {es} Serverless. + These examples demonstrate how to use Painless in {transforms}. You can learn more about the Painless scripting language in the {painless}/painless-guide.html[Painless guide]. @@ -37,6 +40,8 @@ with the latest timestamp. From a technical perspective, it helps to achieve the function of a <> by using scripted metric aggregation in a {transform}, which provides a metric output. +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. + [source,js] -------------------------------------------------- "aggregations": { @@ -453,6 +458,8 @@ example for details. The example below assumes that the HTTP response codes are stored as keywords in the `response` field of the documents. +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. + [source,js] -------------------------------------------------- "aggregations": { <1> @@ -507,7 +514,9 @@ Finally, returns the `counts` array with the response counts. == Comparing indices by using scripted metric aggregations This example shows how to compare the content of two indices by a {transform} -that uses a scripted metric aggregation. 
+that uses a scripted metric aggregation. + +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. [source,console] -------------------------------------------------- @@ -623,6 +632,8 @@ By using the `sessionid` as a group-by field, you are able to enumerate events through the session and get more details of the session by using scripted metric aggregation. +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. + [source,js] -------------------------------------------------- POST _transform/_preview diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index e955ee28dd349..6acc1431eaec1 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip +distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/libs/core/src/main/java/org/elasticsearch/core/RestApiVersion.java b/libs/core/src/main/java/org/elasticsearch/core/RestApiVersion.java index 9ff31a191ce37..387d05db84441 100644 --- a/libs/core/src/main/java/org/elasticsearch/core/RestApiVersion.java +++ b/libs/core/src/main/java/org/elasticsearch/core/RestApiVersion.java @@ -22,14 +22,13 @@ public enum RestApiVersion { V_8(8), + @UpdateForV9 // remove all references to V_7 then delete this annotation V_7(7); public final byte major; - @UpdateForV9 - // We need to bump current and previous to V_9 and V_8, respectively - private static final RestApiVersion CURRENT = V_8; - private static final RestApiVersion PREVIOUS = V_7; + private static final RestApiVersion CURRENT = V_9; + private static final RestApiVersion PREVIOUS = V_8; RestApiVersion(int major) { this.major = (byte) major; @@ -67,8 +66,6 @@ public static Predicate onOrAfter(RestApiVersion restApiVersion) }; } - @UpdateForV9 - // Right now we return api version 8 for major version 9 until we bump the api version above public static RestApiVersion forMajor(int major) { switch (major) { case 7 -> { @@ -78,7 +75,7 @@ public static RestApiVersion forMajor(int major) { return V_8; } case 9 -> { - return V_8; + return V_9; } default -> throw new IllegalArgumentException("Unknown REST API version " + major); } diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/BaseCells.java b/libs/h3/src/main/java/org/elasticsearch/h3/BaseCells.java index b15c86c17ab83..24b60686ff224 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/BaseCells.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/BaseCells.java @@ -27,27 +27,14 @@ */ final class BaseCells { - private static class BaseCellData { - // "home" face and normalized ijk coordinates on that face - final int homeFace; - final int homeI; - final int homeJ; - final int homeK; - // is this base cell a pentagon? - final boolean isPentagon; - // if a pentagon, what are its two clockwise offset - final int[] cwOffsetPent; - - /// faces? 
- BaseCellData(int homeFace, int homeI, int homeJ, int homeK, boolean isPentagon, int[] cwOffsetPent) { - this.homeFace = homeFace; - this.homeI = homeI; - this.homeJ = homeJ; - this.homeK = homeK; - this.isPentagon = isPentagon; - this.cwOffsetPent = cwOffsetPent; - } - } + private record BaseCellData( + int homeFace, // "home" face and normalized ijk coordinates on that face + int homeI, + int homeJ, + int homeK, + boolean isPentagon, // is this base cell a pentagon? + int[] cwOffsetPent // if a pentagon, what are its two clockwise offset + ) {} /** * Resolution 0 base cell data table. @@ -185,16 +172,10 @@ private static class BaseCellData { /** * base cell at a given ijk and required rotations into its system */ - private static class BaseCellRotation { - final int baseCell; // base cell number - final int ccwRot60; // number of ccw 60 degree rotations relative to current - /// face - - BaseCellRotation(int baseCell, int ccwRot60) { - this.baseCell = baseCell; - this.ccwRot60 = ccwRot60; - } - } + record BaseCellRotation( + int baseCell, // base cell number + int ccwRot60 // number of ccw 60 degree rotations relative to current + ) {} /** @brief Resolution 0 base cell lookup table for each face. * diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java b/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java index 8aae7583ef04e..bfb5f662dee8f 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java @@ -109,8 +109,8 @@ void reset(int i, int j, int k) { * Find the center point in 2D cartesian coordinates of a hex. */ public Vec2d ijkToHex2d() { - final int i = Math.subtractExact(this.i, this.k); - final int j = Math.subtractExact(this.j, this.k); + final int i = this.i - this.k; + final int j = this.j - this.k; return new Vec2d(i - 0.5 * j, j * Constants.M_SQRT3_2); } @@ -118,8 +118,8 @@ public Vec2d ijkToHex2d() { * Find the center point in spherical coordinates of a hex on a particular icosahedral face. 
*/ public LatLng ijkToGeo(int face, int res, boolean substrate) { - final int i = Math.subtractExact(this.i, this.k); - final int j = Math.subtractExact(this.j, this.k); + final int i = this.i - this.k; + final int j = this.j - this.k; return Vec2d.hex2dToGeo(i - 0.5 * j, j * Constants.M_SQRT3_2, face, res, substrate); } @@ -132,9 +132,9 @@ public LatLng ijkToGeo(int face, int res, boolean substrate) { */ public void ijkAdd(int i, int j, int k) { - this.i = Math.addExact(this.i, i); - this.j = Math.addExact(this.j, j); - this.k = Math.addExact(this.k, k); + this.i += i; + this.j += j; + this.k += k; } /** @@ -145,9 +145,9 @@ public void ijkAdd(int i, int j, int k) { * @param k the k coordinate */ public void ijkSub(int i, int j, int k) { - this.i = Math.subtractExact(this.i, i); - this.j = Math.subtractExact(this.j, j); - this.k = Math.subtractExact(this.k, k); + this.i -= i; + this.j -= j; + this.k -= k; } /** @@ -168,9 +168,9 @@ public void downAp7() { // iVec (3, 0, 1) // jVec (1, 3, 0) // kVec (0, 1, 3) - final int i = Math.addExact(Math.multiplyExact(this.i, 3), this.j); - final int j = Math.addExact(Math.multiplyExact(this.j, 3), this.k); - final int k = Math.addExact(Math.multiplyExact(this.k, 3), this.i); + final int i = this.i * 3 + this.j; + final int j = this.j * 3 + this.k; + final int k = this.k * 3 + this.i; this.i = i; this.j = j; this.k = k; @@ -185,9 +185,9 @@ public void downAp7r() { // iVec (3, 1, 0) // jVec (0, 3, 1) // kVec (1, 0, 3) - final int i = Math.addExact(Math.multiplyExact(this.i, 3), this.k); - final int j = Math.addExact(Math.multiplyExact(this.j, 3), this.i); - final int k = Math.addExact(Math.multiplyExact(this.k, 3), this.j); + final int i = this.i * 3 + this.k; + final int j = this.j * 3 + this.i; + final int k = this.k * 3 + this.j; this.i = i; this.j = j; this.k = k; @@ -203,9 +203,9 @@ public void downAp3() { // iVec (2, 0, 1) // jVec (1, 2, 0) // kVec (0, 1, 2) - final int i = Math.addExact(Math.multiplyExact(this.i, 2), this.j); - final int j = Math.addExact(Math.multiplyExact(this.j, 2), this.k); - final int k = Math.addExact(Math.multiplyExact(this.k, 2), this.i); + final int i = this.i * 2 + this.j; + final int j = this.j * 2 + this.k; + final int k = this.k * 2 + this.i; this.i = i; this.j = j; this.k = k; @@ -221,9 +221,9 @@ public void downAp3r() { // iVec (2, 1, 0) // jVec (0, 2, 1) // kVec (1, 0, 2) - final int i = Math.addExact(Math.multiplyExact(this.i, 2), this.k); - final int j = Math.addExact(Math.multiplyExact(this.j, 2), this.i); - final int k = Math.addExact(Math.multiplyExact(this.k, 2), this.j); + final int i = this.i * 2 + this.k; + final int j = this.j * 2 + this.i; + final int k = this.k * 2 + this.j; this.i = i; this.j = j; this.k = k; @@ -239,9 +239,9 @@ public void ijkRotate60cw() { // iVec (1, 0, 1) // jVec (1, 1, 0) // kVec (0, 1, 1) - final int i = Math.addExact(this.i, this.j); - final int j = Math.addExact(this.j, this.k); - final int k = Math.addExact(this.i, this.k); + final int i = this.i + this.j; + final int j = this.j + this.k; + final int k = this.i + this.k; this.i = i; this.j = j; this.k = k; @@ -256,9 +256,9 @@ public void ijkRotate60ccw() { // iVec (1, 1, 0) // jVec (0, 1, 1) // kVec (1, 0, 1) - final int i = Math.addExact(this.i, this.k); - final int j = Math.addExact(this.i, this.j); - final int k = Math.addExact(this.j, this.k); + final int i = this.i + this.k; + final int j = this.i + this.j; + final int k = this.j + this.k; this.i = i; this.j = j; this.k = k; @@ -282,10 +282,10 @@ public void neighbor(int 
digit) { * clockwise aperture 7 grid. */ public void upAp7r() { - final int i = Math.subtractExact(this.i, this.k); - final int j = Math.subtractExact(this.j, this.k); - this.i = (int) Math.round((Math.addExact(Math.multiplyExact(2, i), j)) * M_ONESEVENTH); - this.j = (int) Math.round((Math.subtractExact(Math.multiplyExact(3, j), i)) * M_ONESEVENTH); + final int i = this.i - this.k; + final int j = this.j - this.k; + this.i = (int) Math.round((2 * i + j) * M_ONESEVENTH); + this.j = (int) Math.round((3 * j - i) * M_ONESEVENTH); this.k = 0; ijkNormalize(); } @@ -296,10 +296,10 @@ public void upAp7r() { * */ public void upAp7() { - final int i = Math.subtractExact(this.i, this.k); - final int j = Math.subtractExact(this.j, this.k); - this.i = (int) Math.round((Math.subtractExact(Math.multiplyExact(3, i), j)) * M_ONESEVENTH); - this.j = (int) Math.round((Math.addExact(Math.multiplyExact(2, j), i)) * M_ONESEVENTH); + final int i = this.i - this.k; + final int j = this.j - this.k; + this.i = (int) Math.round((3 * i - j) * M_ONESEVENTH); + this.j = (int) Math.round((2 * j + i) * M_ONESEVENTH); this.k = 0; ijkNormalize(); } @@ -363,5 +363,4 @@ public static int rotate60ccw(int digit) { default -> digit; }; } - } diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java b/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java index df2ab26ca0686..ae59ff359d1f8 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java @@ -149,25 +149,13 @@ enum Overage { /** * Information to transform into an adjacent face IJK system */ - private static class FaceOrientIJK { - // face number - final int face; - // res 0 translation relative to primary face - final int translateI; - final int translateJ; - final int translateK; - // number of 60 degree ccw rotations relative to primary - final int ccwRot60; - - // face - FaceOrientIJK(int face, int translateI, int translateJ, int translateK, int ccwRot60) { - this.face = face; - this.translateI = translateI; - this.translateJ = translateJ; - this.translateK = translateK; - this.ccwRot60 = ccwRot60; - } - } + private record FaceOrientIJK( + int face, // face number + int translateI, // res 0 translation relative to primary face + int translateJ, + int translateK, + int ccwRot60// number of 60 degree ccw rotations relative to primary + ) {} /** * Definition of which faces neighbor each other. @@ -486,11 +474,7 @@ public CellBoundary faceIjkPentToCellBoundary(int res, int start, int length) { } final int unitScale = unitScaleByCIIres[adjRes] * 3; - lastCoord.ijkAdd( - Math.multiplyExact(fijkOrient.translateI, unitScale), - Math.multiplyExact(fijkOrient.translateJ, unitScale), - Math.multiplyExact(fijkOrient.translateK, unitScale) - ); + lastCoord.ijkAdd(fijkOrient.translateI * unitScale, fijkOrient.translateJ * unitScale, fijkOrient.translateK * unitScale); lastCoord.ijkNormalize(); final Vec2d orig2d1 = lastCoord.ijkToHex2d(); @@ -596,18 +580,10 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final // to each vertex to translate the vertices to that cell. 
final int[] vertexLast = verts[lastV]; final int[] vertexV = verts[v]; - scratch2.reset( - Math.addExact(vertexLast[0], this.coord.i), - Math.addExact(vertexLast[1], this.coord.j), - Math.addExact(vertexLast[2], this.coord.k) - ); + scratch2.reset(vertexLast[0] + this.coord.i, vertexLast[1] + this.coord.j, vertexLast[2] + this.coord.k); scratch2.ijkNormalize(); final Vec2d orig2d0 = scratch2.ijkToHex2d(); - scratch2.reset( - Math.addExact(vertexV[0], this.coord.i), - Math.addExact(vertexV[1], this.coord.j), - Math.addExact(vertexV[2], this.coord.k) - ); + scratch2.reset(vertexV[0] + this.coord.i, vertexV[1] + this.coord.j, vertexV[2] + this.coord.k); scratch2.ijkNormalize(); final Vec2d orig2d1 = scratch2.ijkToHex2d(); @@ -704,7 +680,7 @@ static long faceIjkToH3(int res, int face, CoordIJK coord) { scratch.reset(coord.i, coord.j, coord.k); scratch.downAp7r(); } - scratch.reset(Math.subtractExact(lastI, scratch.i), Math.subtractExact(lastJ, scratch.j), Math.subtractExact(lastK, scratch.k)); + scratch.reset(lastI - scratch.i, lastJ - scratch.j, lastK - scratch.k); scratch.ijkNormalize(); h = H3Index.H3_set_index_digit(h, r, scratch.unitIjkToDigit()); } diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java b/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java index 6d7af86a9a537..7babedc55eb0e 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java @@ -325,7 +325,9 @@ public static long h3RotatePent60ccw(long h) { foundFirstNonZeroDigit = true; // adjust for deleted k-axes sequence - if (h3LeadingNonZeroDigit(h) == CoordIJK.Direction.K_AXES_DIGIT.digit()) h = h3Rotate60ccw(h); + if (h3LeadingNonZeroDigit(h) == CoordIJK.Direction.K_AXES_DIGIT.digit()) { + h = h3Rotate60ccw(h); + } } } return h; diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/HexRing.java b/libs/h3/src/main/java/org/elasticsearch/h3/HexRing.java index d7011aa4d48ce..936f636e6a5ce 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/HexRing.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/HexRing.java @@ -290,11 +290,6 @@ final class HexRing { { 0, 0, 1, 0, 1, 5, 1 }, // base cell 121 }; - private static final int E_SUCCESS = 0; // Success (no error) - private static final int E_PENTAGON = 9; // Pentagon distortion was encountered which the algorithm - private static final int E_CELL_INVALID = 5; // `H3Index` cell argument was not valid - private static final int E_FAILED = 1; // The operation failed but a more specific error is not available - /** * Directions used for traversing a hexagonal ring counterclockwise around * {1, 0, 0} diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java b/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java index b0c2627a5f398..3b6f26aa6357a 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java @@ -27,7 +27,10 @@ /** * 2D floating-point vector */ -final class Vec2d { +record Vec2d( + double x, // x component + double y // y component +) { /** 1/sin(60') **/ private static final double M_RSIN60 = 1.0 / Constants.M_SQRT3_2; @@ -90,14 +93,6 @@ final class Vec2d { { 2.361378999196363184, 0.266983896803167583, 4.455774101589558636 }, // face 19 }; - private final double x; /// < x component - private final double y; /// < y component - - Vec2d(double x, double y) { - this.x = x; - this.y = y; - } - /** * Determines the center point in spherical coordinates of a cell given by this 2D * hex coordinates on a 
particular icosahedral face. @@ -141,7 +136,7 @@ static LatLng hex2dToGeo(double x, double y, int face, int res, boolean substrat // scale accordingly if this is a substrate grid if (substrate) { - r /= 3.0; + r *= M_ONETHIRD; if (H3Index.isResolutionClassIII(res)) { r *= Constants.M_RSQRT7; } @@ -202,17 +197,17 @@ static CoordIJK hex2dToCoordIJK(double x, double y) { j = m2; } else { i = m1; - j = Math.incrementExact(m2); + j = m2 + 1; } } else { if (r2 < (1.0 - r1)) { j = m2; } else { - j = Math.incrementExact(m2); + j = m2 + 1; } if ((1.0 - r1) <= r2 && r2 < (2.0 * r1)) { - i = Math.incrementExact(m1); + i = m1 + 1; } else { i = m1; } @@ -222,21 +217,21 @@ static CoordIJK hex2dToCoordIJK(double x, double y) { if (r2 < (1.0 - r1)) { j = m2; } else { - j = Math.addExact(m2, 1); + j = m2 + 1; } if ((2.0 * r1 - 1.0) < r2 && r2 < (1.0 - r1)) { i = m1; } else { - i = Math.incrementExact(m1); + i = m1 + 1; } } else { if (r2 < (r1 * 0.5)) { - i = Math.incrementExact(m1); + i = m1 + 1; j = m2; } else { - i = Math.incrementExact(m1); - j = Math.incrementExact(m2); + i = m1 + 1; + j = m2 + 1; } } } @@ -247,18 +242,19 @@ static CoordIJK hex2dToCoordIJK(double x, double y) { if ((j % 2) == 0) // even { final int axisi = j / 2; - final int diff = Math.subtractExact(i, axisi); - i = Math.subtractExact(i, Math.multiplyExact(2, diff)); + final int diff = i - axisi; + i = i - (2 * diff); } else { - final int axisi = Math.addExact(j, 1) / 2; - final int diff = Math.subtractExact(i, axisi); - i = Math.subtractExact(i, Math.addExact(Math.multiplyExact(2, diff), 1)); + final int axisi = (j + 1) / 2; + final int diff = i - axisi; + i = i - ((2 * diff) + 1); } } if (y < 0.0) { - i = Math.subtractExact(i, Math.addExact(Math.multiplyExact(2, j), 1) / 2); - j = Math.multiplyExact(-1, j); + + i = i - ((2 * j + 1) / 2); + j *= -1; } final CoordIJK coordIJK = new CoordIJK(i, j, k); coordIJK.ijkNormalize(); diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java b/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java index 5973af4b51f6f..05f504d8e031d 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java @@ -26,7 +26,7 @@ /** * 3D floating-point vector */ -final class Vec3d { +record Vec3d(double x, double y, double z) { /** icosahedron face centers in x/y/z on the unit sphere */ public static final Vec3d[] faceCenterPoint = new Vec3d[] { @@ -52,14 +52,6 @@ final class Vec3d { new Vec3d(-0.1092625278784796, 0.4811951572873210, -0.8697775121287253) // face 19 }; - private final double x, y, z; - - private Vec3d(double x, double y, double z) { - this.x = x; - this.y = y; - this.z = z; - } - /** * Calculate the square of the distance between two 3D coordinates. * @@ -238,5 +230,4 @@ private static double dotProduct(double x1, double y1, double z1, double x2, dou private static double magnitude(double x, double y, double z) { return Math.sqrt(square(x) + square(y) + square(z)); } - } diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/MediaTypeRegistry.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/MediaTypeRegistry.java index 88724062bb452..ae2c80a136437 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/MediaTypeRegistry.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/MediaTypeRegistry.java @@ -27,7 +27,7 @@ * A MediaType can have only one query parameter representation. * For example "json" (case insensitive) maps back to a JSON media type. 
* - * Additionally, a http header may optionally have parameters. For example "application/vnd.elasticsearch+json; compatible-with=7". + * Additionally, a http header may optionally have parameters. For example "application/vnd.elasticsearch+json; compatible-with=8". * This class also allows to define a regular expression for valid values of charset. */ public class MediaTypeRegistry { diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/ParsedMediaTypeTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/ParsedMediaTypeTests.java index 9fafd7c7e7150..8cecd3d25201f 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/ParsedMediaTypeTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/ParsedMediaTypeTests.java @@ -30,19 +30,19 @@ public void testCanonicalParsing() { assertThat(ParsedMediaType.parseMediaType("application/cbor").toMediaType(mediaTypeRegistry), equalTo(XContentType.CBOR)); assertThat( - ParsedMediaType.parseMediaType("application/vnd.elasticsearch+json;compatible-with=7").toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType("application/vnd.elasticsearch+json;compatible-with=8").toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_JSON) ); assertThat( - ParsedMediaType.parseMediaType("application/vnd.elasticsearch+yaml;compatible-with=7").toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType("application/vnd.elasticsearch+yaml;compatible-with=8").toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_YAML) ); assertThat( - ParsedMediaType.parseMediaType("application/vnd.elasticsearch+smile;compatible-with=7").toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType("application/vnd.elasticsearch+smile;compatible-with=8").toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_SMILE) ); assertThat( - ParsedMediaType.parseMediaType("application/vnd.elasticsearch+cbor;compatible-with=7").toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType("application/vnd.elasticsearch+cbor;compatible-with=8").toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_CBOR) ); } @@ -179,19 +179,19 @@ public void testParseMediaTypeFromXContentType() { ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_JSON, Map.of("compatible-with", "7")).toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType(XContentType.VND_JSON, Map.of("compatible-with", "8")).toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_JSON) ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_YAML, Map.of("compatible-with", "7")).toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType(XContentType.VND_YAML, Map.of("compatible-with", "8")).toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_YAML) ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_SMILE, Map.of("compatible-with", "7")).toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType(XContentType.VND_SMILE, Map.of("compatible-with", "8")).toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_SMILE) ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_CBOR, Map.of("compatible-with", "7")).toMediaType(mediaTypeRegistry), + ParsedMediaType.parseMediaType(XContentType.VND_CBOR, Map.of("compatible-with", "8")).toMediaType(mediaTypeRegistry), equalTo(XContentType.VND_CBOR) ); } @@ -215,20 +215,20 @@ public void testResponseContentTypeHeader() { ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_JSON, Map.of("compatible-with", "7")).responseContentTypeHeader(), - 
equalTo("application/vnd.elasticsearch+json;compatible-with=7") + ParsedMediaType.parseMediaType(XContentType.VND_JSON, Map.of("compatible-with", "8")).responseContentTypeHeader(), + equalTo("application/vnd.elasticsearch+json;compatible-with=8") ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_YAML, Map.of("compatible-with", "7")).responseContentTypeHeader(), - equalTo("application/vnd.elasticsearch+yaml;compatible-with=7") + ParsedMediaType.parseMediaType(XContentType.VND_YAML, Map.of("compatible-with", "8")).responseContentTypeHeader(), + equalTo("application/vnd.elasticsearch+yaml;compatible-with=8") ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_SMILE, Map.of("compatible-with", "7")).responseContentTypeHeader(), - equalTo("application/vnd.elasticsearch+smile;compatible-with=7") + ParsedMediaType.parseMediaType(XContentType.VND_SMILE, Map.of("compatible-with", "8")).responseContentTypeHeader(), + equalTo("application/vnd.elasticsearch+smile;compatible-with=8") ); assertThat( - ParsedMediaType.parseMediaType(XContentType.VND_CBOR, Map.of("compatible-with", "7")).responseContentTypeHeader(), - equalTo("application/vnd.elasticsearch+cbor;compatible-with=7") + ParsedMediaType.parseMediaType(XContentType.VND_CBOR, Map.of("compatible-with", "8")).responseContentTypeHeader(), + equalTo("application/vnd.elasticsearch+cbor;compatible-with=8") ); assertThat( diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java index afb3d69733d02..1c71c64311517 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java @@ -47,6 +47,8 @@ import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.sv.SwedishLightStemFilter; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -81,6 +83,8 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory { + private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(StemmerTokenFilterFactory.class); + private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream(); private String language; @@ -90,6 +94,20 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory { this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter"))); // check that we have a valid language by trying to create a TokenStream create(EMPTY_TOKEN_STREAM).close(); + if ("lovins".equalsIgnoreCase(language)) { + deprecationLogger.critical( + DeprecationCategory.ANALYSIS, + "lovins_deprecation", + "The [lovins] stemmer is deprecated and will be removed in a future version." + ); + } + if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) { + deprecationLogger.critical( + DeprecationCategory.ANALYSIS, + "dutch_kp_deprecation", + "The [dutch_kp] stemmer is deprecated and will be removed in a future version." 
+ ); + } } @Override diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java index a301bc1c851a7..c3017bd3ea237 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class ASCIIFoldingTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings( diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java index 001f54ee238d4..ab26112005bd6 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + /** * Base class to test {@link WordDelimiterTokenFilterFactory} and * {@link WordDelimiterGraphTokenFilterFactory}. 
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java index 16614f056c05a..95b093b03f9a7 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class CJKFilterFactoryTests extends ESTokenStreamTestCase { private static final String RESOURCE = "/org/elasticsearch/analysis/common/cjk_analysis.json"; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java index d5a36b110a7be..ee9701b89c127 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java @@ -24,6 +24,8 @@ import java.io.StringReader; import java.util.Arrays; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class CharGroupTokenizerFactoryTests extends ESTokenStreamTestCase { public void testParseTokenChars() { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java index 115ed1522381a..77902a2ab982f 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java @@ -27,6 +27,8 @@ import java.nio.file.Files; import java.nio.file.Path; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { Settings settings = Settings.builder() diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java index db8b5c92165a2..b0cda5620c3e5 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java @@ -23,6 +23,7 @@ import java.io.StringReader; import java.text.ParseException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; public class ESSolrSynonymParserTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java index 16c6aa256009b..17455c431992f 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java @@ -23,6 +23,7 @@ import java.io.StringReader; import java.text.ParseException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; public class ESWordnetSynonymParserTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java index c4e695cabf695..446cee8f48379 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class EdgeNGramTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java index 329318a096efb..11d1653439e59 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java @@ -29,6 +29,9 @@ import java.io.StringReader; import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class EdgeNGramTokenizerTests extends ESTokenStreamTestCase { private static IndexAnalyzers buildAnalyzers(IndexVersion version, String tokenizer) throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java index 8049c09025cf2..8783860b8e02e 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java @@ -13,6 +13,8 @@ import org.apache.lucene.analysis.CharArraySet; import org.elasticsearch.test.ESTokenStreamTestCase; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; + public class FingerprintAnalyzerTests extends ESTokenStreamTestCase { public void testFingerprint() throws Exception { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java index d6c2792af7de7..2f3dd1917ebe2 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java +++ 
b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java @@ -20,6 +20,8 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testBasic() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java index 5d84457df1495..a3c9eb2cf3aae 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class KeepFilterFactoryTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java index 5a7ead779621e..e499f6f7eebdc 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class KeepTypesFilterFactoryTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java index c249db706a189..8ede58ae2082b 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java @@ -18,11 +18,11 @@ import org.elasticsearch.index.analysis.AnalysisTestsHelper; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.test.ESTestCase.TestAnalysis; import org.elasticsearch.test.ESTokenStreamTestCase; import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.hamcrest.Matchers.instanceOf; /** diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java index c3a9531b4a2ed..ee117de653d95 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static 
org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { Settings settings = Settings.builder() diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java index aff05dbc4d3a3..020b78a50b213 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertStreamHasNumberOfTokens; + public class MinHashFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { int default_hash_count = 1; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java index 7436263f8df9e..eb9032061d134 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java @@ -26,6 +26,8 @@ import java.io.IOException; import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; + public class MultiplexerTokenFilterTests extends ESTokenStreamTestCase { public void testMultiplexingFilter() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java index 4b0232ed95e0e..ef02f91c30a40 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class NGramTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java index 9c4286d40db77..8c365a1362f85 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java @@ -28,7 +28,7 @@ import java.io.StringReader; import java.util.Arrays; -import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase { @@ -183,6 +183,9 @@ public void 
testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception { assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class)); } } + assertWarnings( + "The [side] parameter is deprecated and will be removed. Use a [reverse] before and after the [edge_ngram] instead." + ); } /* diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java index b36bb18529109..5121c6390ceb0 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class PathHierarchyTokenizerFactoryTests extends ESTokenStreamTestCase { public void testDefaults() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java index 6c13c4eac4ab7..91fd74bca9c93 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java @@ -18,6 +18,9 @@ import java.util.Arrays; import java.util.regex.Pattern; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.checkRandomData; + /** * Verifies the behavior of PatternAnalyzer.
*/ diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java index 4ac4b44d8ffdd..80a270b033678 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java @@ -19,7 +19,7 @@ import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; -import static org.elasticsearch.test.ESTestCase.createTestAnalysis; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java index 48434461fc151..91637f1cb9449 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java @@ -19,6 +19,8 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class PatternReplaceTokenFilterTests extends ESTokenStreamTestCase { public void testNormalizer() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java index ae8c17decb3b7..40ba9acbc257a 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java @@ -37,6 +37,7 @@ import java.io.IOException; import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java index df0c0aa6e7df6..bb37b9bb7f4ef 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java @@ -20,6 +20,7 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class RemoveDuplicatesFilterFactoryTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java index 2a480f7cb4a75..fb5eee96acffb 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java @@ -36,6 +36,7 @@ import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java index 9153b5d9b3819..8fd8b86047488 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java @@ -13,6 +13,8 @@ import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.elasticsearch.test.ESTokenStreamTestCase; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; + public class SnowballAnalyzerTests extends ESTokenStreamTestCase { public void testEnglish() throws Exception { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java index 2266d554fcba6..bbe22ff43d52e 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java @@ -25,6 +25,8 @@ import java.util.List; import java.util.Locale; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class StemmerOverrideTokenFilterFactoryTests extends ESTokenStreamTestCase { @Rule public ExpectedException expectedException = ExpectedException.none(); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java index f9d8dc1aef8ff..8f3d52f0174c6 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java @@ -27,12 +27,11 @@ import java.io.IOException; import java.io.StringReader; -import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; import static org.hamcrest.Matchers.instanceOf; public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase { - private static final CommonAnalysisPlugin PLUGIN = new CommonAnalysisPlugin(); public void testEnglishFilterFactory() throws IOException { @@ -103,4 +102,30 @@ public void testMultipleLanguagesThrowsException() throws IOException { ); assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage()); } + + public void testKpDeprecation() throws IOException { + IndexVersion v = IndexVersionUtils.randomVersion(random()); + Settings settings = 
Settings.builder() + .put("index.analysis.filter.my_kp.type", "stemmer") + .put("index.analysis.filter.my_kp.language", "kp") + .put(SETTING_VERSION_CREATED, v) + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + + AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN); + assertCriticalWarnings("The [dutch_kp] stemmer is deprecated and will be removed in a future version."); + } + + public void testLovinsDeprecation() throws IOException { + IndexVersion v = IndexVersionUtils.randomVersion(random()); + Settings settings = Settings.builder() + .put("index.analysis.filter.my_lovins.type", "stemmer") + .put("index.analysis.filter.my_lovins.language", "lovins") + .put(SETTING_VERSION_CREATED, v) + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + + AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN); + assertCriticalWarnings("The [lovins] stemmer is deprecated and will be removed in a future version."); + } } diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java index db25d6a0f1845..da84c4814514f 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java @@ -19,7 +19,7 @@ import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; -import static org.elasticsearch.test.ESTestCase.createTestAnalysis; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; public class StopAnalyzerTests extends ESTokenStreamTestCase { public void testDefaultsCompoundAnalysis() throws Exception { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java index f3816f43d2b2b..63e9732f99a8a 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java @@ -19,6 +19,8 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class TrimTokenFilterTests extends ESTokenStreamTestCase { public void testNormalizer() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java index 4a060ab11e2bd..4995fe844c9c5 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java @@ -30,6 +30,9 @@ import java.io.StringReader; import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class WordDelimiterGraphTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase { public 
WordDelimiterGraphTokenFilterFactoryTests() { super("word_delimiter_graph"); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java index 2644303991b8d..636174f5c79cc 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java @@ -19,6 +19,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class WordDelimiterTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase { public WordDelimiterTokenFilterFactoryTests() { super("word_delimiter"); diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java index f95815d1daff9..4b9b29d7187e1 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java @@ -73,7 +73,7 @@ private static void waitForLogs(RestClient client) throws Exception { }); } - private static final String LOGS_TEMPLATE = """ + static final String LOGS_TEMPLATE = """ { "index_patterns": [ "logs-*-*" ], "data_stream": {}, @@ -110,7 +110,7 @@ private static void waitForLogs(RestClient client) throws Exception { } }"""; - private static final String LOGS_STANDARD_INDEX_MODE = """ + static final String LOGS_STANDARD_INDEX_MODE = """ { "index_patterns": [ "logs-*-*" ], "data_stream": {}, @@ -143,7 +143,7 @@ private static void waitForLogs(RestClient client) throws Exception { } }"""; - private static final String STANDARD_TEMPLATE = """ + static final String STANDARD_TEMPLATE = """ { "index_patterns": [ "standard-*-*" ], "data_stream": {}, @@ -216,7 +216,7 @@ private static void waitForLogs(RestClient client) throws Exception { } }"""; - private static final String DOC_TEMPLATE = """ + static final String DOC_TEMPLATE = """ { "@timestamp": "%s", "host.name": "%s", @@ -333,6 +333,23 @@ public void testLogsTimeSeriesIndexModeSwitch() throws IOException { ); assertDataStreamBackingIndexMode("logsdb", 0, DATA_STREAM_NAME); + putTemplate(client, "custom-template", LOGS_STANDARD_INDEX_MODE); + rolloverDataStream(client, DATA_STREAM_NAME); + indexDocument( + client, + DATA_STREAM_NAME, + document( + Instant.now(), + randomAlphaOfLength(10), + randomNonNegativeLong(), + randomFrom("PUT", "POST", "GET"), + randomAlphaOfLength(64), + randomIp(randomBoolean()), + randomLongBetween(1_000_000L, 2_000_000L) + ) + ); + assertDataStreamBackingIndexMode("standard", 1, DATA_STREAM_NAME); + putTemplate(client, "custom-template", TIME_SERIES_TEMPLATE); rolloverDataStream(client, DATA_STREAM_NAME); indexDocument( @@ -348,7 +365,24 @@ public void testLogsTimeSeriesIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("time_series", 1, DATA_STREAM_NAME); + assertDataStreamBackingIndexMode("time_series", 2, DATA_STREAM_NAME); + + putTemplate(client, "custom-template", LOGS_STANDARD_INDEX_MODE); + rolloverDataStream(client, 
DATA_STREAM_NAME); + indexDocument( + client, + DATA_STREAM_NAME, + document( + Instant.now(), + randomAlphaOfLength(10), + randomNonNegativeLong(), + randomFrom("PUT", "POST", "GET"), + randomAlphaOfLength(64), + randomIp(randomBoolean()), + randomLongBetween(1_000_000L, 2_000_000L) + ) + ); + assertDataStreamBackingIndexMode("standard", 3, DATA_STREAM_NAME); putTemplate(client, "custom-template", LOGS_TEMPLATE); rolloverDataStream(client, DATA_STREAM_NAME); @@ -365,7 +399,7 @@ public void testLogsTimeSeriesIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("logsdb", 2, DATA_STREAM_NAME); + assertDataStreamBackingIndexMode("logsdb", 4, DATA_STREAM_NAME); } public void testLogsDBToStandardReindex() throws IOException { @@ -554,7 +588,7 @@ private void assertDataStreamBackingIndexMode(final String indexMode, int backin assertThat(getSettings(client, getWriteBackingIndex(client, dataStreamName, backingIndex)).get("index.mode"), is(indexMode)); } - private String document( + static String document( final Instant timestamp, final String hostname, long pid, @@ -581,13 +615,13 @@ private static void createDataStream(final RestClient client, final String dataS assertOK(client.performRequest(request)); } - private static void putTemplate(final RestClient client, final String templateName, final String mappings) throws IOException { + static void putTemplate(final RestClient client, final String templateName, final String mappings) throws IOException { final Request request = new Request("PUT", "/_index_template/" + templateName); request.setJsonEntity(mappings); assertOK(client.performRequest(request)); } - private static void indexDocument(final RestClient client, String indexOrtDataStream, String doc) throws IOException { + static void indexDocument(final RestClient client, String indexOrtDataStream, String doc) throws IOException { final Request request = new Request("POST", "/" + indexOrtDataStream + "/_doc?refresh=true"); request.setJsonEntity(doc); final Response response = client.performRequest(request); diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/MultiClustersIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/MultiClustersIT.java new file mode 100644 index 0000000000000..2f80a230d937a --- /dev/null +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/MultiClustersIT.java @@ -0,0 +1,363 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.datastreams; + +import org.apache.http.HttpHost; +import org.apache.lucene.tests.util.English; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.RestClient; +import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.local.distribution.DistributionType; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Stream; + +import static org.elasticsearch.datastreams.AbstractDataStreamIT.createDataStream; +import static org.elasticsearch.datastreams.LogsDataStreamRestIT.LOGS_TEMPLATE; +import static org.elasticsearch.datastreams.LogsDataStreamRestIT.STANDARD_TEMPLATE; +import static org.elasticsearch.datastreams.LogsDataStreamRestIT.indexDocument; +import static org.elasticsearch.datastreams.LogsDataStreamRestIT.putTemplate; +import static org.elasticsearch.index.mapper.DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +public class MultiClustersIT extends ESRestTestCase { + static List localLogsDocs = null; + static List remoteLogsDocs = null; + static List localStandardDocs = null; + static List remoteStandardDocs = null; + + public static ElasticsearchCluster remoteCluster = ElasticsearchCluster.local() + .name("remote_cluster") + .distribution(DistributionType.DEFAULT) + .module("data-streams") + .module("x-pack-stack") + .setting("xpack.security.enabled", "false") + .setting("xpack.license.self_generated.type", "trial") + .setting("cluster.logsdb.enabled", "true") + .build(); + + public static ElasticsearchCluster localCluster = ElasticsearchCluster.local() + .name("local_cluster") + .distribution(DistributionType.DEFAULT) + .module("data-streams") + .module("x-pack-stack") + .setting("xpack.security.enabled", "false") + .setting("xpack.license.self_generated.type", "trial") + .setting("cluster.logsdb.enabled", "true") + .setting("node.roles", "[data,ingest,master,remote_cluster_client]") + .setting("cluster.remote.remote_cluster.seeds", () -> "\"" + remoteCluster.getTransportEndpoint(0) + "\"") + .setting("cluster.remote.connections_per_cluster", "1") + .setting("cluster.remote.remote_cluster.skip_unavailable", "false") + .build(); + + @ClassRule + public static TestRule clusterRule = RuleChain.outerRule(remoteCluster).around(localCluster); + + private RestClient localClusterClient() throws IOException { + var clusterHosts = parseClusterHosts(localCluster.getHttpAddresses()); + return buildClient(restClientSettings(), clusterHosts.toArray(new HttpHost[0])); + } + + private RestClient remoteClusterClient() throws IOException { + var clusterHosts = parseClusterHosts(remoteCluster.getHttpAddresses()); + return buildClient(restClientSettings(), clusterHosts.toArray(new HttpHost[0])); + } + + private record Document(long 
timestamp, String cluster, String hostName, long pid, String method, long messageId, String message) { + + @SuppressWarnings("unchecked") + static Document fromHit(Map hit) { + long timestamp = DEFAULT_DATE_TIME_FORMATTER.parseMillis(hit.get("@timestamp").toString()); + String cluster = (String) hit.get("cluster"); + String hostName = (String) hit.get("host.name"); + if (hostName == null) { + Map host = (Map) hit.get("host"); + hostName = (String) host.get("name"); + } + long pid = ((Number) hit.get("pid")).longValue(); + String method = (String) hit.get("method"); + long messageId = ((Number) hit.get("message_id")).longValue(); + String message = (String) hit.get("message"); + return new Document(timestamp, cluster, hostName, pid, method, messageId, message); + } + + String toJson() throws IOException { + XContentBuilder builder = JsonXContent.contentBuilder() + .startObject() + .field("@timestamp", timestamp) + .field("cluster", cluster) + .field("host.name", hostName) + .field("pid", pid) + .field("method", method) + .field("message_id", messageId) + .field("message", message) + .endObject(); + return Strings.toString(builder); + } + } + + static String randomHostName() { + return randomFrom("qa-", "staging-", "prod-") + between(1, 3); + } + + static List indexDocuments(RestClient client, String cluster, String index, int startMessageId) throws IOException { + int numDocs = between(0, 100); + List docs = new ArrayList<>(numDocs); + long timestamp = DEFAULT_DATE_TIME_FORMATTER.parseMillis("2024-09-15T00:00:00Z"); + for (int i = 0; i < numDocs; i++) { + timestamp += between(0, 5) * 1000L; + long pid = randomLongBetween(1, 10); + String method = randomFrom("GET", "PUT", "POST", "DELETE"); + String message = English.intToEnglish(between(1, 1000000)); + docs.add(new Document(timestamp, cluster, randomHostName(), pid, method, startMessageId + i, message)); + } + Randomness.shuffle(docs); + for (Document doc : docs) { + indexDocument(client, index, doc.toJson()); + if (rarely()) { + refresh(client, index); + } + } + refresh(client, index); + return docs; + } + + @Before + public void setUpIndices() throws Exception { + if (localLogsDocs != null) { + return; + } + try (RestClient client = localClusterClient()) { + putTemplate(client, "logs-template", LOGS_TEMPLATE); + putTemplate(client, "standard-template", STANDARD_TEMPLATE); + + createDataStream(client, "logs-apache-kafka"); + localLogsDocs = indexDocuments(client, "local", "logs-apache-kafka", 0); + assertDocCount(client, "logs-apache-kafka", localLogsDocs.size()); + + createDataStream(client, "standard-apache-kafka"); + localStandardDocs = indexDocuments(client, "local", "standard-apache-kafka", 1000); + assertDocCount(client, "standard-apache-kafka", localStandardDocs.size()); + } + try (RestClient client = remoteClusterClient()) { + putTemplate(client, "logs-template", LOGS_TEMPLATE); + putTemplate(client, "standard-template", STANDARD_TEMPLATE); + + createDataStream(client, "logs-apache-kafka"); + remoteLogsDocs = indexDocuments(client, "remote", "logs-apache-kafka", 2000); + assertDocCount(client, "logs-apache-kafka", remoteLogsDocs.size()); + + createDataStream(client, "standard-apache-kafka"); + remoteStandardDocs = indexDocuments(client, "remote", "standard-apache-kafka", 3000); + assertDocCount(client, "standard-apache-kafka", remoteStandardDocs.size()); + } + } + + public void testSource() throws IOException { + XContentBuilder searchSource = JsonXContent.contentBuilder().startObject().field("_source", true).field("size", 500); + 
final boolean sorted = randomBoolean(); + if (sorted) { + searchSource.startArray("sort"); + searchSource.value("message_id"); + searchSource.endArray(); + } + final Predicate filterHost; + if (randomBoolean()) { + String host = randomHostName(); + filterHost = s -> s.equals(host); + searchSource.startObject("query"); + searchSource.startObject("term"); + searchSource.startObject("host.name"); + searchSource.field("value", host); + searchSource.endObject(); + searchSource.endObject(); + searchSource.endObject(); + } else { + filterHost = s -> true; + } + searchSource.endObject(); + // remote only + { + var request = new Request("POST", "/*:l*,*:s*/_search"); + request.setJsonEntity(Strings.toString(searchSource)); + if (randomBoolean()) { + request.addParameter("ccs_minimize_roundtrips", Boolean.toString(randomBoolean())); + } + Response resp = client().performRequest(request); + assertOK(resp); + Stream hits = extractHits(resp).stream().map(Document::fromHit); + if (sorted == false) { + hits = hits.sorted(Comparator.comparingLong(Document::messageId)); + } + var expectedHits = Stream.of(remoteLogsDocs, remoteStandardDocs) + .flatMap(Collection::stream) + .filter(d -> filterHost.test(d.hostName)) + .sorted(Comparator.comparingLong(Document::messageId)) + .toList(); + assertThat(hits.toList(), equalTo(expectedHits)); + } + // both clusters + { + var request = new Request("POST", "/*,*:*/_search"); + request.setJsonEntity(Strings.toString(searchSource)); + if (randomBoolean()) { + request.addParameter("ccs_minimize_roundtrips", Boolean.toString(randomBoolean())); + } + Response resp = client().performRequest(request); + assertOK(resp); + Stream hits = extractHits(resp).stream().map(Document::fromHit); + if (sorted == false) { + hits = hits.sorted(Comparator.comparingLong(Document::messageId)); + } + var expectedHits = Stream.of(localLogsDocs, localStandardDocs, remoteLogsDocs, remoteStandardDocs) + .flatMap(Collection::stream) + .filter(d -> filterHost.test(d.hostName)) + .sorted(Comparator.comparingLong(Document::messageId)) + .toList(); + assertThat(hits.toList(), equalTo(expectedHits)); + } + + } + + public void testFields() throws IOException { + XContentBuilder searchSource = JsonXContent.contentBuilder() + .startObject() + .array("fields", "message_id", "host.name") + .field("size", 500); + final boolean sorted = randomBoolean(); + if (sorted) { + searchSource.startArray("sort"); + searchSource.value("message_id"); + searchSource.endArray(); + } + final Predicate filterHost; + if (randomBoolean()) { + String host = randomHostName(); + filterHost = s -> s.equals(host); + searchSource.startObject("query"); + searchSource.startObject("term"); + searchSource.startObject("host.name"); + searchSource.field("value", host); + searchSource.endObject(); + searchSource.endObject(); + searchSource.endObject(); + } else { + filterHost = s -> true; + } + searchSource.endObject(); + record Fields(long messageId, String hostName) { + @SuppressWarnings("unchecked") + static Fields fromResponse(Map hit) { + List hostName = (List) hit.get("host.name"); + assertThat(hostName, hasSize(1)); + List messageId = (List) hit.get("message_id"); + assertThat(messageId, hasSize(1)); + return new Fields(messageId.getFirst().longValue(), hostName.getFirst()); + } + } + // remote only + { + var request = new Request("POST", "/*:l*,*:s*/_search"); + request.setJsonEntity(Strings.toString(searchSource)); + if (randomBoolean()) { + request.addParameter("ccs_minimize_roundtrips", Boolean.toString(randomBoolean())); + } + 
Response resp = client().performRequest(request); + assertOK(resp); + Stream hits = extractFields(resp).stream().map(Fields::fromResponse); + if (sorted == false) { + hits = hits.sorted(Comparator.comparingLong(Fields::messageId)); + } + var expectedHits = Stream.of(remoteLogsDocs, remoteStandardDocs) + .flatMap(Collection::stream) + .filter(d -> filterHost.test(d.hostName)) + .map(d -> new Fields(d.messageId, d.hostName)) + .sorted(Comparator.comparingLong(Fields::messageId)) + .toList(); + assertThat(hits.toList(), equalTo(expectedHits)); + } + // both clusters + { + var request = new Request("POST", "/*,*:*/_search"); + request.setJsonEntity(Strings.toString(searchSource)); + if (randomBoolean()) { + request.addParameter("ccs_minimize_roundtrips", Boolean.toString(randomBoolean())); + } + Response resp = client().performRequest(request); + assertOK(resp); + Stream hits = extractFields(resp).stream().map(Fields::fromResponse); + if (sorted == false) { + hits = hits.sorted(Comparator.comparingLong(Fields::messageId)); + } + var expectedHits = Stream.of(localLogsDocs, localStandardDocs, remoteLogsDocs, remoteStandardDocs) + .flatMap(Collection::stream) + .filter(d -> filterHost.test(d.hostName)) + .map(d -> new Fields(d.messageId, d.hostName)) + .sorted(Comparator.comparingLong(Fields::messageId)) + .toList(); + assertThat(hits.toList(), equalTo(expectedHits)); + } + } + + @SuppressWarnings("unchecked") + private static List> extractHits(final Response response) throws IOException { + final Map map = XContentHelper.convertToMap(XContentType.JSON.xContent(), response.getEntity().getContent(), true); + final Map hitsMap = (Map) map.get("hits"); + final List> hitsList = (List>) hitsMap.get("hits"); + return hitsList.stream().map(hit -> (Map) hit.get("_source")).toList(); + } + + @SuppressWarnings("unchecked") + private static List> extractFields(final Response response) throws IOException { + final Map map = XContentHelper.convertToMap(XContentType.JSON.xContent(), response.getEntity().getContent(), true); + final Map hitsMap = (Map) map.get("hits"); + final List> hitsList = (List>) hitsMap.get("hits"); + return hitsList.stream().map(hit -> (Map) hit.get("fields")).toList(); + } + + @Override + protected String getTestRestCluster() { + return localCluster.getHttpAddresses(); + } + + @Override + protected boolean preserveIndicesUponCompletion() { + return true; + } + + @Override + protected boolean preserveClusterUponCompletion() { + return true; + } + + @Override + protected boolean preserveDataStreamsUponCompletion() { + return true; + } +} diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeCustomSettingsIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeCustomSettingsIT.java index c0e3142b9a8db..87a97b7a44b48 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeCustomSettingsIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeCustomSettingsIT.java @@ -96,10 +96,15 @@ public void testOverrideIndexSorting() throws IOException { assertThat(type, equalTo("date")); } - public void testConfigureStoredSource() throws IOException { + public void testConfigureStoredSourceBeforeIndexCreation() throws IOException { var storedSourceMapping = """ { "template": { + "settings": { + "index": { + "mode": "logsdb" + } + }, "mappings": { "_source": { "mode": "stored" @@ -111,9 +116,9 @@ public void 
testConfigureStoredSource() throws IOException { Exception e = assertThrows(ResponseException.class, () -> putComponentTemplate(client, "logs@custom", storedSourceMapping)); assertThat( e.getMessage(), - containsString("updating component template [logs@custom] results in invalid composable template [logs]") + containsString("Failed to parse mapping: Indices with with index mode [logsdb] only support synthetic source") ); - assertThat(e.getMessage(), containsString("Indices with with index mode [logsdb] only support synthetic source")); + assertThat(e.getMessage(), containsString("mapper_parsing_exception")); assertOK(createDataStream(client, "logs-custom-dev")); @@ -122,6 +127,23 @@ public void testConfigureStoredSource() throws IOException { assertThat(sourceMode, equalTo("synthetic")); } + public void testConfigureStoredSourceWhenIndexIsCreated() throws IOException { + var storedSourceMapping = """ + { + "template": { + "mappings": { + "_source": { + "mode": "stored" + } + } + } + }"""; + + assertOK(putComponentTemplate(client, "logs@custom", storedSourceMapping)); + ResponseException e = expectThrows(ResponseException.class, () -> createDataStream(client, "logs-custom-dev")); + assertThat(e.getMessage(), containsString("Indices with with index mode [logsdb] only support synthetic source")); + } + public void testOverrideIndexCodec() throws IOException { var indexCodecOverrideTemplate = """ { diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java index ce0820b940bf8..515d07103bff8 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java @@ -27,22 +27,33 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Consumer; -class DataGenerationHelper { +public class DataGenerationHelper { private final ObjectMapper.Subobjects subobjects; private final boolean keepArraySource; private final DataGenerator dataGenerator; - DataGenerationHelper() { - this.subobjects = ESTestCase.randomFrom(ObjectMapper.Subobjects.values()); + public DataGenerationHelper() { + this(b -> {}); + } + + public DataGenerationHelper(Consumer builderConfigurator) { + // TODO enable subobjects: auto + // It is disabled because it currently does not have auto flattening and that results in asserts being triggered when using copy_to. 
+ this.subobjects = ESTestCase.randomValueOtherThan( + ObjectMapper.Subobjects.AUTO, + () -> ESTestCase.randomFrom(ObjectMapper.Subobjects.values()) + ); this.keepArraySource = ESTestCase.randomBoolean(); var specificationBuilder = DataGeneratorSpecification.builder().withFullyDynamicMapping(ESTestCase.randomBoolean()); if (subobjects != ObjectMapper.Subobjects.ENABLED) { specificationBuilder = specificationBuilder.withNestedFieldsLimit(0); } - this.dataGenerator = new DataGenerator(specificationBuilder.withDataSourceHandlers(List.of(new DataSourceHandler() { + + specificationBuilder.withDataSourceHandlers(List.of(new DataSourceHandler() { @Override public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequest.ObjectMappingParametersGenerator request) { if (subobjects == ObjectMapper.Subobjects.ENABLED) { @@ -108,8 +119,12 @@ public CheckedConsumer fieldValueGenerator() { } }) ) - ) - .build()); + ); + + // Customize builder if necessary + builderConfigurator.accept(specificationBuilder); + + this.dataGenerator = new DataGenerator(specificationBuilder.build()); } DataGenerator getDataGenerator() { diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java index 611f7fc5a9dcd..751336cc1f646 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java @@ -26,8 +26,12 @@ public class StandardVersusLogsIndexModeRandomDataChallengeRestIT extends Standa protected final DataGenerationHelper dataGenerationHelper; public StandardVersusLogsIndexModeRandomDataChallengeRestIT() { + this(new DataGenerationHelper()); + } + + protected StandardVersusLogsIndexModeRandomDataChallengeRestIT(DataGenerationHelper dataGenerationHelper) { super(); - dataGenerationHelper = new DataGenerationHelper(); + this.dataGenerationHelper = dataGenerationHelper; } @Override diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT.java new file mode 100644 index 0000000000000..6b0e4d4d0b34d --- /dev/null +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT.java @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.datastreams.logsdb.qa; + +import org.elasticsearch.common.settings.Settings; + +public class StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT extends + StandardVersusLogsIndexModeRandomDataChallengeRestIT { + public StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT() { + super(new DataGenerationHelper(builder -> builder.withFullyDynamicMapping(true))); + } + + @Override + public void contenderSettings(Settings.Builder builder) { + super.contenderSettings(builder); + // ignore_dynamic_beyond_limit is set in the template so it's always true + builder.put("index.mapping.total_fields.limit", randomIntBetween(1, 5000)); + } +} diff --git a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/action/GetDataStreamsResponseTests.java b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/action/GetDataStreamsResponseTests.java index 96e71c9aa65c2..710ea8c15b66e 100644 --- a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/action/GetDataStreamsResponseTests.java +++ b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/action/GetDataStreamsResponseTests.java @@ -13,6 +13,7 @@ import org.elasticsearch.cluster.health.ClusterHealthStatus; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.DataStreamLifecycle; +import org.elasticsearch.cluster.metadata.DataStreamOptions; import org.elasticsearch.cluster.metadata.DataStreamTestHelper; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.bytes.BytesReference; @@ -83,7 +84,7 @@ public void testResponseIlmAndDataStreamLifecycleRepresentation() throws Excepti .setAllowCustomRouting(true) .setIndexMode(IndexMode.STANDARD) .setLifecycle(new DataStreamLifecycle()) - .setFailureStoreEnabled(true) + .setDataStreamOptions(DataStreamOptions.FAILURE_STORE_ENABLED) .setFailureIndices(DataStream.DataStreamIndices.failureIndicesBuilder(failureStores).build()) .build(); @@ -186,7 +187,7 @@ public void testResponseIlmAndDataStreamLifecycleRepresentation() throws Excepti .setAllowCustomRouting(true) .setIndexMode(IndexMode.STANDARD) .setLifecycle(new DataStreamLifecycle(null, null, false)) - .setFailureStoreEnabled(true) + .setDataStreamOptions(DataStreamOptions.FAILURE_STORE_ENABLED) .setFailureIndices(DataStream.DataStreamIndices.failureIndicesBuilder(failureStores).build()) .build(); diff --git a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceTests.java b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceTests.java index 307e16a2137b6..05128e164e865 100644 --- a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceTests.java +++ b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.cluster.metadata.DataStreamLifecycle; import org.elasticsearch.cluster.metadata.DataStreamLifecycle.Downsampling; import org.elasticsearch.cluster.metadata.DataStreamLifecycle.Downsampling.Round; +import org.elasticsearch.cluster.metadata.DataStreamOptions; import org.elasticsearch.cluster.metadata.DataStreamTestHelper; import org.elasticsearch.cluster.metadata.IndexAbstraction; import org.elasticsearch.cluster.metadata.IndexGraveyard; @@ -1495,6 +1496,13 @@ public void testTargetIndices() { String dataStreamName = 
randomAlphaOfLength(10).toLowerCase(Locale.ROOT); int numBackingIndices = 3; int numFailureIndices = 2; + int mutationBranch = randomIntBetween(0, 2); + DataStreamOptions dataStreamOptions = switch (mutationBranch) { + case 0 -> DataStreamOptions.EMPTY; + case 1 -> DataStreamOptions.FAILURE_STORE_ENABLED; + case 2 -> DataStreamOptions.FAILURE_STORE_DISABLED; + default -> throw new IllegalStateException("Unexpected value: " + mutationBranch); + }; Metadata.Builder builder = Metadata.builder(); DataStream dataStream = createDataStream( builder, @@ -1504,7 +1512,7 @@ public void testTargetIndices() { settings(IndexVersion.current()), new DataStreamLifecycle(), now - ).copy().setFailureStoreEnabled(randomBoolean()).build(); // failure store is managed even when disabled + ).copy().setDataStreamOptions(dataStreamOptions).build(); // failure store is managed even when disabled builder.put(dataStream); Metadata metadata = builder.build(); Set indicesToExclude = Set.of(dataStream.getIndices().get(0), dataStream.getFailureIndices().getIndices().get(0)); @@ -1536,7 +1544,7 @@ public void testFailureStoreIsManagedEvenWhenDisabled() { settings(IndexVersion.current()), DataStreamLifecycle.newBuilder().dataRetention(0).build(), now - ).copy().setFailureStoreEnabled(false).build(); // failure store is managed even when it is disabled + ).copy().setDataStreamOptions(DataStreamOptions.FAILURE_STORE_DISABLED).build(); // failure store is managed even when disabled builder.put(dataStream); ClusterState state = ClusterState.builder(ClusterName.DEFAULT).metadata(builder).build(); diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceIT.java new file mode 100644 index 0000000000000..73d8976c3a4b7 --- /dev/null +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceIT.java @@ -0,0 +1,223 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.ingest.geoip; + +import com.maxmind.geoip2.model.CountryResponse; +import com.maxmind.geoip2.record.Country; + +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.xcontent.XContentType; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.zip.GZIPOutputStream; + +import static org.hamcrest.Matchers.equalTo; + +public class DatabaseNodeServiceIT extends AbstractGeoIpIT { + /* + * This test makes sure that if we index an ordinary mmdb file into the .geoip_databases index, it is correctly handled upon retrieval. + */ + public void testNonGzippedDatabase() throws Exception { + String databaseType = "GeoLite2-Country"; + String databaseFileName = databaseType + ".mmdb"; + // making the database name unique so we know we're not using another one: + String databaseName = randomAlphaOfLength(20) + "-" + databaseFileName; + byte[] mmdbBytes = getBytesForFile(databaseFileName); + final DatabaseNodeService databaseNodeService = internalCluster().getInstance(DatabaseNodeService.class); + assertNull(databaseNodeService.getDatabase(databaseName)); + int numChunks = indexData(databaseName, mmdbBytes); + retrieveDatabase(databaseNodeService, databaseName, mmdbBytes, numChunks); + assertBusy(() -> assertNotNull(databaseNodeService.getDatabase(databaseName))); + assertValidDatabase(databaseNodeService, databaseName, databaseType); + } + + /* + * This test makes sure that if we index a gzipped tar file wrapping an mmdb file into the .geoip_databases index, it is correctly + * handled upon retrieval.
+ */ + public void testGzippedDatabase() throws Exception { + String databaseType = "GeoLite2-Country"; + String databaseFileName = databaseType + ".mmdb"; + // making the dabase name unique so we know we're not using another one: + String databaseName = randomAlphaOfLength(20) + "-" + databaseFileName; + byte[] mmdbBytes = getBytesForFile(databaseFileName); + byte[] gzipBytes = gzipFileBytes(databaseName, mmdbBytes); + final DatabaseNodeService databaseNodeService = internalCluster().getInstance(DatabaseNodeService.class); + assertNull(databaseNodeService.getDatabase(databaseName)); + int numChunks = indexData(databaseName, gzipBytes); + retrieveDatabase(databaseNodeService, databaseName, gzipBytes, numChunks); + assertBusy(() -> assertNotNull(databaseNodeService.getDatabase(databaseName))); + assertValidDatabase(databaseNodeService, databaseName, databaseType); + } + + /* + * This makes sure that the database is generally usable + */ + private void assertValidDatabase(DatabaseNodeService databaseNodeService, String databaseFileName, String databaseType) + throws IOException { + IpDatabase database = databaseNodeService.getDatabase(databaseFileName); + assertNotNull(database); + assertThat(database.getDatabaseType(), equalTo(databaseType)); + CountryResponse countryResponse = database.getCountry("89.160.20.128"); + assertNotNull(countryResponse); + Country country = countryResponse.getCountry(); + assertNotNull(country); + assertThat(country.getName(), equalTo("Sweden")); + } + + /* + * This has the databaseNodeService retrieve the database from the .geoip_databases index, making the database ready for use when + * databaseNodeService.getDatabase(databaseFileName) is called. + */ + private void retrieveDatabase(DatabaseNodeService databaseNodeService, String databaseFileName, byte[] expectedBytes, int numChunks) + throws IOException { + GeoIpTaskState.Metadata metadata = new GeoIpTaskState.Metadata(1, 0, numChunks - 1, getMd5(expectedBytes), 1); + databaseNodeService.retrieveAndUpdateDatabase(databaseFileName, metadata); + } + + private String getMd5(byte[] bytes) { + MessageDigest md = MessageDigests.md5(); + md.update(bytes); + return MessageDigests.toHexString(md.digest()); + } + + private byte[] gzipFileBytes(String databaseName, byte[] mmdbBytes) throws IOException { + final byte[] EMPTY_BUF = new byte[512]; + Path mmdbFile = createTempFile(); + Files.copy(new ByteArrayInputStream(mmdbBytes), mmdbFile, StandardCopyOption.REPLACE_EXISTING); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (OutputStream gos = new GZIPOutputStream(new BufferedOutputStream(baos))) { + long size = Files.size(mmdbFile); + gos.write(createTarHeader(databaseName, size)); + Files.copy(mmdbFile, gos); + if (size % 512 != 0) { + gos.write(EMPTY_BUF, 0, (int) (512 - (size % 512))); + } + gos.write(EMPTY_BUF); + gos.write(EMPTY_BUF); + } + return baos.toByteArray(); + } + + private static byte[] createTarHeader(String name, long size) { + byte[] buf = new byte[512]; + byte[] sizeBytes = String.format(Locale.ROOT, "%1$012o", size).getBytes(StandardCharsets.UTF_8); + byte[] nameBytes = name.substring(Math.max(0, name.length() - 100)).getBytes(StandardCharsets.US_ASCII); + byte[] id = "0001750".getBytes(StandardCharsets.UTF_8); + byte[] permission = "000644 ".getBytes(StandardCharsets.UTF_8); + byte[] time = String.format(Locale.ROOT, "%1$012o", System.currentTimeMillis() / 1000).getBytes(StandardCharsets.UTF_8); + System.arraycopy(nameBytes, 0, buf, 0, nameBytes.length); + 
System.arraycopy(permission, 0, buf, 100, 7); + System.arraycopy(id, 0, buf, 108, 7); + System.arraycopy(id, 0, buf, 116, 7); + System.arraycopy(sizeBytes, 0, buf, 124, 12); + System.arraycopy(time, 0, buf, 136, 12); + int checksum = 256; + for (byte b : buf) { + checksum += b & 0xFF; + } + byte[] checksumBytes = String.format(Locale.ROOT, "%1$07o", checksum).getBytes(StandardCharsets.UTF_8); + System.arraycopy(checksumBytes, 0, buf, 148, 7); + return buf; + } + + /* + * Finds the given databaseFileName on the classpath, and returns its bytes. + */ + private static byte[] getBytesForFile(String databaseFileName) throws IOException { + try (InputStream is = DatabaseNodeServiceIT.class.getResourceAsStream("/" + databaseFileName)) { + if (is == null) { + throw new FileNotFoundException("Resource [" + databaseFileName + "] not found in classpath"); + } + try (BufferedInputStream bis = new BufferedInputStream(is)) { + return bis.readAllBytes(); + } + } + } + + /* + * This indexes data into the .geoip_databases index in a random number of chunks. + */ + private static int indexData(String databaseFileName, byte[] content) throws IOException { + List chunks = chunkBytes(content, randomIntBetween(1, 100)); + indexChunks(databaseFileName, chunks); + return chunks.size(); + } + + /* + * This turns the given content bytes into the given number of chunks. + */ + private static List chunkBytes(byte[] content, int chunks) throws IOException { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + byteArrayOutputStream.write(content); + byteArrayOutputStream.close(); + + byte[] all = byteArrayOutputStream.toByteArray(); + int chunkSize = Math.max(1, all.length / chunks); + List data = new ArrayList<>(); + + for (int from = 0; from < all.length;) { + int to = from + chunkSize; + if (to > all.length) { + to = all.length; + } + data.add(Arrays.copyOfRange(all, from, to)); + from = to; + } + + while (data.size() > chunks) { + byte[] last = data.removeLast(); + byte[] secondLast = data.removeLast(); + byte[] merged = new byte[secondLast.length + last.length]; + System.arraycopy(secondLast, 0, merged, 0, secondLast.length); + System.arraycopy(last, 0, merged, secondLast.length, last.length); + data.add(merged); + } + return data; + } + + /* + * This writes the given chunks into the .geoip_databases index. 
+ */ + private static void indexChunks(String name, List chunks) { + int chunk = 0; + for (byte[] buf : chunks) { + IndexRequest indexRequest = new IndexRequest(GeoIpDownloader.DATABASES_INDEX).id(name + "_" + chunk + "_" + 1) + .create(true) + .source(XContentType.SMILE, "name", name, "chunk", chunk, "data", buf); + client().index(indexRequest).actionGet(); + chunk++; + } + FlushRequest flushRequest = new FlushRequest(GeoIpDownloader.DATABASES_INDEX); + client().admin().indices().flush(flushRequest).actionGet(); + // Ensure that the chunk documents are visible: + RefreshRequest refreshRequest = new RefreshRequest(GeoIpDownloader.DATABASES_INDEX); + client().admin().indices().refresh(refreshRequest).actionGet(); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java index 8461167b48de8..ce15e02e6efcc 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java @@ -351,15 +351,15 @@ void checkDatabases(ClusterState state) { } void retrieveAndUpdateDatabase(String databaseName, GeoIpTaskState.Metadata metadata) throws IOException { - logger.trace("Retrieving database {}", databaseName); + logger.trace("retrieving database [{}]", databaseName); final String recordedMd5 = metadata.md5(); // This acts as a lock, if this method for a specific db is executed later and downloaded for this db is still ongoing then // FileAlreadyExistsException is thrown and this method silently returns. // (this method is never invoked concurrently and is invoked by a cluster state applier thread) - final Path databaseTmpGzFile; + final Path retrievedFile; try { - databaseTmpGzFile = Files.createFile(geoipTmpDirectory.resolve(databaseName + ".tmp.gz")); + retrievedFile = Files.createFile(geoipTmpDirectory.resolve(databaseName + ".tmp.retrieved")); } catch (FileAlreadyExistsException e) { logger.debug("database update [{}] already in progress, skipping...", databaseName); return; @@ -374,24 +374,21 @@ void retrieveAndUpdateDatabase(String databaseName, GeoIpTaskState.Metadata meta DatabaseReaderLazyLoader lazyLoader = databases.get(databaseName); if (lazyLoader != null && recordedMd5.equals(lazyLoader.getMd5())) { logger.debug("deleting tmp file because database [{}] has already been updated.", databaseName); - Files.delete(databaseTmpGzFile); + Files.delete(retrievedFile); return; } final Path databaseTmpFile = Files.createFile(geoipTmpDirectory.resolve(databaseName + ".tmp")); - logger.debug("retrieve geoip database [{}] from [{}] to [{}]", databaseName, GeoIpDownloader.DATABASES_INDEX, databaseTmpGzFile); - retrieveDatabase( - databaseName, - recordedMd5, - metadata, - bytes -> Files.write(databaseTmpGzFile, bytes, StandardOpenOption.APPEND), - () -> { - logger.debug("decompressing [{}]", databaseTmpGzFile.getFileName()); - - Path databaseFile = geoipTmpDirectory.resolve(databaseName); + logger.debug("retrieving database [{}] from [{}] to [{}]", databaseName, GeoIpDownloader.DATABASES_INDEX, retrievedFile); + retrieveDatabase(databaseName, recordedMd5, metadata, bytes -> Files.write(retrievedFile, bytes, StandardOpenOption.APPEND), () -> { + final Path databaseFile = geoipTmpDirectory.resolve(databaseName); + + boolean isTarGz = MMDBUtil.isGzip(retrievedFile); + if (isTarGz) { // tarball contains .mmdb, LICENSE.txt, 
COPYRIGHTS.txt and optional README.txt files. // we store mmdb file as is and prepend database name to all other entries to avoid conflicts - try (TarInputStream is = new TarInputStream(new GZIPInputStream(Files.newInputStream(databaseTmpGzFile), 8192))) { + logger.debug("decompressing [{}]", retrievedFile.getFileName()); + try (TarInputStream is = new TarInputStream(new GZIPInputStream(Files.newInputStream(retrievedFile), 8192))) { TarInputStream.TarEntry entry; while ((entry = is.getNextEntry()) != null) { // there might be ./ entry in tar, we should skip it @@ -407,28 +404,33 @@ void retrieveAndUpdateDatabase(String databaseName, GeoIpTaskState.Metadata meta } } } - - logger.debug("moving database from [{}] to [{}]", databaseTmpFile, databaseFile); - Files.move(databaseTmpFile, databaseFile, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); - updateDatabase(databaseName, recordedMd5, databaseFile); - Files.delete(databaseTmpGzFile); - }, - failure -> { - logger.error(() -> "failed to retrieve database [" + databaseName + "]", failure); - try { - Files.deleteIfExists(databaseTmpFile); - Files.deleteIfExists(databaseTmpGzFile); - } catch (IOException ioe) { - ioe.addSuppressed(failure); - logger.error("Unable to delete tmp database file after failure", ioe); - } + } else { + /* + * Given that this is not code that will be called extremely frequently, we copy the file to the expected location here in + * order to avoid making the rest of the code more complex to avoid this. + */ + Files.copy(retrievedFile, databaseTmpFile, StandardCopyOption.REPLACE_EXISTING); } - ); + // finally, atomically move some-database.mmdb.tmp to some-database.mmdb + logger.debug("moving database from [{}] to [{}]", databaseTmpFile, databaseFile); + Files.move(databaseTmpFile, databaseFile, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + updateDatabase(databaseName, recordedMd5, databaseFile); + Files.delete(retrievedFile); + }, failure -> { + logger.error(() -> "failed to retrieve database [" + databaseName + "]", failure); + try { + Files.deleteIfExists(databaseTmpFile); + Files.deleteIfExists(retrievedFile); + } catch (IOException ioe) { + ioe.addSuppressed(failure); + logger.error("unable to delete tmp database file after failure", ioe); + } + }); } void updateDatabase(String databaseFileName, String recordedMd5, Path file) { try { - logger.debug("starting reload of changed geoip database file [{}]", file); + logger.debug("starting reload of changed database file [{}]", file); DatabaseReaderLazyLoader loader = new DatabaseReaderLazyLoader(cache, file, recordedMd5); DatabaseReaderLazyLoader existing = databases.put(databaseFileName, loader); if (existing != null) { @@ -458,7 +460,7 @@ void updateDatabase(String databaseFileName, String recordedMd5, Path file) { logger.debug("no pipelines found to reload"); } } - logger.info("successfully loaded geoip database file [{}]", file.getFileName()); + logger.info("successfully loaded database file [{}]", file.getFileName()); } catch (Exception e) { logger.error(() -> "failed to update database [" + databaseFileName + "]", e); } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java index cd2649c210500..acc51c1bb0b53 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java +++ 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java @@ -20,9 +20,9 @@ import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.CheckedSupplier; import org.elasticsearch.common.Strings; import org.elasticsearch.common.hash.MessageDigests; -import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.BoolQueryBuilder; @@ -40,9 +40,9 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.XContentType; +import java.io.Closeable; import java.io.IOException; import java.io.InputStream; -import java.net.PasswordAuthentication; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.util.Arrays; @@ -67,32 +67,18 @@ public class EnterpriseGeoIpDownloader extends AllocatedPersistentTask { private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloader.class); - private static final Pattern CHECKSUM_PATTERN = Pattern.compile("(\\w{64})\\s\\s(.*)"); + + // a sha256 checksum followed by two spaces followed by an (ignored) file name + private static final Pattern SHA256_CHECKSUM_PATTERN = Pattern.compile("(\\w{64})\\s\\s(.*)"); // for overriding in tests static String DEFAULT_MAXMIND_ENDPOINT = System.getProperty( - MAXMIND_SETTINGS_PREFIX + "endpoint.default", + MAXMIND_SETTINGS_PREFIX + "endpoint.default", // "https://download.maxmind.com/geoip/databases" ); // n.b. a future enhancement might be to allow for a MAXMIND_ENDPOINT_SETTING, but // at the moment this is an unsupported system property for use in tests (only) - static String downloadUrl(final String name, final String suffix) { - String endpointPattern = DEFAULT_MAXMIND_ENDPOINT; - if (endpointPattern.contains("%")) { - throw new IllegalArgumentException("Invalid endpoint [" + endpointPattern + "]"); - } - if (endpointPattern.endsWith("/") == false) { - endpointPattern += "/"; - } - endpointPattern += "%s/download?suffix=%s"; - - // at this point the pattern looks like this (in the default case): - // https://download.maxmind.com/geoip/databases/%s/download?suffix=%s - - return Strings.format(endpointPattern, name, suffix); - } - static final String DATABASES_INDEX = ".geoip_databases"; static final int MAX_CHUNK_SIZE = 1024 * 1024; @@ -105,7 +91,7 @@ static String downloadUrl(final String name, final String suffix) { protected volatile EnterpriseGeoIpTaskState state; private volatile Scheduler.ScheduledCancellable scheduled; private final Supplier pollIntervalSupplier; - private final Function credentialsBuilder; + private final Function tokenProvider; EnterpriseGeoIpDownloader( Client client, @@ -119,7 +105,7 @@ static String downloadUrl(final String name, final String suffix) { TaskId parentTask, Map headers, Supplier pollIntervalSupplier, - Function credentialsBuilder + Function tokenProvider ) { super(id, type, action, description, parentTask, headers); this.client = client; @@ -127,7 +113,7 @@ static String downloadUrl(final String name, final String suffix) { this.clusterService = clusterService; this.threadPool = threadPool; this.pollIntervalSupplier = pollIntervalSupplier; - this.credentialsBuilder = credentialsBuilder; + this.tokenProvider = tokenProvider; } void setState(EnterpriseGeoIpTaskState state) { @@ -156,7 +142,7 @@ void updateDatabases() throws IOException { } } - 
logger.trace("Updating geoip databases"); + logger.trace("Updating databases"); IngestGeoIpMetadata geoIpMeta = clusterState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); // if there are entries in the cs that aren't in the persistent task state, @@ -174,15 +160,8 @@ void updateDatabases() throws IOException { DatabaseConfiguration database = entry.getValue().database(); if (existingDatabaseNames.contains(database.name() + ".mmdb") == false) { logger.debug("A new database appeared [{}]", database.name()); - - final String accountId = database.maxmind().accountId(); - try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { - if (holder == null) { - logger.warn("No credentials found to download database [{}], skipping download...", id); - } else { - processDatabase(holder.get(), database); - addedSomething = true; - } + if (processDatabase(id, database)) { + addedSomething = true; } } } @@ -225,14 +204,8 @@ void updateDatabases() throws IOException { for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { final String id = entry.getKey(); DatabaseConfiguration database = entry.getValue().database(); - - final String accountId = database.maxmind().accountId(); - try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { - if (holder == null) { - logger.warn("No credentials found to download database [{}], skipping download...", id); - } else { - processDatabase(holder.get(), database); - } + try { + processDatabase(id, database); } catch (Exception e) { accumulator = ExceptionsHelper.useOrSuppress(accumulator, ExceptionsHelper.convertToRuntime(e)); } @@ -244,68 +217,69 @@ void updateDatabases() throws IOException { } /** - * This method fetches the sha256 file and tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz - * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. - * If the computed sha256 does not match the expected sha256, an error will be logged and the database will not be put into the - * Elasticsearch index. + * This method fetches the checksum and database for the given database from the Maxmind endpoint, then indexes that database + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database from the index if they exist. + * If the computed checksum does not match the expected checksum, an error will be logged and the database will not be put into + * the Elasticsearch index. *

- * As an implementation detail, this method retrieves the sha256 checksum of the database to download and then invokes - * {@link EnterpriseGeoIpDownloader#processDatabase(PasswordAuthentication, String, String, String)} with that checksum, deferring to - * that method to actually download and process the tar.gz itself. + * As an implementation detail, this method retrieves the checksum of the database to download and then invokes + * {@link EnterpriseGeoIpDownloader#processDatabase(String, Checksum, CheckedSupplier)} with that checksum, + * deferring to that method to actually download and process the database file itself. * - * @param auth The credentials to use to download from the Maxmind endpoint - * @param database The database to be downloaded from Maxmind and indexed into an Elasticsearch index - * @throws IOException If there is an error fetching the sha256 file + * @param id The identifier for this database (just for logging purposes) + * @param database The database to be downloaded and indexed into an Elasticsearch index + * @return true if the file was processed, false if the file wasn't processed (for example if credentials haven't been configured) + * @throws IOException If there is an error fetching the checksum or database file */ - void processDatabase(PasswordAuthentication auth, DatabaseConfiguration database) throws IOException { + boolean processDatabase(String id, DatabaseConfiguration database) throws IOException { final String name = database.name(); logger.debug("Processing database [{}] for configuration [{}]", name, database.id()); - final String sha256Url = downloadUrl(name, "tar.gz.sha256"); - final String tgzUrl = downloadUrl(name, "tar.gz"); - - String result = new String(httpClient.getBytes(auth, sha256Url), StandardCharsets.UTF_8).trim(); // this throws if the auth is bad - var matcher = CHECKSUM_PATTERN.matcher(result); - boolean match = matcher.matches(); - if (match == false) { - throw new RuntimeException("Unexpected sha256 response from [" + sha256Url + "]"); + try (ProviderDownload downloader = downloaderFor(database)) { + if (downloader.validCredentials()) { + // the name that comes from the enterprise downloader cluster state doesn't include the .mmdb extension, + // but the downloading and indexing of database code expects it to be there, so we add it on here before continuing + final String fileName = name + ".mmdb"; + processDatabase(fileName, downloader.checksum(), downloader.download()); + return true; + } else { + logger.warn("No credentials found to download database [{}], skipping download...", id); + return false; + } } - final String sha256 = matcher.group(1); - // the name that comes from the enterprise downloader cluster state doesn't include the .mmdb extension, - // but the downloading and indexing of database code expects it to be there, so we add it on here before further processing - processDatabase(auth, name + ".mmdb", sha256, tgzUrl); } /** - * This method fetches the tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz - * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. + * This method fetches the database file for the given database from the passed-in source, then indexes that database + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database from the index if they exist. 
* - * @param auth The credentials to use to download from the Maxmind endpoint - * The name of the database to be downloaded from Maxmind and indexed into an Elasticsearch index - * @param sha256 The sha256 to compare to the computed sha256 of the downloaded tar.gz file - * @param url The URL for the Maxmind endpoint from which the database's tar.gz will be downloaded + * @param name The name of the database to be downloaded and indexed into an Elasticsearch index + * @param checksum The checksum to compare to the computed checksum of the downloaded file + * @param source The supplier of an InputStream that will actually download the file */ - private void processDatabase(PasswordAuthentication auth, String name, String sha256, String url) { + private void processDatabase(final String name, final Checksum checksum, final CheckedSupplier source) { Metadata metadata = state.getDatabases().getOrDefault(name, Metadata.EMPTY); - if (Objects.equals(metadata.sha256(), sha256)) { + if (checksum.matches(metadata)) { updateTimestamp(name, metadata); return; } - logger.debug("downloading geoip database [{}]", name); + logger.debug("downloading database [{}]", name); long start = System.currentTimeMillis(); - try (InputStream is = httpClient.get(auth, url)) { + try (InputStream is = source.get()) { int firstChunk = metadata.lastChunk() + 1; // if there is no metadata, then Metadata.EMPTY + 1 = 0 - Tuple tuple = indexChunks(name, is, firstChunk, MessageDigests.sha256(), sha256, start); + Tuple tuple = indexChunks(name, is, firstChunk, checksum, start); int lastChunk = tuple.v1(); - String md5 = tuple.v2(); + String md5 = tuple.v2(); // the md5 of the bytes as they passed through indexChunks if (lastChunk > firstChunk) { + // if there is a sha256 for this download, then record it (otherwise record null for it, which is also fine) + String sha256 = checksum.type == Checksum.Type.SHA256 ? 
checksum.checksum : null; state = state.put(name, new Metadata(start, firstChunk, lastChunk - 1, md5, start, sha256)); updateTaskState(); - logger.info("successfully downloaded geoip database [{}]", name); + logger.info("successfully downloaded database [{}]", name); deleteOldChunks(name, firstChunk); } } catch (Exception e) { - logger.error(() -> "error downloading geoip database [" + name + "]", e); + logger.error(() -> "error downloading database [" + name + "]", e); } } @@ -319,13 +293,13 @@ void deleteOldChunks(String name, int firstChunk) { client.execute( DeleteByQueryAction.INSTANCE, request, - ActionListener.wrap(r -> {}, e -> logger.warn("could not delete old chunks for geoip database [" + name + "]", e)) + ActionListener.wrap(r -> {}, e -> logger.warn("could not delete old chunks for database [" + name + "]", e)) ); } // visible for testing protected void updateTimestamp(String name, Metadata old) { - logger.debug("geoip database [{}] is up to date, updated timestamp", name); + logger.debug("database [{}] is up to date, updated timestamp", name); state = state.put( name, new Metadata(old.lastUpdate(), old.firstChunk(), old.lastChunk(), old.md5(), System.currentTimeMillis(), old.sha256()) @@ -340,15 +314,11 @@ void updateTaskState() { } // visible for testing - Tuple indexChunks( - String name, - InputStream is, - int chunk, - @Nullable MessageDigest digest, - String expectedChecksum, - long timestamp - ) throws IOException { + Tuple indexChunks(String name, InputStream is, int chunk, final Checksum checksum, long timestamp) throws IOException { + // we have to calculate and return md5 sums as a matter of course (see actualMd5 being return below), + // but we don't have to do it *twice* -- so if the passed-in checksum is also md5, then we'll get null here MessageDigest md5 = MessageDigests.md5(); + MessageDigest digest = checksum.digest(); // this returns null for md5 for (byte[] buf = getChunk(is); buf.length != 0; buf = getChunk(is)) { md5.update(buf); if (digest != null) { @@ -371,6 +341,7 @@ Tuple indexChunks( String actualMd5 = MessageDigests.toHexString(md5.digest()); String actualChecksum = digest == null ? actualMd5 : MessageDigests.toHexString(digest.digest()); + String expectedChecksum = checksum.checksum; if (Objects.equals(expectedChecksum, actualChecksum) == false) { throw new IOException("checksum mismatch, expected [" + expectedChecksum + "], actual [" + actualChecksum + "]"); } @@ -418,12 +389,12 @@ synchronized void runDownloader() { try { updateDatabases(); // n.b. 
this downloads bytes from the internet, it can take a while } catch (Exception e) { - logger.error("exception during geoip databases update", e); + logger.error("exception during databases update", e); } try { cleanDatabases(); } catch (Exception e) { - logger.error("exception during geoip databases cleanup", e); + logger.error("exception during databases cleanup", e); } } @@ -472,4 +443,136 @@ private void scheduleNextRun(TimeValue time) { } } + private ProviderDownload downloaderFor(DatabaseConfiguration database) { + return new MaxmindDownload(database.name(), database.maxmind()); + } + + class MaxmindDownload implements ProviderDownload { + + final String name; + final DatabaseConfiguration.Maxmind maxmind; + HttpClient.PasswordAuthenticationHolder auth; + + MaxmindDownload(String name, DatabaseConfiguration.Maxmind maxmind) { + this.name = name; + this.maxmind = maxmind; + this.auth = buildCredentials(); + } + + @Override + public HttpClient.PasswordAuthenticationHolder buildCredentials() { + // if the username is missing, empty, or blank, return null as 'no auth' + final String username = maxmind.accountId(); + if (username == null || username.isEmpty() || username.isBlank()) { + return null; + } + + // likewise if the password chars array is missing or empty, return null as 'no auth' + final char[] passwordChars = tokenProvider.apply("maxmind"); + if (passwordChars == null || passwordChars.length == 0) { + return null; + } + + return new HttpClient.PasswordAuthenticationHolder(username, passwordChars); + } + + @Override + public boolean validCredentials() { + return auth.get() != null; + } + + @Override + public String url(String suffix) { + String endpointPattern = DEFAULT_MAXMIND_ENDPOINT; + if (endpointPattern.contains("%")) { + throw new IllegalArgumentException("Invalid endpoint [" + endpointPattern + "]"); + } + if (endpointPattern.endsWith("/") == false) { + endpointPattern += "/"; + } + endpointPattern += "%s/download?suffix=%s"; + + // at this point the pattern looks like this (in the default case): + // https://download.maxmind.com/geoip/databases/%s/download?suffix=%s + + return Strings.format(endpointPattern, name, suffix); + } + + @Override + public Checksum checksum() throws IOException { + final String sha256Url = this.url("tar.gz.sha256"); + var result = new String(httpClient.getBytes(auth.get(), sha256Url), StandardCharsets.UTF_8).trim(); // throws if the auth is bad + var matcher = SHA256_CHECKSUM_PATTERN.matcher(result); + boolean match = matcher.matches(); + if (match == false) { + throw new RuntimeException("Unexpected sha256 response from [" + sha256Url + "]"); + } + final String sha256 = matcher.group(1); + return Checksum.sha256(sha256); + } + + @Override + public CheckedSupplier download() { + final String tgzUrl = this.url("tar.gz"); + return () -> httpClient.get(auth.get(), tgzUrl); + } + + @Override + public void close() throws IOException { + auth.close(); + } + } + + interface ProviderDownload extends Closeable { + // note: buildCredentials and url are inherently just implementation details of checksum() and download(), + // but it's useful to have unit tests for this logic and to keep it separate + HttpClient.PasswordAuthenticationHolder buildCredentials(); + + String url(String suffix); + + boolean validCredentials(); + + Checksum checksum() throws IOException; + + CheckedSupplier download(); + + @Override + void close() throws IOException; + } + + record Checksum(Type type, String checksum) { + + // use the static factory methods, though, rather than 
this + public Checksum { + Objects.requireNonNull(type); + Objects.requireNonNull(checksum); + } + + static Checksum md5(String checksum) { + return new Checksum(Type.MD5, checksum); + } + + static Checksum sha256(String checksum) { + return new Checksum(Type.SHA256, checksum); + } + + enum Type { + MD5, + SHA256 + } + + MessageDigest digest() { + return switch (type) { + case MD5 -> null; // a leaky implementation detail, we don't need to calculate two md5s + case SHA256 -> MessageDigests.sha256(); + }; + } + + boolean matches(Metadata metadata) { + return switch (type) { + case MD5 -> checksum.equals(metadata.md5()); + case SHA256 -> checksum.equals(metadata.sha256()); + }; + } + } } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java index 6f04ad4422c66..5214c0e4a6a51 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java @@ -100,25 +100,14 @@ private void setPollInterval(TimeValue pollInterval) { } } - private HttpClient.PasswordAuthenticationHolder buildCredentials(final String username) { - final char[] passwordChars; - if (cachedSecureSettings.getSettingNames().contains(MAXMIND_LICENSE_KEY_SETTING.getKey())) { - passwordChars = cachedSecureSettings.getString(MAXMIND_LICENSE_KEY_SETTING.getKey()).getChars(); - } else { - passwordChars = null; - } - - // if the username is missing, empty, or blank, return null as 'no auth' - if (username == null || username.isEmpty() || username.isBlank()) { - return null; - } - - // likewise if the password chars array is missing or empty, return null as 'no auth' - if (passwordChars == null || passwordChars.length == 0) { - return null; + private char[] getSecureToken(final String type) { + char[] token = null; + if (type.equals("maxmind")) { + if (cachedSecureSettings.getSettingNames().contains(MAXMIND_LICENSE_KEY_SETTING.getKey())) { + token = cachedSecureSettings.getString(MAXMIND_LICENSE_KEY_SETTING.getKey()).getChars(); + } } - - return new HttpClient.PasswordAuthenticationHolder(username, passwordChars); + return token; } @Override @@ -142,7 +131,7 @@ protected EnterpriseGeoIpDownloader createTask( parentTaskId, headers, () -> pollInterval, - this::buildCredentials + this::getSecureToken ); } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MMDBUtil.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MMDBUtil.java index b0d4d98701704..3c21296ff294a 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MMDBUtil.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MMDBUtil.java @@ -14,6 +14,8 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.zip.GZIPInputStream; +import java.util.zip.ZipException; public final class MMDBUtil { @@ -98,4 +100,13 @@ public static String getDatabaseType(final Path database) throws IOException { private static int fromBytes(byte b1) { return b1 & 0xFF; } + + public static boolean isGzip(Path path) throws IOException { + try (InputStream is = Files.newInputStream(path); InputStream gzis = new GZIPInputStream(is)) { + gzis.read(); // nooping, the point is just whether it's a gzip or not + return 
true; + } catch (ZipException e) { + return false; + } + } } diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceTests.java index be105237b4582..0ef4686dc033e 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceTests.java @@ -64,6 +64,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.OutputStream; import java.io.UncheckedIOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -311,12 +312,24 @@ public void testUpdateDatabase() throws Exception { private String mockSearches(String databaseName, int firstChunk, int lastChunk) throws IOException { String dummyContent = "test: " + databaseName; - List data = gzip(databaseName, dummyContent, lastChunk - firstChunk + 1); - assertThat(gunzip(data), equalTo(dummyContent)); + List data; + // We want to make sure we handle gzip files or plain mmdb files equally well: + if (randomBoolean()) { + data = gzip(databaseName, dummyContent, lastChunk - firstChunk + 1); + assertThat(gunzip(data), equalTo(dummyContent)); + } else { + data = chunkBytes(dummyContent, lastChunk - firstChunk + 1); + assertThat(unchunkBytes(data), equalTo(dummyContent)); + } Map> requestMap = new HashMap<>(); for (int i = firstChunk; i <= lastChunk; i++) { - byte[] chunk = data.get(i - firstChunk); + byte[] chunk; + if (i - firstChunk < data.size()) { + chunk = data.get(i - firstChunk); + } else { + chunk = new byte[0]; // We had so little data that the chunk(s) at the end will be empty + } SearchHit hit = SearchHit.unpooled(i); try (XContentBuilder builder = XContentBuilder.builder(XContentType.SMILE.xContent())) { builder.map(Map.of("data", chunk)); @@ -390,6 +403,39 @@ static ClusterState createClusterState(PersistentTasksCustomMetadata tasksCustom .build(); } + private static List chunkBytes(String content, int chunks) throws IOException { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + OutputStream outputStream = byteArrayOutputStream; + byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + outputStream.write(contentBytes); + outputStream.close(); + + byte[] all = byteArrayOutputStream.toByteArray(); + int chunkSize = Math.max(1, all.length / chunks); + List data = new ArrayList<>(); + + for (int from = 0; from < all.length;) { + int to = from + chunkSize; + if (to > all.length) { + to = all.length; + } + data.add(Arrays.copyOfRange(all, from, to)); + from = to; + } + + while (data.size() > chunks) { + byte[] last = data.remove(data.size() - 1); + byte[] secondLast = data.remove(data.size() - 1); + byte[] merged = new byte[secondLast.length + last.length]; + System.arraycopy(secondLast, 0, merged, 0, secondLast.length); + System.arraycopy(last, 0, merged, secondLast.length, last.length); + data.add(merged); + } + + assert data.size() == Math.min(chunks, content.length()); + return data; + } + private static List gzip(String name, String content, int chunks) throws IOException { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); GZIPOutputStream gzipOutputStream = new GZIPOutputStream(bytes); @@ -432,13 +478,23 @@ private static List gzip(String name, String content, int chunks) throws return data; } - private static String gunzip(List 
chunks) throws IOException { - byte[] gzippedContent = new byte[chunks.stream().mapToInt(value -> value.length).sum()]; + private static byte[] unchunkBytesToByteArray(List chunks) throws IOException { + byte[] allBytes = new byte[chunks.stream().mapToInt(value -> value.length).sum()]; int written = 0; for (byte[] chunk : chunks) { - System.arraycopy(chunk, 0, gzippedContent, written, chunk.length); + System.arraycopy(chunk, 0, allBytes, written, chunk.length); written += chunk.length; } + return allBytes; + } + + private static String unchunkBytes(List chunks) throws IOException { + byte[] allBytes = unchunkBytesToByteArray(chunks); + return new String(allBytes, StandardCharsets.UTF_8); + } + + private static String gunzip(List chunks) throws IOException { + byte[] gzippedContent = unchunkBytesToByteArray(chunks); TarInputStream gzipInputStream = new TarInputStream(new GZIPInputStream(new ByteArrayInputStream(gzippedContent))); gzipInputStream.getNextEntry(); return Streams.readFully(gzipInputStream).utf8ToString(); diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java index c254c54ae983c..88c37409713ac 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java @@ -29,11 +29,11 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.ReferenceDocs; -import org.elasticsearch.common.hash.MessageDigests; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Tuple; import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloader.Checksum; import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; import org.elasticsearch.node.Node; import org.elasticsearch.persistent.PersistentTasksCustomMetadata; @@ -51,11 +51,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.net.PasswordAuthentication; import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.time.Instant; -import java.time.temporal.ChronoUnit; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -111,7 +107,7 @@ public void setup() throws IOException { EMPTY_TASK_ID, Map.of(), () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), - (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + (type) -> "password".toCharArray() ) { { EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams geoIpTaskParams = mock(EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams.class); @@ -206,8 +202,7 @@ public void testIndexChunksNoData() throws IOException { "test", empty, 0, - MessageDigests.sha256(), - "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + Checksum.sha256("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"), 0 ) ); @@ -228,7 +223,7 @@ public void testIndexChunksMd5Mismatch() { IOException exception = expectThrows( IOException.class, - () -> geoIpDownloader.indexChunks("test", new ByteArrayInputStream(new byte[0]), 0, MessageDigests.sha256(), "123123", 0) + () -> 
geoIpDownloader.indexChunks("test", new ByteArrayInputStream(new byte[0]), 0, Checksum.sha256("123123"), 0) ); assertEquals( "checksum mismatch, expected [123123], actual [e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855]", @@ -279,8 +274,7 @@ public void testIndexChunks() throws IOException { "test", big, 15, - MessageDigests.sha256(), - "f2304545f224ff9ffcc585cb0a993723f911e03beb552cc03937dd443e931eab", + Checksum.sha256("f2304545f224ff9ffcc585cb0a993723f911e03beb552cc03937dd443e931eab"), 0 ) ); @@ -304,7 +298,7 @@ public void testProcessDatabaseNew() throws IOException { EMPTY_TASK_ID, Map.of(), () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), - (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + (type) -> "password".toCharArray() ) { @Override protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { @@ -312,18 +306,11 @@ protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { } @Override - Tuple indexChunks( - String name, - InputStream is, - int chunk, - MessageDigest digest, - String expectedMd5, - long start - ) { + Tuple indexChunks(String name, InputStream is, int chunk, Checksum checksum, long start) { assertSame(bais, is); assertEquals(0, chunk); indexedChunks.set(true); - return Tuple.tuple(11, expectedMd5); + return Tuple.tuple(11, checksum.checksum()); } @Override @@ -340,10 +327,9 @@ void deleteOldChunks(String name, int firstChunk) { }; geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY); - PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); String id = randomIdentifier(); DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); - geoIpDownloader.processDatabase(auth, databaseConfiguration); + geoIpDownloader.processDatabase(id, databaseConfiguration); assertThat(indexedChunks.get(), equalTo(true)); } @@ -363,7 +349,7 @@ public void testProcessDatabaseUpdate() throws IOException { EMPTY_TASK_ID, Map.of(), () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), - (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + (type) -> "password".toCharArray() ) { @Override protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { @@ -371,18 +357,11 @@ protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { } @Override - Tuple indexChunks( - String name, - InputStream is, - int chunk, - MessageDigest digest, - String expectedMd5, - long start - ) { + Tuple indexChunks(String name, InputStream is, int chunk, Checksum checksum, long start) { assertSame(bais, is); assertEquals(9, chunk); indexedChunks.set(true); - return Tuple.tuple(1, expectedMd5); + return Tuple.tuple(1, checksum.checksum()); } @Override @@ -399,10 +378,9 @@ void deleteOldChunks(String name, int firstChunk) { }; geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(0, 5, 8, "0", 0))); - PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); String id = randomIdentifier(); DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); - geoIpDownloader.processDatabase(auth, databaseConfiguration); + geoIpDownloader.processDatabase(id, databaseConfiguration); assertThat(indexedChunks.get(), equalTo(true)); } @@ -431,7 
+409,7 @@ public void testProcessDatabaseSame() throws IOException { EMPTY_TASK_ID, Map.of(), () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), - (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + (type) -> "password".toCharArray() ) { @Override protected void updateTimestamp(String name, GeoIpTaskState.Metadata newMetadata) { @@ -440,16 +418,9 @@ protected void updateTimestamp(String name, GeoIpTaskState.Metadata newMetadata) } @Override - Tuple indexChunks( - String name, - InputStream is, - int chunk, - MessageDigest digest, - String expectedChecksum, - long start - ) { + Tuple indexChunks(String name, InputStream is, int chunk, Checksum checksum, long start) { fail(); - return Tuple.tuple(0, expectedChecksum); + return Tuple.tuple(0, checksum.checksum()); } @Override @@ -463,10 +434,9 @@ void deleteOldChunks(String name, int firstChunk) { } }; geoIpDownloader.setState(taskState); - PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); String id = randomIdentifier(); DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); - geoIpDownloader.processDatabase(auth, databaseConfiguration); + geoIpDownloader.processDatabase(id, databaseConfiguration); } public void testUpdateDatabasesWriteBlock() { @@ -502,14 +472,20 @@ public void testUpdateDatabasesIndexNotReady() throws IOException { verifyNoInteractions(httpClient); } - private GeoIpTaskState.Metadata newGeoIpTaskStateMetadata(boolean expired) { - Instant lastChecked; - if (expired) { - lastChecked = Instant.now().minus(randomIntBetween(31, 100), ChronoUnit.DAYS); - } else { - lastChecked = Instant.now().minus(randomIntBetween(0, 29), ChronoUnit.DAYS); + public void testMaxmindUrls() { + // non-static classes have fun syntax, but it's nice to be able to test this behavior by itself + final EnterpriseGeoIpDownloader.MaxmindDownload download = geoIpDownloader.new MaxmindDownload( + "GeoLite2-City", new DatabaseConfiguration.Maxmind("account_id") + ); + + { + String url = "https://download.maxmind.com/geoip/databases/GeoLite2-City/download?suffix=tar.gz"; + assertThat(download.url("tar.gz"), equalTo(url)); + } + { + String url = "https://download.maxmind.com/geoip/databases/GeoLite2-City/download?suffix=tar.gz.sha256"; + assertThat(download.url("tar.gz.sha256"), equalTo(url)); } - return new GeoIpTaskState.Metadata(0, 0, 0, randomAlphaOfLength(20), lastChecked.toEpochMilli()); } private static class MockClient extends NoOpClient { diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java index d441b749f4225..f1c7d809b98fe 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java @@ -15,8 +15,10 @@ import org.junit.Before; import java.io.IOException; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.zip.GZIPOutputStream; import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDatabase; import static org.hamcrest.Matchers.endsWith; @@ -67,6 +69,21 @@ public void testSmallFileWithALongDescription() throws IOException { assertThat(Files.size(database), is(444L)); // 444 is <512 } + public void testIsGzip() throws IOException { + Path database = 
tmpDir.resolve("GeoLite2-City.mmdb"); + copyDatabase("GeoLite2-City-Test.mmdb", database); + + Path gzipDatabase = tmpDir.resolve("GeoLite2-City.mmdb.gz"); + + // gzip the test mmdb + try (OutputStream out = new GZIPOutputStream(Files.newOutputStream(gzipDatabase))) { + Files.copy(database, out); + } + + assertThat(MMDBUtil.isGzip(database), is(false)); + assertThat(MMDBUtil.isGzip(gzipDatabase), is(true)); + } + public void testDatabaseTypeParsing() throws IOException { // this test is a little bit overloaded -- it's testing that we're getting the expected sorts of // database_type strings from these files, *and* it's also testing that we dispatch on those strings diff --git a/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/RestMultiSearchTemplateActionTests.java b/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/RestMultiSearchTemplateActionTests.java deleted file mode 100644 index 3613d7390fda2..0000000000000 --- a/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/RestMultiSearchTemplateActionTests.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.script.mustache; - -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; -import org.mockito.Mockito; - -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -public final class RestMultiSearchTemplateActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestMultiSearchTemplateAction(Settings.EMPTY)); - // todo how to workaround this? 
we get AssertionError without this - verifyingClient.setExecuteVerifier((actionType, request) -> Mockito.mock(MultiSearchTemplateResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> Mockito.mock(MultiSearchTemplateResponse.class)); - } - - public void testTypeInPath() { - String content = """ - { "index": "some_index" } - {"source": {"query" : {"match_all" :{}}}} - """; - BytesArray bytesContent = new BytesArray(content.getBytes(StandardCharsets.UTF_8)); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/some_type/_msearch/template").withContent(bytesContent, null).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestMultiSearchTemplateAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeInBody() { - String content = """ - { "index": "some_index", "type": "some_type" }\s - {"source": {"query" : {"match_all" :{}}}}\s - """; - BytesArray bytesContent = new BytesArray(content.getBytes(StandardCharsets.UTF_8)); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withPath("/some_index/_msearch/template").withContent(bytesContent, null).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestMultiSearchTemplateAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/RestSearchTemplateActionTests.java b/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/RestSearchTemplateActionTests.java deleted file mode 100644 index 0216e750c55e0..0000000000000 --- a/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/RestSearchTemplateActionTests.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.script.mustache; - -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.action.search.RestSearchAction; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.junit.Before; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.mockito.Mockito.mock; - -public final class RestSearchTemplateActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestSearchTemplateAction(nf -> false)); - verifyingClient.setExecuteVerifier((actionType, request) -> mock(SearchTemplateResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> mock(SearchTemplateResponse.class)); - } - - public void testTypeInPath() { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/some_type/_search/template").build(); - - dispatchRequest(request); - assertCriticalWarnings(RestSearchAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeParameter() { - Map params = new HashMap<>(); - params.put("type", "some_type"); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/_search/template").withParams(params).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestSearchAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java index 1950c72c80ec4..4f34cbd3cc475 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java @@ -108,7 +108,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -238,29 +237,21 @@ static Tuple parseClusterAliasAndIndex(String indexExpression) { return new Tuple<>(null, null); } String trimmed = indexExpression.trim(); - String sep = String.valueOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR); - if (trimmed.startsWith(sep) || trimmed.endsWith(sep)) { - throw new IllegalArgumentException( - "Unable to parse one single valid index name from the provided index: [" + indexExpression + "]" - ); - } - + String[] parts = RemoteClusterAware.splitIndexName(trimmed); // The parser here needs to ensure that the indexExpression is not of the form "remote1:blogs,remote2:blogs" // because (1) only a single index is allowed for Painless Execute and // (2) if this method returns Tuple("remote1", "blogs,remote2:blogs") that will not fail with "index not found". 
// Instead, it will fail with the inaccurate and confusing error message: // "Cross-cluster calls are not supported in this context but remote indices were requested: [blogs,remote1:blogs]" // which comes later out of the IndexNameExpressionResolver pathway this code uses. - String[] parts = indexExpression.split(sep, 2); - if (parts.length == 1) { - return new Tuple<>(null, parts[0]); - } else if (parts.length == 2 && parts[1].contains(sep) == false) { - return new Tuple<>(parts[0], parts[1]); - } else { + if ((parts[0] != null && parts[1].isEmpty()) + || parts[1].contains(String.valueOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR))) { throw new IllegalArgumentException( "Unable to parse one single valid index name from the provided index: [" + indexExpression + "]" ); } + + return new Tuple<>(parts[0], parts[1]); } public String getClusterAlias() { @@ -556,8 +547,8 @@ protected void doExecute(Task task, Request request, ActionListener li // Visible for testing static void removeClusterAliasFromIndexExpression(Request request) { if (request.index() != null) { - String[] split = request.index().split(String.valueOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR)); - if (split.length > 1) { + String[] split = RemoteClusterAware.splitIndexName(request.index()); + if (split[0] != null) { /* * if the cluster alias is null and the index field has a clusterAlias (clusterAlias:index notation) * that means this is executing on a remote cluster (it was forwarded by the querying cluster). @@ -565,9 +556,6 @@ static void removeClusterAliasFromIndexExpression(Request request) { * We need to strip off the clusterAlias from the index before executing the script locally, * so it will resolve to a local index */ - assert split.length == 2 - : "If the index contains the REMOTE_CLUSTER_INDEX_SEPARATOR it should have only two parts but it has " - + Arrays.toString(split); request.index(split[1]); } } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryBuilderTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryBuilderTests.java index d6908f58a901a..88b773d413fab 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryBuilderTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryBuilderTests.java @@ -21,7 +21,6 @@ import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.lucene.uid.Versions; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.get.GetResult; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.QueryBuilder; @@ -32,9 +31,7 @@ import org.elasticsearch.test.AbstractQueryTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.xcontent.json.JsonXContent; import org.hamcrest.Matchers; import java.io.IOException; @@ -379,31 +376,4 @@ public void testDisallowExpensiveQueries() { ElasticsearchException e = expectThrows(ElasticsearchException.class, () -> queryBuilder.toQuery(searchExecutionContext)); assertEquals("[percolate] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", e.getMessage()); } - - public void testFromJsonWithDocumentType() throws IOException { - SearchExecutionContext 
searchExecutionContext = createSearchExecutionContext(); - String queryAsString = Strings.format(""" - {"percolate" : { "document": {}, "document_type":"%s", "field":"%s"}} - """, docType, queryField); - XContentParser parser = createParserWithCompatibilityFor(JsonXContent.jsonXContent, queryAsString, RestApiVersion.V_7); - QueryBuilder queryBuilder = parseQuery(parser); - queryBuilder.toQuery(searchExecutionContext); - assertCriticalWarnings(PercolateQueryBuilder.DOCUMENT_TYPE_DEPRECATION_MESSAGE); - } - - public void testFromJsonWithType() throws IOException { - indexedDocumentIndex = randomAlphaOfLength(4); - indexedDocumentId = randomAlphaOfLength(4); - indexedDocumentVersion = Versions.MATCH_ANY; - documentSource = Collections.singletonList(randomSource(new HashSet<>())); - SearchExecutionContext searchExecutionContext = createSearchExecutionContext(); - - String queryAsString = Strings.format(""" - {"percolate" : { "index": "%s", "type": "_doc", "id": "%s", "field":"%s"}} - """, indexedDocumentIndex, indexedDocumentId, queryField); - XContentParser parser = createParserWithCompatibilityFor(JsonXContent.jsonXContent, queryAsString, RestApiVersion.V_7); - QueryBuilder queryBuilder = parseQuery(parser); - rewriteAndFetch(queryBuilder, searchExecutionContext).toQuery(searchExecutionContext); - assertCriticalWarnings(PercolateQueryBuilder.TYPE_DEPRECATION_MESSAGE); - } } diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java index 4d18f00ab572d..4b960e97ce0e0 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java @@ -156,21 +156,10 @@ static void validateAgainstAliases( } private static SearchRequest skipRemoteIndexNames(SearchRequest source) { - return new SearchRequest(source).indices( - Arrays.stream(source.indices()).filter(name -> isRemoteExpression(name) == false).toArray(String[]::new) - ); - } - - private static boolean isRemoteExpression(String expression) { // An index expression that references a remote cluster uses ":" to separate the cluster-alias from the index portion of the // expression, e.g., cluster0:index-name - // in the same time date-math `expression` can also contain ':' symbol inside its name - // to distinguish between those two, given `expression` is pre-evaluated using date-math resolver - // after evaluation date-math `expression` should not contain ':' symbol - // otherwise if `expression` is legit remote name, ':' symbol remains - // NOTE: index expressions can be prefixed with "-", which will not be parsed by resolveDateMathExpression, - // but in this particular case it doesn't seem to be relevant. 
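To make the distinction described in the removed comment above concrete, here is a small, purely illustrative sketch. It assumes org.elasticsearch.cluster.metadata.IndexNameExpressionResolver and org.elasticsearch.transport.RemoteClusterAware are available on the classpath; the example expressions and the expected results in the comments are my own illustration, not part of the change.

    // A date-math expression may contain ':' inside its braces (for example in a time-zone suffix),
    // while a remote expression uses ':' to separate the cluster alias from the index name.
    String dateMathExpression = "<logs-{now/d{yyyy.MM.dd|+12:00}}>";
    String remoteExpression = "cluster0:index-name";

    // Old approach (the check removed below): resolve date math first, then look for the separator.
    // After resolution the date-math ':' is gone, while a genuine remote separator remains.
    boolean oldRemote = IndexNameExpressionResolver.resolveDateMathExpression(remoteExpression)
        .contains(String.valueOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR));   // expected: true
    boolean oldDateMath = IndexNameExpressionResolver.resolveDateMathExpression(dateMathExpression)
        .contains(String.valueOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR));   // expected: false

    // New approach (the call added below): the same decision is delegated to a shared helper.
    boolean newRemote = RemoteClusterAware.isRemoteIndexName(remoteExpression);          // expected: true
    boolean newDateMath = RemoteClusterAware.isRemoteIndexName(dateMathExpression);      // expected: false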
- return IndexNameExpressionResolver.resolveDateMathExpression(expression) - .contains(String.valueOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR)); + return new SearchRequest(source).indices( + Arrays.stream(source.indices()).filter(name -> RemoteClusterAware.isRemoteIndexName(name) == false).toArray(String[]::new) + ); } } diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/RestDeleteByQueryActionTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/RestDeleteByQueryActionTests.java deleted file mode 100644 index 505b12833fb5e..0000000000000 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/RestDeleteByQueryActionTests.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.reindex; - -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.index.reindex.BulkByScrollResponse; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.action.search.RestSearchAction; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.mockito.Mockito.mock; - -public final class RestDeleteByQueryActionTests extends RestActionTestCase { - - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestDeleteByQueryAction(nf -> false)); - verifyingClient.setExecuteVerifier((actionType, request) -> mock(BulkByScrollResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> mock(BulkByScrollResponse.class)); - } - - public void testTypeInPath() throws IOException { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.POST).withPath("/some_index/some_type/_delete_by_query").build(); - - // checks the type in the URL is propagated correctly to the request object - // only works after the request is dispatched, so its params are filled from url. - dispatchRequest(request); - - // RestDeleteByQueryAction itself doesn't check for a deprecated type usage - // checking here for a deprecation from its internal search request - assertCriticalWarnings(RestSearchAction.TYPES_DEPRECATION_MESSAGE); - } - -} diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/RestUpdateByQueryActionTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/RestUpdateByQueryActionTests.java deleted file mode 100644 index 0d35b30c86a5a..0000000000000 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/RestUpdateByQueryActionTests.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.reindex; - -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.index.reindex.BulkByScrollResponse; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.action.search.RestSearchAction; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.mockito.Mockito.mock; - -public final class RestUpdateByQueryActionTests extends RestActionTestCase { - - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestUpdateByQueryAction(nf -> false)); - verifyingClient.setExecuteVerifier((actionType, request) -> mock(BulkByScrollResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> mock(BulkByScrollResponse.class)); - } - - public void testTypeInPath() throws IOException { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.POST).withPath("/some_index/some_type/_update_by_query").build(); - - // checks the type in the URL is propagated correctly to the request object - // only works after the request is dispatched, so its params are filled from url. 
- dispatchRequest(request); - - // RestUpdateByQueryAction itself doesn't check for a deprecated type usage - // checking here for a deprecation from its internal search request - assertCriticalWarnings(RestSearchAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureRepositoryMissingCredentialsIT.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureRepositoryMissingCredentialsIT.java index 7410b9acaf2b5..947f73c2ce580 100644 --- a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureRepositoryMissingCredentialsIT.java +++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureRepositoryMissingCredentialsIT.java @@ -9,7 +9,6 @@ package org.elasticsearch.repositories.azure; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest; import org.elasticsearch.action.admin.cluster.repositories.put.TransportPutRepositoryAction; import org.elasticsearch.action.support.master.AcknowledgedResponse; @@ -41,17 +40,13 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { public void testMissingCredentialsException() { assertThat( - asInstanceOf( + safeAwaitAndUnwrapFailure( RepositoryVerificationException.class, - ExceptionsHelper.unwrapCause( - safeAwaitFailure( - AcknowledgedResponse.class, - l -> client().execute( - TransportPutRepositoryAction.TYPE, - new PutRepositoryRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT, "test-repo").type("azure"), - l - ) - ) + AcknowledgedResponse.class, + l -> client().execute( + TransportPutRepositoryAction.TYPE, + new PutRepositoryRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT, "test-repo").type("azure"), + l ) ).getCause().getMessage(), allOf( diff --git a/muted-tests.yml b/muted-tests.yml index e6ea16be8c631..adb2bc75b81b1 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -266,9 +266,6 @@ tests: - class: org.elasticsearch.xpack.esql.EsqlAsyncSecurityIT method: testLimitedPrivilege issue: https://github.com/elastic/elasticsearch/issues/113419 -- class: org.elasticsearch.index.mapper.extras.TokenCountFieldMapperTests - method: testBlockLoaderFromRowStrideReaderWithSyntheticSource - issue: https://github.com/elastic/elasticsearch/issues/113427 - class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT method: test {categorize.Categorize} issue: https://github.com/elastic/elasticsearch/issues/113428 @@ -284,9 +281,6 @@ tests: - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT method: test {p0=dot_prefix/10_basic/Deprecated index template with a dot prefix index pattern} issue: https://github.com/elastic/elasticsearch/issues/113529 -- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT - method: test {p0=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates} - issue: https://github.com/elastic/elasticsearch/issues/113537 - class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT method: testPutE5WithTrainedModelAndInference issue: https://github.com/elastic/elasticsearch/issues/113565 @@ -302,12 +296,38 @@ tests: - class: org.elasticsearch.integration.KibanaUserRoleIntegTests method: testSearchAndMSearch issue: https://github.com/elastic/elasticsearch/issues/113593 -- class: 
org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - method: test {date_nanos.Date_nanos to date nanos, index version SYNC} - issue: https://github.com/elastic/elasticsearch/issues/113632 - class: org.elasticsearch.xpack.transform.integration.TransformIT method: testStopWaitForCheckpoint issue: https://github.com/elastic/elasticsearch/issues/106113 +- class: org.elasticsearch.smoketest.MlWithSecurityIT + method: test {yaml=ml/3rd_party_deployment/Test start and stop multiple deployments} + issue: https://github.com/elastic/elasticsearch/issues/101458 +- class: org.elasticsearch.xpack.ml.integration.MlJobIT + method: testGetJobs_GivenMultipleJobs + issue: https://github.com/elastic/elasticsearch/issues/113654 +- class: org.elasticsearch.xpack.ml.integration.MlJobIT + method: testGetJobs_GivenSingleJob + issue: https://github.com/elastic/elasticsearch/issues/113655 +- class: org.elasticsearch.xpack.security.authz.interceptor.SearchRequestCacheDisablingInterceptorTests + method: testHasRemoteIndices + issue: https://github.com/elastic/elasticsearch/issues/113660 +- class: org.elasticsearch.xpack.security.authz.interceptor.SearchRequestCacheDisablingInterceptorTests + method: testRequestCacheWillBeDisabledWhenSearchRemoteIndices + issue: https://github.com/elastic/elasticsearch/issues/113659 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {categorize.Categorize ASYNC} + issue: https://github.com/elastic/elasticsearch/issues/113721 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {categorize.Categorize SYNC} + issue: https://github.com/elastic/elasticsearch/issues/113722 +- class: org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanosTests + issue: https://github.com/elastic/elasticsearch/issues/113661 +- class: org.elasticsearch.xpack.restart.MLModelDeploymentFullClusterRestartIT + method: testDeploymentSurvivesRestart {cluster=UPGRADED} + issue: https://github.com/elastic/elasticsearch/issues/112980 +- class: org.elasticsearch.xpack.searchablesnapshots.hdfs.SecureHdfsSearchableSnapshotsIT + issue: https://github.com/elastic/elasticsearch/issues/113753 + # Examples: # diff --git a/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java b/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java index 1709d02263eea..f9a0b7d5c5216 100644 --- a/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java +++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java @@ -24,7 +24,6 @@ import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.test.ESTestCase.TestAnalysis; import org.elasticsearch.test.ESTokenStreamTestCase; import java.io.IOException; @@ -33,6 +32,7 @@ import java.nio.file.Files; import java.nio.file.Path; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; diff --git a/plugins/examples/build.gradle b/plugins/examples/build.gradle index 8be4dfd7d1ab7..b60485edd1cb8 100644 --- a/plugins/examples/build.gradle +++ 
b/plugins/examples/build.gradle @@ -16,8 +16,8 @@ subprojects { apply plugin: 'java' java { - sourceCompatibility = 17 - targetCompatibility = 17 + sourceCompatibility = 21 + targetCompatibility = 21 } repositories { diff --git a/plugins/examples/gradle/wrapper/gradle-wrapper.properties b/plugins/examples/gradle/wrapper/gradle-wrapper.properties index e955ee28dd349..6acc1431eaec1 100644 --- a/plugins/examples/gradle/wrapper/gradle-wrapper.properties +++ b/plugins/examples/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip +distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java index d4090909ee82d..ee18f8fc2ec4b 100644 --- a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java +++ b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java @@ -15,7 +15,6 @@ import org.apache.http.util.EntityUtils; import org.elasticsearch.Build; -import org.elasticsearch.action.admin.cluster.settings.RestClusterGetSettingsResponse; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.ResponseException; @@ -43,6 +42,7 @@ import org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.test.rest.ObjectPath; import org.elasticsearch.test.rest.RestTestLegacyFeatures; +import org.elasticsearch.test.rest.TestResponseParsers; import org.elasticsearch.transport.Compression; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -1861,7 +1861,7 @@ public void testTransportCompressionSetting() throws IOException { final Request getSettingsRequest = new Request("GET", "/_cluster/settings"); final Response getSettingsResponse = client().performRequest(getSettingsRequest); try (XContentParser parser = createParser(JsonXContent.jsonXContent, getSettingsResponse.getEntity().getContent())) { - final Settings settings = RestClusterGetSettingsResponse.fromXContent(parser).getPersistentSettings(); + final Settings settings = TestResponseParsers.parseClusterSettingsResponse(parser).getPersistentSettings(); assertThat(REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace("foo").get(settings), equalTo(Compression.Enabled.TRUE)); } } diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java index 2b3eb7ff7a617..e562e7591564e 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java @@ -163,7 +163,7 @@ String build() { public static String getImageName(Distribution distribution) { String suffix = switch (distribution.packaging) { case DOCKER -> ""; - case DOCKER_UBI -> "-ubi8"; + case DOCKER_UBI -> "-ubi"; case DOCKER_IRON_BANK -> "-ironbank"; case 
DOCKER_CLOUD -> "-cloud"; case DOCKER_CLOUD_ESS -> "-cloud-ess"; diff --git a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/ClusterHealthRestCancellationIT.java b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/ClusterHealthRestCancellationIT.java index 9d70686c085b0..c2a40143b6eb1 100644 --- a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/ClusterHealthRestCancellationIT.java +++ b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/ClusterHealthRestCancellationIT.java @@ -19,25 +19,15 @@ import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; -import org.elasticsearch.test.junit.annotations.TestIssueLogging; import java.util.concurrent.CancellationException; import java.util.concurrent.CyclicBarrier; -import java.util.concurrent.TimeUnit; import static org.elasticsearch.action.support.ActionTestUtils.wrapAsRestResponseListener; import static org.elasticsearch.test.TaskAssertions.assertAllCancellableTasksAreCancelled; -import static org.elasticsearch.test.TaskAssertions.awaitTaskWithPrefixOnMaster; public class ClusterHealthRestCancellationIT extends HttpSmokeTestCase { - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100062") - @TestIssueLogging( - issueUrl = "https://github.com/elastic/elasticsearch/issues/100062", - value = "org.elasticsearch.test.TaskAssertions:TRACE" - + ",org.elasticsearch.cluster.service.MasterService:TRACE" - + ",org.elasticsearch.tasks.TaskManager:TRACE" - ) public void testClusterHealthRestCancellation() throws Exception { final var barrier = new CyclicBarrier(2); @@ -47,18 +37,7 @@ public void testClusterHealthRestCancellation() throws Exception { @Override public ClusterState execute(ClusterState currentState) { safeAwait(barrier); - // safeAwait(barrier); - - // temporarily lengthen timeout on safeAwait while investigating #100062 - try { - barrier.await(60, TimeUnit.SECONDS); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new AssertionError("unexpected", e); - } catch (Exception e) { - throw new AssertionError("unexpected", e); - } - + safeAwait(barrier); return currentState; } @@ -72,12 +51,23 @@ public void onFailure(Exception e) { clusterHealthRequest.addParameter("wait_for_events", Priority.LANGUID.toString()); final PlainActionFuture future = new PlainActionFuture<>(); - logger.info("--> sending cluster state request"); + logger.info("--> sending cluster health request"); final Cancellable cancellable = getRestClient().performRequestAsync(clusterHealthRequest, wrapAsRestResponseListener(future)); safeAwait(barrier); - awaitTaskWithPrefixOnMaster(TransportClusterHealthAction.NAME); + // wait until the health request is waiting on the (blocked) master service + assertBusy( + () -> assertTrue( + internalCluster().getCurrentMasterNodeInstance(ClusterService.class) + .getMasterService() + .pendingTasks() + .stream() + .anyMatch( + pendingClusterTask -> pendingClusterTask.source().string().equals("cluster_health (wait_for_events [LANGUID])") + ) + ) + ); logger.info("--> cancelling cluster health request"); cancellable.cancel(); diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml index f3a977cd96f62..9c6a1ca2e96d2 100644 --- 
a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml @@ -375,7 +375,7 @@ setup: - do: allowed_warnings: - - "index template [test-composable-1] has index patterns [tsdb_templated_*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [test-composable-1] will take precedence during new index creation" + - "index template [test-composable-1] has index patterns [foo*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [test-composable-1] will take precedence during new index creation" indices.put_index_template: name: test-composable-1 body: @@ -527,7 +527,7 @@ setup: - do: allowed_warnings: - - "index template [test-composable-1] has index patterns [tsdb_templated_*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [test-composable-1] will take precedence during new index creation" + - "index template [test-composable-1] has index patterns [foo*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [test-composable-1] will take precedence during new index creation" indices.put_index_template: name: test-composable-1 body: diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index a742e83255bbb..ed1cf905f7e9d 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -57,4 +57,5 @@ tasks.named("precommit").configure { tasks.named("yamlRestCompatTestTransform").configure({task -> task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility") task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility") + task.skipTestsByFilePattern("indices.create/synthetic_source*.yml", "@UpdateForV9 -> tests do not pass after bumping API version to 9 [ES-9597]") }) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json index 4a8ca46ceba8c..23f6ed4ec5b76 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json @@ -32,9 +32,9 @@ ] }, "params":{ - "flat_settings":{ + "include_remotes":{ "type":"boolean", - "description":"Return settings in flat format (default: false)" + "description":"Include remote cluster data into the response (default: false)" }, "timeout":{ "type":"time", diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml new file mode 100644 index 0000000000000..955c68634e617 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml @@ -0,0 +1,151 @@ +--- +"cross-cluster search stats basic": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_cluster/stats + capabilities: + - "ccs-stats" + reason: "Capability required to run test" + + - do: + cluster.stats: { } + + - is_true: ccs + - is_true: ccs._search + - is_false: ccs.clusters # no ccs clusters configured + - exists: ccs._search.total + - exists: ccs._search.success + - exists: ccs._search.skipped + - is_true: 
ccs._search.took + - is_true: ccs._search.took_mrt_true + - is_true: ccs._search.took_mrt_false + - exists: ccs._search.remotes_per_search_max + - exists: ccs._search.remotes_per_search_avg + - exists: ccs._search.failure_reasons + - exists: ccs._search.features + - exists: ccs._search.clients + - exists: ccs._search.clusters + +--- +"cross-cluster search stats search": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_cluster/stats + capabilities: + - "ccs-stats" + reason: "Capability required to run test" + + - do: + cluster.state: {} + - set: { master_node: master } + - do: + nodes.info: + metric: [ http, transport ] + - set: {nodes.$master.http.publish_address: host} + - set: {nodes.$master.transport.publish_address: transport_host} + + - do: + cluster.put_settings: + body: + persistent: + cluster: + remote: + cluster_one: + seeds: + - "${transport_host}" + skip_unavailable: true + cluster_two: + seeds: + - "${transport_host}" + skip_unavailable: false + - is_true: persistent.cluster.remote.cluster_one + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + + - do: + index: + index: test + id: "1" + refresh: true + body: + foo: bar + + - do: + cluster.health: + wait_for_status: green + + - do: + search: + index: "*,*:*" + body: + query: + match: + foo: bar + + - do: + cluster.stats: {} + - is_true: ccs + - is_true: ccs._search + - is_false: ccs.clusters # Still no remotes since include_remotes is not set + + - do: + cluster.stats: + include_remotes: true + - is_true: ccs + - is_true: ccs._search + - is_true: ccs.clusters # Now we have remotes + - is_true: ccs.clusters.cluster_one + - is_true: ccs.clusters.cluster_two + - is_true: ccs.clusters.cluster_one.cluster_uuid + - match: { ccs.clusters.cluster_one.mode: sniff } + - match: { ccs.clusters.cluster_one.skip_unavailable: true } + - match: { ccs.clusters.cluster_two.skip_unavailable: false } + - is_true: ccs.clusters.cluster_one.version + - match: { ccs.clusters.cluster_one.status: green } + - match: { ccs.clusters.cluster_two.status: green } + - is_true: ccs.clusters.cluster_one.nodes_count + - is_true: ccs.clusters.cluster_one.shards_count + - is_true: ccs.clusters.cluster_one.indices_count + - is_true: ccs.clusters.cluster_one.indices_total_size_in_bytes + - is_true: ccs.clusters.cluster_one.max_heap_in_bytes + - is_true: ccs.clusters.cluster_one.mem_total_in_bytes + - is_true: ccs._search.total + - is_true: ccs._search.success + - exists: ccs._search.skipped + - is_true: ccs._search.took + - is_true: ccs._search.took.max + - is_true: ccs._search.took.avg + - is_true: ccs._search.took.p90 + - is_true: ccs._search.took_mrt_true + - exists: ccs._search.took_mrt_true.max + - exists: ccs._search.took_mrt_true.avg + - exists: ccs._search.took_mrt_true.p90 + - is_true: ccs._search.took_mrt_false + - exists: ccs._search.took_mrt_false.max + - exists: ccs._search.took_mrt_false.avg + - exists: ccs._search.took_mrt_false.p90 + - match: { ccs._search.remotes_per_search_max: 2 } + - match: { ccs._search.remotes_per_search_avg: 2.0 } + - exists: ccs._search.failure_reasons + - exists: ccs._search.features + - exists: ccs._search.clients + - is_true: ccs._search.clusters + - is_true: ccs._search.clusters.cluster_one + - is_true: ccs._search.clusters.cluster_two + - gte: {ccs._search.clusters.cluster_one.total: 1} + - gte: {ccs._search.clusters.cluster_two.total: 1} + - exists: ccs._search.clusters.cluster_one.skipped + - exists: 
ccs._search.clusters.cluster_two.skipped + - is_true: ccs._search.clusters.cluster_one.took + - is_true: ccs._search.clusters.cluster_one.took.max + - is_true: ccs._search.clusters.cluster_one.took.avg + - is_true: ccs._search.clusters.cluster_one.took.p90 diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/index/92_metrics_auto_subobjects.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/index/92_metrics_auto_subobjects.yml index 603cc4fc2e304..414c24cfffd7d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/index/92_metrics_auto_subobjects.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/index/92_metrics_auto_subobjects.yml @@ -2,7 +2,7 @@ "Metrics object indexing": - requires: test_runner_features: [ "allowed_warnings", "allowed_warnings_regex" ] - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.subobjects_auto"] reason: requires supporting subobjects auto setting - do: @@ -69,7 +69,7 @@ "Root with metrics": - requires: test_runner_features: [ "allowed_warnings", "allowed_warnings_regex" ] - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.subobjects_auto"] reason: requires supporting subobjects auto setting - do: @@ -131,7 +131,7 @@ "Metrics object indexing with synthetic source": - requires: test_runner_features: [ "allowed_warnings", "allowed_warnings_regex" ] - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.subobjects_auto"] reason: added in 8.4.0 - do: @@ -201,7 +201,7 @@ "Root without subobjects with synthetic source": - requires: test_runner_features: [ "allowed_warnings", "allowed_warnings_regex" ] - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.subobjects_auto"] reason: added in 8.4.0 - do: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 41d9fcc30a880..b5a9146bc54a6 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -887,7 +887,7 @@ doubly nested object: --- subobjects auto: - requires: - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.subobjects_auto"] reason: requires tracking ignored source and supporting subobjects auto setting - do: @@ -924,21 +924,9 @@ subobjects auto: type: keyword nested: type: nested - path: - properties: - to: - properties: - auto_obj: - type: object - subobjects: auto - properties: - inner: - properties: - id: - type: keyword - id: - type: - integer + auto_obj: + type: object + subobjects: auto - do: bulk: @@ -946,13 +934,13 @@ subobjects auto: refresh: true body: - '{ "create": { } }' - - '{ "id": 1, "foo": 10, "foo.bar": 100, "regular.trace.id": ["b", "a", "b"], "regular.span.id": "1" }' + - '{ "id": 1, "foo": 10, "foo.bar": 100, "regular": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' - '{ "create": { } }' - '{ "id": 2, "foo": 20, "foo.bar": 200, "stored": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' - '{ "create": { } }' - '{ "id": 3, "foo": 30, "foo.bar": 300, "nested": [ { "a": 10, "b": 20 }, { "a": 100, "b": 200 } ] }' - '{ "create": { } }' - - '{ 
"id": 4, "path.to.auto_obj": { "foo": 40, "foo.bar": 400, "inner.id": "baz" }, "path.to.id": 4000 }' + - '{ "id": 4, "auto_obj": { "foo": 40, "foo.bar": 400 } }' - match: { errors: false } @@ -964,8 +952,8 @@ subobjects auto: - match: { hits.hits.0._source.id: 1 } - match: { hits.hits.0._source.foo: 10 } - match: { hits.hits.0._source.foo\.bar: 100 } - - match: { hits.hits.0._source.regular\.span\.id: "1" } - - match: { hits.hits.0._source.regular\.trace\.id: [ "a", "b" ] } + - match: { hits.hits.0._source.regular.span.id: "1" } + - match: { hits.hits.0._source.regular.trace.id: [ "a", "b" ] } - match: { hits.hits.1._source.id: 2 } - match: { hits.hits.1._source.foo: 20 } - match: { hits.hits.1._source.foo\.bar: 200 } @@ -981,110 +969,8 @@ subobjects auto: - match: { hits.hits.2._source.nested.1.a: 100 } - match: { hits.hits.2._source.nested.1.b: 200 } - match: { hits.hits.3._source.id: 4 } - - match: { hits.hits.3._source.path\.to\.auto_obj.foo: 40 } - - match: { hits.hits.3._source.path\.to\.auto_obj.foo\.bar: 400 } - - match: { hits.hits.3._source.path\.to\.auto_obj.inner\.id: baz } - - match: { hits.hits.3._source.path\.to\.id: 4000 } - - ---- -subobjects auto with path flattening: - - requires: - cluster_features: ["mapper.subobjects_auto_fixes"] - reason: requires tracking ignored source and supporting subobjects auto setting - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - subobjects: auto - properties: - id: - type: integer - attributes: - type: object - subobjects: auto - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "id": 1, "attributes": { "foo": { "bar": 10 } } }' - - '{ "create": { } }' - - '{ "id": 2, "attributes": { "foo": { "bar": 20 } } }' - - '{ "create": { } }' - - '{ "id": 3, "attributes": { "foo": { "bar": 30 } } }' - - '{ "create": { } }' - - '{ "id": 4, "attributes": { "foo": { "bar": 40 } } }' - - - match: { errors: false } - - - do: - search: - index: test - sort: id - - - match: { hits.hits.0._source.id: 1 } - - match: { hits.hits.0._source.attributes.foo\.bar: 10 } - - match: { hits.hits.1._source.id: 2 } - - match: { hits.hits.1._source.attributes.foo\.bar: 20 } - - match: { hits.hits.2._source.id: 3 } - - match: { hits.hits.2._source.attributes.foo\.bar: 30 } - - match: { hits.hits.3._source.id: 4 } - - match: { hits.hits.3._source.attributes.foo\.bar: 40 } - - ---- -subobjects auto with dynamic template: - - requires: - cluster_features: ["mapper.subobjects_auto_fixes"] - reason: requires tracking ignored source and supporting subobjects auto setting - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - subobjects: auto - dynamic_templates: - - attributes_tmpl: - match: attributes - mapping: - type: object - enabled: false - subobjects: auto - properties: - id: - type: integer - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "id": 1, "attributes": { "foo": 10, "path.to.bar": "val1" }, "a": 100, "a.b": 1000 }' - - - match: { errors: false } - - - do: - search: - index: test - sort: id - - - match: { hits.hits.0._source.id: 1 } - - match: { hits.hits.0._source.attributes.foo: 10 } - - match: { hits.hits.0._source.attributes.path\.to\.bar: val1 } - - match: { hits.hits.0._source.a: 100 } - - match: { hits.hits.0._source.a\.b: 1000 } - + - match: { hits.hits.3._source.auto_obj.foo: 40 } + - match: { hits.hits.3._source.auto_obj.foo\.bar: 400 } --- synthetic_source with copy_to: @@ -1869,7 +1755,7 @@ 
synthetic_source with copy_to pointing to ambiguous field and subobjects false: --- synthetic_source with copy_to pointing to ambiguous field and subobjects auto: - requires: - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] reason: requires copy_to support in synthetic source - do: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.put_index_template/15_composition.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.put_index_template/15_composition.yml index 912f4e9f93df9..3d82539944a97 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.put_index_template/15_composition.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.put_index_template/15_composition.yml @@ -453,7 +453,7 @@ --- "Composable index templates that include subobjects: auto at root": - requires: - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.subobjects_auto"] reason: "https://github.com/elastic/elasticsearch/issues/96768 fixed at 8.11.0" test_runner_features: "allowed_warnings" @@ -504,7 +504,7 @@ --- "Composable index templates that include subobjects: auto on arbitrary field": - requires: - cluster_features: ["mapper.subobjects_auto_fixes"] + cluster_features: ["mapper.subobjects_auto"] reason: "https://github.com/elastic/elasticsearch/issues/96768 fixed at 8.11.0" test_runner_features: "allowed_warnings" diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml index 975113953c995..94301b6369b04 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml @@ -1,10 +1,10 @@ --- "Test Index and Search locale dependent mappings / dates": - requires: - test_runner_features: ["allowed_warnings"] + test_runner_features: ["allowed_warnings_regex"] - do: - allowed_warnings: - - "Date format [E, d MMM yyyy HH:mm:ss Z] contains textual field specifiers that could change in JDK 23" + allowed_warnings_regex: + - "Date format \\[E, d MMM yyyy HH:mm:ss Z] contains textual field specifiers that could change in JDK 23.*" indices.create: index: test_index body: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml index 2b77b5558b3d3..8a8dffda69e20 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml @@ -1129,7 +1129,7 @@ fetch geo_point: --- "Test with subobjects: auto": - requires: - cluster_features: "mapper.subobjects_auto_fixes" + cluster_features: "mapper.subobjects_auto" reason: requires support for subobjects auto setting - do: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml index 11259d3e1bfd1..435cda637cca6 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml +++ 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml @@ -80,8 +80,7 @@ ignore_above mapping level setting on arrays: match_all: {} - length: { hits.hits: 1 } - #TODO: synthetic source field reconstruction bug (TBD: add link to the issue here) - #- match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } + - match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } - match: { hits.hits.0._source.flattened.value: [ "jumps over", "the quick brown fox" ] } - match: { hits.hits.0.fields.keyword.0: "foo bar" } - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java new file mode 100644 index 0000000000000..50fa7cfa1fdef --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java @@ -0,0 +1,150 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.Version; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.test.AbstractMultiClustersTestCase; +import org.elasticsearch.test.ESIntegTestCase.ClusterScope; +import org.elasticsearch.test.ESIntegTestCase.Scope; +import org.elasticsearch.test.InternalTestCluster; +import org.junit.Assert; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.equalToIgnoringCase; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasKey; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.oneOf; + +@ClusterScope(scope = Scope.TEST, numDataNodes = 0) +public class ClusterStatsRemoteIT extends AbstractMultiClustersTestCase { + private static final String REMOTE1 = "cluster-a"; + private static final String REMOTE2 = "cluster-b"; + + private static final String INDEX_NAME = "demo"; + + @Override + protected boolean reuseClusters() { + return false; + } + + @Override + protected Collection remoteClusterAlias() { + return List.of(REMOTE1, REMOTE2); + } + + @Override + protected Map skipUnavailableForRemoteClusters() { + return Map.of(REMOTE1, false, REMOTE2, true); + } + + public void testRemoteClusterStats() 
throws ExecutionException, InterruptedException { + setupClusters(); + final Client client = client(LOCAL_CLUSTER); + SearchRequest searchRequest = new SearchRequest("*", "*:*"); + searchRequest.allowPartialSearchResults(false); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(10)); + + // do a search + assertResponse(cluster(LOCAL_CLUSTER).client().search(searchRequest), Assert::assertNotNull); + // collect stats without remotes + ClusterStatsResponse response = client.admin().cluster().prepareClusterStats().get(); + assertNotNull(response.getCcsMetrics()); + var remotesUsage = response.getCcsMetrics().getByRemoteCluster(); + assertThat(remotesUsage.size(), equalTo(3)); + assertNull(response.getRemoteClustersStats()); + // collect stats with remotes + response = client.admin().cluster().execute(TransportClusterStatsAction.TYPE, new ClusterStatsRequest(true)).get(); + assertNotNull(response.getCcsMetrics()); + remotesUsage = response.getCcsMetrics().getByRemoteCluster(); + assertThat(remotesUsage.size(), equalTo(3)); + assertNotNull(response.getRemoteClustersStats()); + var remoteStats = response.getRemoteClustersStats(); + assertThat(remoteStats.size(), equalTo(2)); + for (String clusterAlias : remoteClusterAlias()) { + assertThat(remoteStats, hasKey(clusterAlias)); + assertThat(remotesUsage, hasKey(clusterAlias)); + assertThat(remoteStats.get(clusterAlias).status(), equalToIgnoringCase(ClusterHealthStatus.GREEN.name())); + assertThat(remoteStats.get(clusterAlias).indicesCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).nodesCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).shardsCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).heapBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).memBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).indicesBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).versions(), hasItem(Version.CURRENT.toString())); + assertThat(remoteStats.get(clusterAlias).clusterUUID(), not(equalTo(""))); + assertThat(remoteStats.get(clusterAlias).mode(), oneOf("sniff", "proxy")); + } + assertFalse(remoteStats.get(REMOTE1).skipUnavailable()); + assertTrue(remoteStats.get(REMOTE2).skipUnavailable()); + } + + private void setupClusters() { + int numShardsLocal = randomIntBetween(2, 5); + Settings localSettings = indexSettings(numShardsLocal, randomIntBetween(0, 1)).build(); + assertAcked( + client(LOCAL_CLUSTER).admin() + .indices() + .prepareCreate(INDEX_NAME) + .setSettings(localSettings) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + indexDocs(client(LOCAL_CLUSTER)); + + int numShardsRemote = randomIntBetween(2, 10); + for (String clusterAlias : remoteClusterAlias()) { + final InternalTestCluster remoteCluster = cluster(clusterAlias); + remoteCluster.ensureAtLeastNumDataNodes(randomIntBetween(1, 3)); + assertAcked( + client(clusterAlias).admin() + .indices() + .prepareCreate(INDEX_NAME) + .setSettings(indexSettings(numShardsRemote, randomIntBetween(0, 1))) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + assertFalse( + client(clusterAlias).admin() + .cluster() + .prepareHealth(TEST_REQUEST_TIMEOUT, INDEX_NAME) + .setWaitForGreenStatus() + .setTimeout(TimeValue.timeValueSeconds(30)) + .get() + .isTimedOut() + ); + indexDocs(client(clusterAlias)); + } + + } + + private void indexDocs(Client client) { + int numDocs = between(5, 20); 
+ for (int i = 0; i < numDocs; i++) { + client.prepareIndex(INDEX_NAME).setSource("f", "v", "@timestamp", randomNonNegativeLong()).get(); + } + client.admin().indices().prepareRefresh(INDEX_NAME).get(); + } + +} diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java index c81b9a82e8e32..99d7b28536f7a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java @@ -8,7 +8,6 @@ */ package org.elasticsearch.cluster.shards; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsGroup; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse; @@ -145,17 +144,13 @@ public void testClusterSearchShardsWithBlocks() { enableIndexBlock("test-blocks", SETTING_BLOCKS_METADATA); assertBlocked( null, - asInstanceOf( + safeAwaitAndUnwrapFailure( ClusterBlockException.class, - ExceptionsHelper.unwrapCause( - safeAwaitFailure( - ClusterSearchShardsResponse.class, - l -> client().execute( - TransportClusterSearchShardsAction.TYPE, - new ClusterSearchShardsRequest(TEST_REQUEST_TIMEOUT, "test-blocks"), - l - ) - ) + ClusterSearchShardsResponse.class, + l -> client().execute( + TransportClusterSearchShardsAction.TYPE, + new ClusterSearchShardsRequest(TEST_REQUEST_TIMEOUT, "test-blocks"), + l ) ) ); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java index 48009676ee6b7..74fd945ed3779 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java @@ -31,6 +31,7 @@ public void testClusterHasFeatures() { FeatureService service = internalCluster().getCurrentMasterNodeInstance(FeatureService.class); assertThat(service.getNodeFeatures(), hasKey(FeatureService.FEATURES_SUPPORTED.id())); + assertThat(service.getNodeFeatures(), hasKey(FeatureService.TEST_FEATURES_ENABLED.id())); // check the nodes all have a feature in their cluster state (there should always be features_supported) var response = clusterAdmin().state(new ClusterStateRequest(TEST_REQUEST_TIMEOUT).clear().nodes(true)).actionGet(); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestProcessorNotInstalledOnAllNodesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestProcessorNotInstalledOnAllNodesIT.java index 5e728bede4dbe..6ebb5f5287e1b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestProcessorNotInstalledOnAllNodesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestProcessorNotInstalledOnAllNodesIT.java @@ -10,7 +10,6 @@ package org.elasticsearch.ingest; import org.elasticsearch.ElasticsearchParseException; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ingest.PutPipelineTransportAction; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.common.bytes.BytesReference; @@ -61,17 +60,13 @@ public void testFailPipelineCreation() { ensureStableCluster(2, node2); 
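The nested asInstanceOf(..., ExceptionsHelper.unwrapCause(safeAwaitFailure(...))) chains in this and the neighbouring test diffs are collapsed into a single safeAwaitAndUnwrapFailure call. As a rough, hypothetical sketch only (the real ESTestCase signature may differ), such a helper could simply compose the utilities that the removed code already used:

    // Illustrative shape, assuming the helper only composes the existing test utilities:
    // wait for the listener to fail, unwrap the cause, and assert its type.
    public static <E extends Exception, R> E safeAwaitAndUnwrapFailure(
        Class<E> expectedCauseType,
        Class<R> responseType,
        Consumer<ActionListener<R>> action
    ) {
        // safeAwaitFailure blocks until the listener handed to 'action' is completed exceptionally
        Exception failure = safeAwaitFailure(responseType, action);
        // unwrapCause strips wrappers (such as transport exceptions) before the type check
        return asInstanceOf(expectedCauseType, ExceptionsHelper.unwrapCause(failure));
    }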
assertThat( - asInstanceOf( + safeAwaitAndUnwrapFailure( ElasticsearchParseException.class, - ExceptionsHelper.unwrapCause( - safeAwaitFailure( - AcknowledgedResponse.class, - l -> client().execute( - PutPipelineTransportAction.TYPE, - IngestPipelineTestUtils.putJsonPipelineRequest("id", pipelineSource), - l - ) - ) + AcknowledgedResponse.class, + l -> client().execute( + PutPipelineTransportAction.TYPE, + IngestPipelineTestUtils.putJsonPipelineRequest("id", pipelineSource), + l ) ).getMessage(), containsString("Processor type [test] is not installed on node") @@ -84,17 +79,13 @@ public void testFailPipelineCreationProcessorNotInstalledOnMasterNode() throws E internalCluster().startNode(); assertThat( - asInstanceOf( + safeAwaitAndUnwrapFailure( ElasticsearchParseException.class, - ExceptionsHelper.unwrapCause( - safeAwaitFailure( - AcknowledgedResponse.class, - l -> client().execute( - PutPipelineTransportAction.TYPE, - IngestPipelineTestUtils.putJsonPipelineRequest("id", pipelineSource), - l - ) - ) + AcknowledgedResponse.class, + l -> client().execute( + PutPipelineTransportAction.TYPE, + IngestPipelineTestUtils.putJsonPipelineRequest("id", pipelineSource), + l ) ).getMessage(), equalTo("No processor type exists with name [test]") diff --git a/server/src/internalClusterTest/java/org/elasticsearch/monitor/metrics/IndicesMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/monitor/metrics/IndicesMetricsIT.java new file mode 100644 index 0000000000000..b72257b884f08 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/monitor/metrics/IndicesMetricsIT.java @@ -0,0 +1,245 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.monitor.metrics; + +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.PluginsService; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.TestTelemetryPlugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.hamcrest.Matcher; + +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.mapper.DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasSize; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0) +public class IndicesMetricsIT extends ESIntegTestCase { + + public static class TestAPMInternalSettings extends Plugin { + @Override + public List> getSettings() { + return List.of( + Setting.timeSetting("telemetry.agent.metrics_interval", TimeValue.timeValueSeconds(0), Setting.Property.NodeScope) + ); + } + } + + @Override + protected Collection> nodePlugins() { + return List.of(TestTelemetryPlugin.class, TestAPMInternalSettings.class); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put("telemetry.agent.metrics_interval", TimeValue.timeValueSeconds(0)) // disable metrics cache refresh delay + .build(); + } + + static final String STANDARD_INDEX_COUNT = "es.indices.standard.total"; + static final String STANDARD_DOCS_COUNT = "es.indices.standard.docs.total"; + static final String STANDARD_BYTES_SIZE = "es.indices.standard.bytes.total"; + + static final String TIME_SERIES_INDEX_COUNT = "es.indices.time_series.total"; + static final String TIME_SERIES_DOCS_COUNT = "es.indices.time_series.docs.total"; + static final String TIME_SERIES_BYTES_SIZE = "es.indices.time_series.bytes.total"; + + static final String LOGSDB_INDEX_COUNT = "es.indices.logsdb.total"; + static final String LOGSDB_DOCS_COUNT = "es.indices.logsdb.docs.total"; + static final String LOGSDB_BYTES_SIZE = "es.indices.logsdb.bytes.total"; + + public void testIndicesMetrics() { + String node = internalCluster().startNode(); + ensureStableCluster(1); + final TestTelemetryPlugin telemetry = internalCluster().getInstance(PluginsService.class, node) + .filterPlugins(TestTelemetryPlugin.class) + .findFirst() + .orElseThrow(); + telemetry.resetMeter(); + long numStandardIndices = randomIntBetween(1, 5); + long numStandardDocs = populateStandardIndices(numStandardIndices); + collectThenAssertMetrics( + telemetry, + 1, + Map.of( + STANDARD_INDEX_COUNT, + equalTo(numStandardIndices), + STANDARD_DOCS_COUNT, + equalTo(numStandardDocs), + STANDARD_BYTES_SIZE, + greaterThan(0L), + + TIME_SERIES_INDEX_COUNT, + equalTo(0L), + TIME_SERIES_DOCS_COUNT, + equalTo(0L), + TIME_SERIES_BYTES_SIZE, + equalTo(0L), + + LOGSDB_INDEX_COUNT, + equalTo(0L), + LOGSDB_DOCS_COUNT, + equalTo(0L), + LOGSDB_BYTES_SIZE, + equalTo(0L) + ) + ); + + long numTimeSeriesIndices = randomIntBetween(1, 5); + long numTimeSeriesDocs = populateTimeSeriesIndices(numTimeSeriesIndices); + collectThenAssertMetrics( + telemetry, + 2, + Map.of( + STANDARD_INDEX_COUNT, + equalTo(numStandardIndices), + STANDARD_DOCS_COUNT, + equalTo(numStandardDocs), + STANDARD_BYTES_SIZE, + 
greaterThan(0L), + + TIME_SERIES_INDEX_COUNT, + equalTo(numTimeSeriesIndices), + TIME_SERIES_DOCS_COUNT, + equalTo(numTimeSeriesDocs), + TIME_SERIES_BYTES_SIZE, + greaterThan(20L), + + LOGSDB_INDEX_COUNT, + equalTo(0L), + LOGSDB_DOCS_COUNT, + equalTo(0L), + LOGSDB_BYTES_SIZE, + equalTo(0L) + ) + ); + + long numLogsdbIndices = randomIntBetween(1, 5); + long numLogsdbDocs = populateLogsdbIndices(numLogsdbIndices); + collectThenAssertMetrics( + telemetry, + 3, + Map.of( + STANDARD_INDEX_COUNT, + equalTo(numStandardIndices), + STANDARD_DOCS_COUNT, + equalTo(numStandardDocs), + STANDARD_BYTES_SIZE, + greaterThan(0L), + + TIME_SERIES_INDEX_COUNT, + equalTo(numTimeSeriesIndices), + TIME_SERIES_DOCS_COUNT, + equalTo(numTimeSeriesDocs), + TIME_SERIES_BYTES_SIZE, + greaterThan(20L), + + LOGSDB_INDEX_COUNT, + equalTo(numLogsdbIndices), + LOGSDB_DOCS_COUNT, + equalTo(numLogsdbDocs), + LOGSDB_BYTES_SIZE, + greaterThan(0L) + ) + ); + } + + void collectThenAssertMetrics(TestTelemetryPlugin telemetry, int times, Map> matchers) { + telemetry.collect(); + for (Map.Entry> e : matchers.entrySet()) { + String name = e.getKey(); + List measurements = telemetry.getLongGaugeMeasurement(name); + assertThat(name, measurements, hasSize(times)); + assertThat(name, measurements.getLast().getLong(), e.getValue()); + } + } + + int populateStandardIndices(long numIndices) { + int totalDocs = 0; + for (int i = 0; i < numIndices; i++) { + String indexName = "standard-" + i; + createIndex(indexName); + int numDocs = between(1, 5); + for (int d = 0; d < numDocs; d++) { + indexDoc(indexName, Integer.toString(d), "f", Integer.toString(d)); + } + totalDocs += numDocs; + flush(indexName); + } + return totalDocs; + } + + int populateTimeSeriesIndices(long numIndices) { + int totalDocs = 0; + for (int i = 0; i < numIndices; i++) { + String indexName = "time_series-" + i; + Settings settings = Settings.builder().put("mode", "time_series").putList("routing_path", List.of("host")).build(); + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings(settings) + .setMapping( + "@timestamp", + "type=date", + "host", + "type=keyword,time_series_dimension=true", + "cpu", + "type=long,time_series_metric=gauge" + ) + .get(); + long timestamp = DEFAULT_DATE_TIME_FORMATTER.parseMillis("2024-04-15T00:00:00Z"); + int numDocs = between(1, 5); + for (int d = 0; d < numDocs; d++) { + timestamp += between(1, 5) * 1000L; + client().prepareIndex(indexName) + .setSource("@timestamp", timestamp, "host", randomFrom("prod", "qa"), "cpu", randomIntBetween(1, 100)) + .get(); + } + totalDocs += numDocs; + flush(indexName); + } + return totalDocs; + } + + int populateLogsdbIndices(long numIndices) { + int totalDocs = 0; + for (int i = 0; i < numIndices; i++) { + String indexName = "logsdb-" + i; + Settings settings = Settings.builder().put("mode", "logsdb").build(); + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings(settings) + .setMapping("@timestamp", "type=date", "host.name", "type=keyword", "cpu", "type=long") + .get(); + long timestamp = DEFAULT_DATE_TIME_FORMATTER.parseMillis("2024-04-15T00:00:00Z"); + int numDocs = between(1, 5); + for (int d = 0; d < numDocs; d++) { + timestamp += between(1, 5) * 1000L; + client().prepareIndex(indexName) + .setSource("@timestamp", timestamp, "host.name", randomFrom("prod", "qa"), "cpu", randomIntBetween(1, 100)) + .get(); + } + totalDocs += numDocs; + flush(indexName); + } + return totalDocs; + } +} diff --git 
a/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java index d68aae26560bf..e9efab5934e52 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java @@ -8,7 +8,6 @@ */ package org.elasticsearch.script; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest; @@ -71,20 +70,16 @@ public void testBasics() { assertEquals( "Validation Failed: 1: id cannot contain '#' for stored script;", - asInstanceOf( + safeAwaitAndUnwrapFailure( IllegalArgumentException.class, - ExceptionsHelper.unwrapCause( - safeAwaitFailure( - AcknowledgedResponse.class, - l -> client().execute( - TransportPutStoredScriptAction.TYPE, - new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id("id#") - .content(new BytesArray(Strings.format(""" - {"script": {"lang": "%s", "source": "1"} } - """, LANG)), XContentType.JSON), - l - ) - ) + AcknowledgedResponse.class, + l -> client().execute( + TransportPutStoredScriptAction.TYPE, + new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id("id#") + .content(new BytesArray(Strings.format(""" + {"script": {"lang": "%s", "source": "1"} } + """, LANG)), XContentType.JSON), + l ) ).getMessage() ); @@ -93,21 +88,16 @@ public void testBasics() { public void testMaxScriptSize() { assertEquals( "exceeded max allowed stored script size in bytes [64] with size [65] for script [foobar]", - asInstanceOf( + safeAwaitAndUnwrapFailure( IllegalArgumentException.class, - ExceptionsHelper.unwrapCause( - safeAwaitFailure( - AcknowledgedResponse.class, - l -> client().execute( - TransportPutStoredScriptAction.TYPE, - new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id("foobar") - .content(new BytesArray(Strings.format(""" - {"script": { "lang": "%s", "source":"0123456789abcdef"} }\ - """, LANG)), XContentType.JSON), - l - ) - - ) + AcknowledgedResponse.class, + l -> client().execute( + TransportPutStoredScriptAction.TYPE, + new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id("foobar") + .content(new BytesArray(Strings.format(""" + {"script": { "lang": "%s", "source":"0123456789abcdef"} }\ + """, LANG)), XContentType.JSON), + l ) ).getMessage() ); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java index 44f0ab4fb22a1..1232a61fac2cb 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java @@ -129,7 +129,7 @@ protected Collection> nodePlugins() { private static MedianAbsoluteDeviationAggregationBuilder randomBuilder() { final MedianAbsoluteDeviationAggregationBuilder builder = new MedianAbsoluteDeviationAggregationBuilder("mad"); if (randomBoolean()) { - builder.compression(randomDoubleBetween(25, 1000, false)); + builder.compression(randomDoubleBetween(30, 1000, false)); } return 
builder; } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java index 2206f34e4d2f3..eea60ce13af2f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java @@ -382,9 +382,10 @@ public void testMountCorruptedRepositoryData() throws Exception { Files.write(repo.resolve(getRepositoryDataBlobName(repositoryData.getGenId())), randomByteArrayOfLength(randomIntBetween(1, 100))); logger.info("--> verify loading repository data throws RepositoryException"); - asInstanceOf( + safeAwaitFailure( RepositoryException.class, - safeAwaitFailure(RepositoryData.class, l -> repository.getRepositoryData(EsExecutors.DIRECT_EXECUTOR_SERVICE, l)) + RepositoryData.class, + l -> repository.getRepositoryData(EsExecutors.DIRECT_EXECUTOR_SERVICE, l) ); final String otherRepoName = "other-repo"; @@ -397,9 +398,10 @@ public void testMountCorruptedRepositoryData() throws Exception { final Repository otherRepo = getRepositoryOnMaster(otherRepoName); logger.info("--> verify loading repository data from newly mounted repository throws RepositoryException"); - asInstanceOf( + safeAwaitFailure( RepositoryException.class, - safeAwaitFailure(RepositoryData.class, l -> repository.getRepositoryData(EsExecutors.DIRECT_EXECUTOR_SERVICE, l)) + RepositoryData.class, + l -> repository.getRepositoryData(EsExecutors.DIRECT_EXECUTOR_SERVICE, l) ); } diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 2d0f526f64a69..3b7cc05e54351 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -225,6 +225,10 @@ static TransportVersion def(int id) { public static final TransportVersion ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE = def(8_749_00_0); public static final TransportVersion SEMANTIC_TEXT_SEARCH_INFERENCE_ID = def(8_750_00_0); public static final TransportVersion ML_INFERENCE_CHUNKING_SETTINGS = def(8_751_00_0); + public static final TransportVersion SEMANTIC_QUERY_INNER_HITS = def(8_752_00_0); + public static final TransportVersion RETAIN_ILM_STEP_INFO = def(8_753_00_0); + public static final TransportVersion ADD_DATA_STREAM_OPTIONS = def(8_754_00_0); + public static final TransportVersion CCS_REMOTE_TELEMETRY_STATS = def(8_755_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java index 8df34d882941a..fb7539ea218d1 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java @@ -19,6 +19,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.features.FeatureService; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.rest.RestController; @@ -151,6 +152,10 @@ public NodeCapabilitiesRequest( this.restApiVersion = restApiVersion; } + @UpdateForV9 // 8.x blows up in a mixed cluster when trying to read RestApiVersion.forMajor(9) + // ./gradlew ":qa:mixed-cluster:v8.16.0#mixedClusterTest" + // -Dtests.class="org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT" + // -Dtests.method="test {p0=capabilities/10_basic/Capabilities API}" @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -159,7 +164,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(path); out.writeCollection(parameters, StreamOutput::writeString); out.writeCollection(capabilities, StreamOutput::writeString); - out.writeVInt(restApiVersion.major); + // Fixme: lies! all lies! + out.writeVInt(8); + // out.writeVInt(restApiVersion.major); } } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponse.java index 983aec7173776..a9badf4694e68 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponse.java @@ -11,18 +11,12 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.xcontent.ConstructingObjectParser; -import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; -import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; - /** * This response is specific to the REST client. {@link org.elasticsearch.action.admin.cluster.state.ClusterStateResponse} * is used on the transport layer. @@ -33,23 +27,9 @@ public class RestClusterGetSettingsResponse implements ToXContentObject { private final Settings transientSettings; private final Settings defaultSettings; - static final String PERSISTENT_FIELD = "persistent"; - static final String TRANSIENT_FIELD = "transient"; - static final String DEFAULTS_FIELD = "defaults"; - - private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( - "cluster_get_settings_response", - true, - a -> { - Settings defaultSettings = a[2] == null ? 
Settings.EMPTY : (Settings) a[2]; - return new RestClusterGetSettingsResponse((Settings) a[0], (Settings) a[1], defaultSettings); - } - ); - static { - PARSER.declareObject(constructorArg(), (p, c) -> Settings.fromXContent(p), new ParseField(PERSISTENT_FIELD)); - PARSER.declareObject(constructorArg(), (p, c) -> Settings.fromXContent(p), new ParseField(TRANSIENT_FIELD)); - PARSER.declareObject(optionalConstructorArg(), (p, c) -> Settings.fromXContent(p), new ParseField(DEFAULTS_FIELD)); - } + public static final String PERSISTENT_FIELD = "persistent"; + public static final String TRANSIENT_FIELD = "transient"; + public static final String DEFAULTS_FIELD = "defaults"; public RestClusterGetSettingsResponse(Settings persistentSettings, Settings transientSettings, Settings defaultSettings) { this.persistentSettings = Objects.requireNonNullElse(persistentSettings, Settings.EMPTY); @@ -120,10 +100,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } - public static RestClusterGetSettingsResponse fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); - } - @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java index d9c55ba097b6c..a62db92687e5a 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java @@ -20,16 +20,50 @@ * A request to get cluster level stats. */ public class ClusterStatsRequest extends BaseNodesRequest { + /** + * Should the remote cluster stats be included in the response. + */ + private final boolean doRemotes; + /** + * Return stripped down stats for remote clusters. + */ + private boolean remoteStats; + /** * Get stats from nodes based on the nodes ids specified. If none are passed, stats * based on all nodes will be returned. */ public ClusterStatsRequest(String... nodesIds) { + this(false, nodesIds); + } + + public ClusterStatsRequest(boolean doRemotes, String... nodesIds) { super(nodesIds); + this.doRemotes = doRemotes; + this.remoteStats = false; } @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { return new CancellableTask(id, type, action, "", parentTaskId, headers); } + + public ClusterStatsRequest asRemoteStats() { + this.remoteStats = true; + return this; + } + + /** + * Should the remote cluster stats be included in the response. + */ + public boolean doRemotes() { + return doRemotes; + } + + /** + * Should the response be a stripped down version of the stats for remote clusters. 
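(Editor's aside, not part of the patch.) The two flags introduced on ClusterStatsRequest are deliberately independent: doRemotes is fixed at construction time and tells the coordinating cluster to fan out to its configured remotes, while remoteStats is flipped via asRemoteStats() on the request that a remote cluster handles, so the remote answers with the stripped-down payload. A minimal sketch of the three combinations, using only the constructors and accessors added above, as it might appear inside a test method:

    ClusterStatsRequest local = new ClusterStatsRequest();                        // plain local stats
    ClusterStatsRequest withRemotes = new ClusterStatsRequest(true);              // also fan out to remote clusters
    ClusterStatsRequest remoteSide = new ClusterStatsRequest().asRemoteStats();   // built by the remote-side handler

    assert withRemotes.doRemotes() && withRemotes.isRemoteStats() == false;
    assert remoteSide.doRemotes() == false && remoteSide.isRemoteStats();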
+ */ + public boolean isRemoteStats() { + return remoteStats; + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 86900f830f4be..1a77a3d4d5399 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -18,12 +18,15 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.util.List; import java.util.Locale; +import java.util.Map; +import java.util.Set; import static org.elasticsearch.action.search.TransportSearchAction.CCS_TELEMETRY_FEATURE_FLAG; @@ -34,10 +37,10 @@ public class ClusterStatsResponse extends BaseNodesResponse remoteClustersStats; public ClusterStatsResponse( long timestamp, @@ -48,7 +51,8 @@ public ClusterStatsResponse( MappingStats mappingStats, AnalysisStats analysisStats, VersionStats versionStats, - ClusterSnapshotStats clusterSnapshotStats + ClusterSnapshotStats clusterSnapshotStats, + Map remoteClustersStats ) { super(clusterName, nodes, failures); this.clusterUUID = clusterUUID; @@ -75,6 +79,7 @@ public ClusterStatsResponse( // stats should be the same on every node so just pick one of them .findAny() .orElse(RepositoryUsageStats.EMPTY); + this.remoteClustersStats = remoteClustersStats; } public String getClusterUUID() { @@ -101,6 +106,10 @@ public CCSTelemetrySnapshot getCcsMetrics() { return ccsMetrics; } + public Map getRemoteClustersStats() { + return remoteClustersStats; + } + @Override public void writeTo(StreamOutput out) throws IOException { TransportAction.localOnly(); @@ -138,6 +147,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { builder.startObject("ccs"); + if (remoteClustersStats != null) { + builder.field("clusters", remoteClustersStats); + } ccsMetrics.toXContent(builder, params); builder.endObject(); } @@ -150,4 +162,74 @@ public String toString() { return Strings.toString(this, true, true); } + /** + * Represents the information about a remote cluster. 
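(Editor's aside, not part of the patch.) The RemoteClusterStats record that follows is built in two phases: first a placeholder holding only locally known connection information (mode, skip_unavailable, compression) with status "unavailable" and zeroed counters, then acceptResponse() replaces the placeholder fields once the remote cluster actually answers. A hedged sketch of that flow, with example values assumed for illustration:

    // placeholder created before contacting the remote; mode/compression values are illustrative
    ClusterStatsResponse.RemoteClusterStats placeholder =
        new ClusterStatsResponse.RemoteClusterStats("sniff", true, "false");
    // placeholder.status() is "unavailable" until a RemoteClusterStatsResponse arrives
    ClusterStatsResponse.RemoteClusterStats resolved = placeholder.acceptResponse(remoteResponse);
    // resolved keeps mode/skipUnavailable/compression and takes counts, versions and status from the remote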
+ */ + public record RemoteClusterStats( + String clusterUUID, + String mode, + boolean skipUnavailable, + String transportCompress, + Set versions, + String status, + long nodesCount, + long shardsCount, + long indicesCount, + long indicesBytes, + long heapBytes, + long memBytes + ) implements ToXContentFragment { + public RemoteClusterStats(String mode, boolean skipUnavailable, String transportCompress) { + this( + "unavailable", + mode, + skipUnavailable, + transportCompress.toLowerCase(Locale.ROOT), + Set.of(), + "unavailable", + 0, + 0, + 0, + 0, + 0, + 0 + ); + } + + public RemoteClusterStats acceptResponse(RemoteClusterStatsResponse remoteResponse) { + return new RemoteClusterStats( + remoteResponse.getClusterUUID(), + mode, + skipUnavailable, + transportCompress, + remoteResponse.getVersions(), + remoteResponse.getStatus().name().toLowerCase(Locale.ROOT), + remoteResponse.getNodesCount(), + remoteResponse.getShardsCount(), + remoteResponse.getIndicesCount(), + remoteResponse.getIndicesBytes(), + remoteResponse.getHeapBytes(), + remoteResponse.getMemBytes() + ); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("cluster_uuid", clusterUUID); + builder.field("mode", mode); + builder.field("skip_unavailable", skipUnavailable); + builder.field("transport.compress", transportCompress); + builder.field("status", status); + builder.field("version", versions); + builder.field("nodes_count", nodesCount); + builder.field("shards_count", shardsCount); + builder.field("indices_count", indicesCount); + builder.humanReadableField("indices_total_size_in_bytes", "indices_total_size", ByteSizeValue.ofBytes(indicesBytes)); + builder.humanReadableField("max_heap_in_bytes", "max_heap", ByteSizeValue.ofBytes(heapBytes)); + builder.humanReadableField("mem_total_in_bytes", "mem_total", ByteSizeValue.ofBytes(memBytes)); + builder.endObject(); + return builder; + } + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java new file mode 100644 index 0000000000000..47843a91351ee --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.TransportVersions; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * A request to get cluster level stats from the remote cluster. 
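(Editor's aside, not part of the patch.) The writeTo() of the request class below refuses to serialize to a node older than CCS_REMOTE_TELEMETRY_STATS; the caller-side counterpart of that check lives in sendItemRequest of the fan-out further down in this patch, where an old remote is simply skipped instead of failing the whole stats call. The pairing looks like this (mirrors the patch code):

    if (connection.getTransportVersion().before(TransportVersions.CCS_REMOTE_TELEMETRY_STATS)) {
        responseListener.onResponse(null);   // remote is too old: skip it, keep the "unavailable" placeholder
    } else {
        remoteClusterClient.execute(connection, TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, responseListener);
    }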
+ */ +public class RemoteClusterStatsRequest extends ActionRequest { + public RemoteClusterStatsRequest(StreamInput in) throws IOException { + super(in); + } + + public RemoteClusterStatsRequest() { + super(); + } + + @Override + public ActionRequestValidationException validate() { + return null; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + assert out.getTransportVersion().onOrAfter(TransportVersions.CCS_REMOTE_TELEMETRY_STATS) + : "RemoteClusterStatsRequest is not supported by the remote cluster"; + if (out.getTransportVersion().before(TransportVersions.CCS_REMOTE_TELEMETRY_STATS)) { + throw new UnsupportedOperationException("RemoteClusterStatsRequest is not supported by the remote cluster"); + } + super.writeTo(out); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java new file mode 100644 index 0000000000000..9a140b6b7424e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java @@ -0,0 +1,116 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.Set; + +/** + * Trimmed down cluster stats response for reporting to a remote cluster. 
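(Editor's aside, not part of the patch.) The response class that follows writes its fields in a fixed order and the StreamInput constructor must read them back in exactly the same order. A hedged round-trip sketch, assuming the usual BytesStreamOutput/StreamInput test helpers from the server test utilities, inside a test method that declares throws IOException:

    RemoteClusterStatsResponse original = new RemoteClusterStatsResponse(
        "uuid-1", ClusterHealthStatus.GREEN, Set.of("8.16.0"), 3, 12, 4, 1_000_000L, 2_000_000L, 4_000_000L);
    try (BytesStreamOutput out = new BytesStreamOutput()) {
        original.writeTo(out);
        try (StreamInput in = out.bytes().streamInput()) {
            RemoteClusterStatsResponse copy = new RemoteClusterStatsResponse(in);
            assert copy.getNodesCount() == 3 && copy.getStatus() == ClusterHealthStatus.GREEN;
        }
    }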
+ */ +public class RemoteClusterStatsResponse extends ActionResponse { + final String clusterUUID; + final ClusterHealthStatus status; + private final Set versions; + private final long nodesCount; + private final long shardsCount; + private final long indicesCount; + private final long indicesBytes; + private final long heapBytes; + private final long memBytes; + + public Set getVersions() { + return versions; + } + + public long getNodesCount() { + return nodesCount; + } + + public long getShardsCount() { + return shardsCount; + } + + public long getIndicesCount() { + return indicesCount; + } + + public long getIndicesBytes() { + return indicesBytes; + } + + public long getHeapBytes() { + return heapBytes; + } + + public long getMemBytes() { + return memBytes; + } + + public RemoteClusterStatsResponse( + String clusterUUID, + ClusterHealthStatus status, + Set versions, + long nodesCount, + long shardsCount, + long indicesCount, + long indicesBytes, + long heapBytes, + long memBytes + ) { + this.clusterUUID = clusterUUID; + this.status = status; + this.versions = versions; + this.nodesCount = nodesCount; + this.shardsCount = shardsCount; + this.indicesCount = indicesCount; + this.indicesBytes = indicesBytes; + this.heapBytes = heapBytes; + this.memBytes = memBytes; + } + + public String getClusterUUID() { + return this.clusterUUID; + } + + public ClusterHealthStatus getStatus() { + return this.status; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(clusterUUID); + status.writeTo(out); + out.writeStringCollection(versions); + out.writeLong(nodesCount); + out.writeLong(shardsCount); + out.writeLong(indicesCount); + out.writeLong(indicesBytes); + out.writeLong(heapBytes); + out.writeLong(memBytes); + } + + public RemoteClusterStatsResponse(StreamInput in) throws IOException { + super(in); + this.clusterUUID = in.readString(); + this.status = ClusterHealthStatus.readFrom(in); + this.versions = in.readCollectionAsSet(StreamInput::readString); + this.nodesCount = in.readLong(); + this.shardsCount = in.readLong(); + this.indicesCount = in.readLong(); + this.indicesBytes = in.readLong(); + this.heapBytes = in.readLong(); + this.memBytes = in.readLong(); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 6cac8c8f8ca09..ab68f1d8481fd 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -9,6 +9,8 @@ package org.elasticsearch.action.admin.cluster.stats; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRunnable; @@ -16,10 +18,12 @@ import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; +import org.elasticsearch.action.admin.cluster.stats.ClusterStatsResponse.RemoteClusterStats; import org.elasticsearch.action.admin.indices.stats.CommonStats; import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.action.admin.indices.stats.ShardStats; import 
org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.CancellableFanOut; import org.elasticsearch.action.support.RefCountingListener; import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.nodes.TransportNodesAction; @@ -32,6 +36,7 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CancellableSingleObjectCache; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.UpdateForV9; @@ -48,6 +53,9 @@ import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.RemoteClusterConnection; +import org.elasticsearch.transport.RemoteClusterService; +import org.elasticsearch.transport.RemoteConnectionInfo; import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.Transports; @@ -56,12 +64,19 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.concurrent.Executor; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; +import java.util.stream.Collectors; +import static org.elasticsearch.TransportVersions.CCS_REMOTE_TELEMETRY_STATS; + +/** + * Transport action implementing _cluster/stats API. + */ public class TransportClusterStatsAction extends TransportNodesAction< ClusterStatsRequest, ClusterStatsResponse, @@ -70,6 +85,7 @@ public class TransportClusterStatsAction extends TransportNodesAction< SubscribableListener> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats"); + private static final CommonStatsFlags SHARD_STATS_FLAGS = new CommonStatsFlags( CommonStatsFlags.Flag.Docs, CommonStatsFlags.Flag.Store, @@ -80,7 +96,9 @@ public class TransportClusterStatsAction extends TransportNodesAction< CommonStatsFlags.Flag.DenseVector, CommonStatsFlags.Flag.SparseVector ); + private static final Logger logger = LogManager.getLogger(TransportClusterStatsAction.class); + private final Settings settings; private final NodeService nodeService; private final IndicesService indicesService; private final RepositoriesService repositoriesService; @@ -90,6 +108,8 @@ public class TransportClusterStatsAction extends TransportNodesAction< private final Executor clusterStateStatsExecutor; private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; + private final RemoteClusterService remoteClusterService; + private final TransportRemoteClusterStatsAction remoteClusterStatsAction; @Inject public TransportClusterStatsAction( @@ -100,7 +120,9 @@ public TransportClusterStatsAction( IndicesService indicesService, RepositoriesService repositoriesService, UsageService usageService, - ActionFilters actionFilters + ActionFilters actionFilters, + Settings settings, + TransportRemoteClusterStatsAction remoteClusterStatsAction ) { super( TYPE.name(), @@ -118,6 +140,9 @@ public TransportClusterStatsAction( this.clusterStateStatsExecutor = threadPool.executor(ThreadPool.Names.MANAGEMENT); this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); this.analysisStatsCache = new 
MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); + this.remoteClusterService = transportService.getRemoteClusterService(); + this.settings = settings; + this.remoteClusterStatsAction = remoteClusterStatsAction; } @Override @@ -125,14 +150,13 @@ protected SubscribableListener createActionContext(Task task, C assert task instanceof CancellableTask; final var cancellableTask = (CancellableTask) task; final var additionalStatsListener = new SubscribableListener(); - AdditionalStats.compute( - cancellableTask, - clusterStateStatsExecutor, - clusterService, - mappingStatsCache, - analysisStatsCache, - additionalStatsListener - ); + if (request.isRemoteStats() == false) { + final AdditionalStats additionalStats = new AdditionalStats(); + additionalStats.compute(cancellableTask, request, additionalStatsListener); + } else { + // For remote stats request, we don't need to compute anything + additionalStatsListener.onResponse(null); + } return additionalStatsListener; } @@ -150,18 +174,34 @@ protected void newResponseAsync( + "the cluster state that are too slow for a transport thread" ); assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); + additionalStatsListener.andThenApply( - additionalStats -> new ClusterStatsResponse( - System.currentTimeMillis(), - additionalStats.clusterUUID(), - clusterService.getClusterName(), - responses, - failures, - additionalStats.mappingStats(), - additionalStats.analysisStats(), - VersionStats.of(clusterService.state().metadata(), responses), - additionalStats.clusterSnapshotStats() - ) + additionalStats -> request.isRemoteStats() + // Return stripped down stats for remote clusters + ? new ClusterStatsResponse( + System.currentTimeMillis(), + clusterService.state().metadata().clusterUUID(), + clusterService.getClusterName(), + responses, + List.of(), + null, + null, + null, + null, + Map.of() + ) + : new ClusterStatsResponse( + System.currentTimeMillis(), + additionalStats.clusterUUID(), + clusterService.getClusterName(), + responses, + failures, + additionalStats.mappingStats(), + additionalStats.analysisStats(), + VersionStats.of(clusterService.state().metadata(), responses), + additionalStats.clusterSnapshotStats(), + additionalStats.getRemoteStats() + ) ).addListener(listener); } @@ -315,36 +355,33 @@ protected boolean isFresh(Long currentKey, Long newKey) { } } - public static final class AdditionalStats { + public final class AdditionalStats { private String clusterUUID; private MappingStats mappingStats; private AnalysisStats analysisStats; private ClusterSnapshotStats clusterSnapshotStats; + private Map remoteStats; - static void compute( - CancellableTask task, - Executor executor, - ClusterService clusterService, - MetadataStatsCache mappingStatsCache, - MetadataStatsCache analysisStatsCache, - ActionListener listener - ) { - executor.execute(ActionRunnable.wrap(listener, l -> { + void compute(CancellableTask task, ClusterStatsRequest request, ActionListener listener) { + clusterStateStatsExecutor.execute(ActionRunnable.wrap(listener, l -> { task.ensureNotCancelled(); - final var result = new AdditionalStats(); - result.compute( + internalCompute( + task, + request, clusterService.state(), mappingStatsCache, analysisStatsCache, task::isCancelled, clusterService.threadPool().absoluteTimeInMillis(), - l.map(ignored -> result) + l.map(ignored -> this) ); })); } - private void compute( + private void internalCompute( + CancellableTask task, + ClusterStatsRequest request, ClusterState clusterState, MetadataStatsCache 
mappingStatsCache, MetadataStatsCache analysisStatsCache, @@ -358,6 +395,18 @@ private void compute( mappingStatsCache.get(metadata, isCancelledSupplier, listeners.acquire(s -> mappingStats = s)); analysisStatsCache.get(metadata, isCancelledSupplier, listeners.acquire(s -> analysisStats = s)); clusterSnapshotStats = ClusterSnapshotStats.of(clusterState, absoluteTimeInMillis); + if (doRemotes(request)) { + var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); + if (remotes.isEmpty()) { + remoteStats = Map.of(); + } else { + new RemoteStatsFanout(task, transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION)).start( + task, + remotes, + listeners.acquire(s -> remoteStats = s) + ); + } + } } } @@ -376,5 +425,79 @@ AnalysisStats analysisStats() { ClusterSnapshotStats clusterSnapshotStats() { return clusterSnapshotStats; } + + public Map getRemoteStats() { + return remoteStats; + } + } + + private static boolean doRemotes(ClusterStatsRequest request) { + return request.doRemotes(); + } + + private class RemoteStatsFanout extends CancellableFanOut> { + private final Executor requestExecutor; + private final TaskId taskId; + private Map remoteClustersStats; + + RemoteStatsFanout(Task task, Executor requestExecutor) { + this.requestExecutor = requestExecutor; + this.taskId = new TaskId(clusterService.getNodeName(), task.getId()); + } + + @Override + protected void sendItemRequest(String clusterAlias, ActionListener listener) { + var remoteClusterClient = remoteClusterService.getRemoteClusterClient( + clusterAlias, + requestExecutor, + RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED + ); + var remoteRequest = new RemoteClusterStatsRequest(); + remoteRequest.setParentTask(taskId); + remoteClusterClient.getConnection(remoteRequest, listener.delegateFailureAndWrap((responseListener, connection) -> { + if (connection.getTransportVersion().before(CCS_REMOTE_TELEMETRY_STATS)) { + responseListener.onResponse(null); + } else { + remoteClusterClient.execute(connection, TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, responseListener); + } + })); + } + + @Override + protected void onItemResponse(String clusterAlias, RemoteClusterStatsResponse response) { + if (response != null) { + remoteClustersStats.computeIfPresent(clusterAlias, (k, v) -> v.acceptResponse(response)); + } + } + + @Override + protected void onItemFailure(String clusterAlias, Exception e) { + logger.warn("Failed to get remote cluster stats for [{}]: {}", clusterAlias, e); + } + + void start(Task task, Collection remotes, ActionListener> listener) { + this.remoteClustersStats = remotes.stream().collect(Collectors.toConcurrentMap(r -> r, this::makeRemoteClusterStats)); + super.run(task, remotes.iterator(), listener); + } + + /** + * Create static portion of RemoteClusterStats for a given cluster alias. 
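(Editor's aside, not part of the patch.) The RemoteStatsFanout above relies on a simple placeholder-and-merge idiom: start() seeds a concurrent map with one static placeholder per registered alias, and onItemResponse() only overwrites entries that already exist, so remotes that never answer (or are too old) still appear in the result as "unavailable". The same idiom in a few lines of plain Java, as it might appear in a test:

    import java.util.List;
    import java.util.Map;
    import java.util.stream.Collectors;

    List<String> aliases = List.of("cluster_one", "cluster_two");
    Map<String, String> stats = aliases.stream()
        .collect(Collectors.toConcurrentMap(alias -> alias, alias -> "unavailable")); // placeholder per alias
    stats.computeIfPresent("cluster_one", (alias, placeholder) -> "green");           // merge an incoming response
    // "cluster_two" keeps its "unavailable" placeholder if it never responds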
+ */ + RemoteClusterStats makeRemoteClusterStats(String clusterAlias) { + RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); + RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); + var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); + return new RemoteClusterStats( + remoteConnectionInfo.getModeInfo().modeName(), + remoteConnection.isSkipUnavailable(), + compression.toString() + ); + } + + @Override + protected Map onCompletion() { + return remoteClustersStats; + } } + } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java new file mode 100644 index 0000000000000..4d57f10807af6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.RemoteClusterActionType; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.injection.guice.Inject; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.TransportService; + +/** + * Handler action for incoming {@link RemoteClusterStatsRequest}. + * Will pass the work to {@link TransportClusterStatsAction} and return the response. 
+ */ +public class TransportRemoteClusterStatsAction extends HandledTransportAction { + + public static final String NAME = "cluster:monitor/stats/remote"; + public static final ActionType TYPE = new ActionType<>(NAME); + public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( + NAME, + RemoteClusterStatsResponse::new + ); + private final NodeClient client; + + @Inject + public TransportRemoteClusterStatsAction(NodeClient client, TransportService transportService, ActionFilters actionFilters) { + super(NAME, transportService, actionFilters, RemoteClusterStatsRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); + this.client = client; + } + + @Override + protected void doExecute(Task task, RemoteClusterStatsRequest request, ActionListener listener) { + ClusterStatsRequest subRequest = new ClusterStatsRequest().asRemoteStats(); + subRequest.setParentTask(request.getParentTask()); + client.execute( + TransportClusterStatsAction.TYPE, + subRequest, + listener.map( + clusterStatsResponse -> new RemoteClusterStatsResponse( + clusterStatsResponse.getClusterUUID(), + clusterStatsResponse.getStatus(), + clusterStatsResponse.getNodesStats().getVersions(), + clusterStatsResponse.getNodesStats().getCounts().getTotal(), + clusterStatsResponse.getIndicesStats().getShards().getTotal(), + clusterStatsResponse.getIndicesStats().getIndexCount(), + clusterStatsResponse.getIndicesStats().getStore().sizeInBytes(), + clusterStatsResponse.getNodesStats().getJvm().getHeapMax().getBytes(), + clusterStatsResponse.getNodesStats().getOs().getMem().getTotal().getBytes() + ) + ) + ); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponse.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponse.java index a17c998230a31..81b0ad6934ebb 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponse.java @@ -12,38 +12,18 @@ import org.elasticsearch.action.support.master.ShardsAcknowledgedResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.xcontent.ConstructingObjectParser; -import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; - /** * A response for a create index action. 
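(Editor's aside, not part of the patch.) The next few files drop the server-side ConstructingObjectParser definitions from response classes and instead expose the field names as public constants. A consumer that still needs to parse the JSON form (for example a client-side test) can rebuild an equivalent parser from those constants; this sketch mirrors the removed code rather than any API that remains in the server:

    // imports assumed: org.elasticsearch.xcontent.ConstructingObjectParser, org.elasticsearch.xcontent.ParseField
    ConstructingObjectParser<CreateIndexResponse, Void> parser = new ConstructingObjectParser<>(
        "create_index",
        true,
        args -> new CreateIndexResponse((boolean) args[0], (boolean) args[1], (String) args[2])
    );
    parser.declareBoolean(ConstructingObjectParser.constructorArg(), new ParseField(AcknowledgedResponse.ACKNOWLEDGED_KEY));
    parser.declareBoolean(ConstructingObjectParser.constructorArg(), ShardsAcknowledgedResponse.SHARDS_ACKNOWLEDGED);
    parser.declareString(ConstructingObjectParser.constructorArg(), CreateIndexResponse.INDEX);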
*/ public class CreateIndexResponse extends ShardsAcknowledgedResponse { - private static final ParseField INDEX = new ParseField("index"); - - private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( - "create_index", - true, - args -> new CreateIndexResponse((boolean) args[0], (boolean) args[1], (String) args[2]) - ); - - static { - declareFields(PARSER); - } - - protected static void declareFields(ConstructingObjectParser objectParser) { - declareAcknowledgedAndShardsAcknowledgedFields(objectParser); - objectParser.declareField(constructorArg(), (parser, context) -> parser.textOrNull(), INDEX, ObjectParser.ValueType.STRING); - } + public static final ParseField INDEX = new ParseField("index"); private final String index; @@ -74,10 +54,6 @@ protected void addCustomFields(XContentBuilder builder, Params params) throws IO builder.field(INDEX.getPreferredName(), index()); } - public static CreateIndexResponse fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); - } - @Override public boolean equals(Object o) { if (super.equals(o)) { diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java index dbcece1eb4364..9c5b6097b11bd 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java @@ -15,13 +15,12 @@ import org.elasticsearch.action.IndicesRequest; import org.elasticsearch.action.ValidateActions; import org.elasticsearch.action.support.IndicesOptions; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; -import org.elasticsearch.transport.RemoteClusterService; +import org.elasticsearch.transport.RemoteClusterAware; import java.io.IOException; import java.util.Arrays; @@ -166,13 +165,7 @@ public String getDescription() { boolean localIndicesPresent(String[] indices) { for (String index : indices) { - // ensure that `index` is a remote name and not a date math expression which includes ':' symbol - // since date math expression after evaluation should not contain ':' symbol - // NOTE: index expressions can be prefixed with "-" for index exclusion, which will not be parsed by resolveDateMathExpression - String indexExpression = IndexNameExpressionResolver.resolveDateMathExpression( - index.charAt(0) == '-' ? 
index.substring(1) : index - ); - if (indexExpression.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR) < 0) { + if (RemoteClusterAware.isRemoteIndexName(index) == false) { return true; } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java b/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java index cfc5b7802d989..0c22a17bb1f6b 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java @@ -412,7 +412,7 @@ yield new DataStreamAutoShardingEvent( dataStream.rollover( indexMetadata.getIndex(), newGeneration, - metadata.isTimeSeriesTemplate(templateV2), + metadata.retrieveIndexModeFromTemplate(templateV2), dataStreamAutoShardingEvent ) ); diff --git a/server/src/main/java/org/elasticsearch/action/search/ShardSearchFailure.java b/server/src/main/java/org/elasticsearch/action/search/ShardSearchFailure.java index 42957e7c932d1..9f40584733250 100644 --- a/server/src/main/java/org/elasticsearch/action/search/ShardSearchFailure.java +++ b/server/src/main/java/org/elasticsearch/action/search/ShardSearchFailure.java @@ -12,32 +12,25 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ShardOperationFailedException; -import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.Nullable; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchException; import org.elasticsearch.search.SearchShardTarget; -import org.elasticsearch.transport.RemoteClusterAware; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; -import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; - /** * Represents a failure to search on a specific shard. 
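(Editor's aside, not part of the patch.) Several call sites in this patch stop splitting "cluster:index" expressions by hand and delegate to RemoteClusterAware.isRemoteIndexName and RemoteClusterAware.splitIndexName instead (ResolveClusterActionRequest above, TransportSearchHelper below). A hedged sketch of the expected behaviour; the null-alias convention for local names is inferred from how the call sites consume the result, not confirmed by the patch itself:

    String[] remote = RemoteClusterAware.splitIndexName("cluster_one:logs-2024");
    // expected: remote[0] == "cluster_one", remote[1] == "logs-2024"
    String[] local = RemoteClusterAware.splitIndexName("logs-2024");
    // expected: local[0] == null (no remote alias), local[1] == "logs-2024"
    boolean isRemote = RemoteClusterAware.isRemoteIndexName("cluster_one:logs-2024"); // true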
*/ public class ShardSearchFailure extends ShardOperationFailedException { - private static final String REASON_FIELD = "reason"; - private static final String NODE_FIELD = "node"; - private static final String INDEX_FIELD = "index"; - private static final String SHARD_FIELD = "shard"; + public static final String REASON_FIELD = "reason"; + public static final String NODE_FIELD = "node"; + public static final String INDEX_FIELD = "index"; + public static final String SHARD_FIELD = "shard"; public static final ShardSearchFailure[] EMPTY_ARRAY = new ShardSearchFailure[0]; @@ -125,51 +118,4 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } - public static ShardSearchFailure fromXContent(XContentParser parser) throws IOException { - XContentParser.Token token; - ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); - String currentFieldName = null; - int shardId = -1; - String indexName = null; - String clusterAlias = null; - String nodeId = null; - ElasticsearchException exception = null; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (token.isValue()) { - if (SHARD_FIELD.equals(currentFieldName)) { - shardId = parser.intValue(); - } else if (INDEX_FIELD.equals(currentFieldName)) { - indexName = parser.text(); - int indexOf = indexName.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR); - if (indexOf > 0) { - clusterAlias = indexName.substring(0, indexOf); - indexName = indexName.substring(indexOf + 1); - } - } else if (NODE_FIELD.equals(currentFieldName)) { - nodeId = parser.text(); - } else { - parser.skipChildren(); - } - } else if (token == XContentParser.Token.START_OBJECT) { - if (REASON_FIELD.equals(currentFieldName)) { - exception = ElasticsearchException.fromXContent(parser); - } else { - parser.skipChildren(); - } - } else { - parser.skipChildren(); - } - } - SearchShardTarget searchShardTarget = null; - if (nodeId != null) { - searchShardTarget = new SearchShardTarget( - nodeId, - new ShardId(new Index(indexName, IndexMetadata.INDEX_UUID_NA_VALUE), shardId), - clusterAlias - ); - } - return new ShardSearchFailure(exception, searchShardTarget); - } } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java index 4ed8feb098ad2..4e3544f0170cb 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java @@ -110,15 +110,9 @@ private static SearchContextIdForNode readSearchContextIdForNodeExcludingContext private static SearchContextIdForNode innerReadSearchContextIdForNode(String contextUUID, StreamInput in) throws IOException { long id = in.readLong(); - String target = in.readString(); - String clusterAlias; - final int index = target.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR); - if (index == -1) { - clusterAlias = null; - } else { - clusterAlias = target.substring(0, index); - target = target.substring(index + 1); - } + String[] split = RemoteClusterAware.splitIndexName(in.readString()); + String clusterAlias = split[0]; + String target = split[1]; return new SearchContextIdForNode(clusterAlias, target, new ShardSearchContextId(contextUUID, id)); } diff --git 
a/server/src/main/java/org/elasticsearch/action/support/master/AcknowledgedResponse.java b/server/src/main/java/org/elasticsearch/action/support/master/AcknowledgedResponse.java index 89e3c98ea003b..dcee489e92468 100644 --- a/server/src/main/java/org/elasticsearch/action/support/master/AcknowledgedResponse.java +++ b/server/src/main/java/org/elasticsearch/action/support/master/AcknowledgedResponse.java @@ -11,18 +11,12 @@ import org.elasticsearch.action.ActionResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.xcontent.ConstructingObjectParser; -import org.elasticsearch.xcontent.ObjectParser; -import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; - /** * A response to an action which updated the cluster state, but needs to report whether any relevant nodes failed to apply the update. For * instance, a {@link org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest} may update a mapping in the index metadata, but @@ -39,16 +33,6 @@ public class AcknowledgedResponse extends ActionResponse implements IsAcknowledg public static final AcknowledgedResponse FALSE = new AcknowledgedResponse(false); public static final String ACKNOWLEDGED_KEY = "acknowledged"; - private static final ParseField ACKNOWLEDGED = new ParseField(ACKNOWLEDGED_KEY); - - public static void declareAcknowledgedField(ConstructingObjectParser objectParser) { - objectParser.declareField( - constructorArg(), - (parser, context) -> parser.booleanValue(), - ACKNOWLEDGED, - ObjectParser.ValueType.BOOLEAN - ); - } protected final boolean acknowledged; @@ -93,28 +77,6 @@ public final XContentBuilder toXContent(XContentBuilder builder, Params params) protected void addCustomFields(XContentBuilder builder, Params params) throws IOException {} - /** - * A generic parser that simply parses the acknowledged flag - */ - private static final ConstructingObjectParser ACKNOWLEDGED_FLAG_PARSER = new ConstructingObjectParser<>( - "acknowledged_flag", - true, - args -> (Boolean) args[0] - ); - - static { - ACKNOWLEDGED_FLAG_PARSER.declareField( - constructorArg(), - (parser, context) -> parser.booleanValue(), - ACKNOWLEDGED, - ObjectParser.ValueType.BOOLEAN - ); - } - - public static AcknowledgedResponse fromXContent(XContentParser parser) throws IOException { - return AcknowledgedResponse.of(ACKNOWLEDGED_FLAG_PARSER.apply(parser, null)); - } - @Override public boolean equals(Object o) { if (this == o) { diff --git a/server/src/main/java/org/elasticsearch/action/support/master/ShardsAcknowledgedResponse.java b/server/src/main/java/org/elasticsearch/action/support/master/ShardsAcknowledgedResponse.java index 72bf0a1a41f3e..127850d8d96cd 100644 --- a/server/src/main/java/org/elasticsearch/action/support/master/ShardsAcknowledgedResponse.java +++ b/server/src/main/java/org/elasticsearch/action/support/master/ShardsAcknowledgedResponse.java @@ -11,31 +11,15 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.xcontent.ConstructingObjectParser; -import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import 
org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.util.Objects; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; - public class ShardsAcknowledgedResponse extends AcknowledgedResponse { - protected static final ParseField SHARDS_ACKNOWLEDGED = new ParseField("shards_acknowledged"); - - public static void declareAcknowledgedAndShardsAcknowledgedFields( - ConstructingObjectParser objectParser - ) { - declareAcknowledgedField(objectParser); - objectParser.declareField( - constructorArg(), - (parser, context) -> parser.booleanValue(), - SHARDS_ACKNOWLEDGED, - ObjectParser.ValueType.BOOLEAN - ); - } + public static final ParseField SHARDS_ACKNOWLEDGED = new ParseField("shards_acknowledged"); public static final ShardsAcknowledgedResponse NOT_ACKNOWLEDGED = new ShardsAcknowledgedResponse(false, false); private static final ShardsAcknowledgedResponse SHARDS_NOT_ACKNOWLEDGED = new ShardsAcknowledgedResponse(true, false); diff --git a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java index 566c8001dea56..021ad8127a2d0 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java @@ -412,12 +412,12 @@ static class MaxFileSizeCheck implements BootstrapCheck { @Override public BootstrapCheckResult check(BootstrapContext context) { - final long maxFileSize = getMaxFileSize(); + final long maxFileSize = getProcessLimits().maxFileSize(); if (maxFileSize != Long.MIN_VALUE && maxFileSize != ProcessLimits.UNLIMITED) { final String message = String.format( Locale.ROOT, "max file size [%d] for user [%s] is too low, increase to [unlimited]", - getMaxFileSize(), + maxFileSize, BootstrapInfo.getSystemProperties().get("user.name") ); return BootstrapCheckResult.failure(message); @@ -426,8 +426,8 @@ public BootstrapCheckResult check(BootstrapContext context) { } } - long getMaxFileSize() { - return NativeAccess.instance().getProcessLimits().maxVirtualMemorySize(); + protected ProcessLimits getProcessLimits() { + return NativeAccess.instance().getProcessLimits(); } @Override diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index c068b496ae896..b7c7caecd65ad 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -67,7 +67,7 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.core.UpdateForV10; import org.elasticsearch.gateway.GatewayAllocator; import org.elasticsearch.health.metadata.HealthMetadataService; import org.elasticsearch.health.node.selection.HealthNodeTaskExecutor; @@ -391,7 +391,7 @@ private static void addAllocationDecider(Map, AllocationDecider> decide } } - @UpdateForV9 // in v9 there is only one allocator + @UpdateForV10 // in v10 there is only one allocator private static ShardsAllocator createShardsAllocator( Settings settings, ClusterSettings clusterSettings, diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/DataStream.java b/server/src/main/java/org/elasticsearch/cluster/metadata/DataStream.java index 5634d40993b4f..dd4a52fd9beda 100644 --- 
a/server/src/main/java/org/elasticsearch/cluster/metadata/DataStream.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/DataStream.java @@ -112,7 +112,7 @@ public static boolean isFailureStoreFeatureFlagEnabled() { private final IndexMode indexMode; @Nullable private final DataStreamLifecycle lifecycle; - private final boolean failureStoreEnabled; + private final DataStreamOptions dataStreamOptions; private final DataStreamIndices backingIndices; private final DataStreamIndices failureIndices; @@ -128,7 +128,7 @@ public DataStream( boolean allowCustomRouting, IndexMode indexMode, DataStreamLifecycle lifecycle, - boolean failureStoreEnabled, + @Nullable DataStreamOptions dataStreamOptions, List failureIndices, boolean rolloverOnWrite, @Nullable DataStreamAutoShardingEvent autoShardingEvent @@ -144,7 +144,7 @@ public DataStream( allowCustomRouting, indexMode, lifecycle, - failureStoreEnabled, + dataStreamOptions, new DataStreamIndices(BACKING_INDEX_PREFIX, List.copyOf(indices), rolloverOnWrite, autoShardingEvent), new DataStreamIndices(FAILURE_STORE_PREFIX, List.copyOf(failureIndices), false, null) ); @@ -162,7 +162,7 @@ public DataStream( boolean allowCustomRouting, IndexMode indexMode, DataStreamLifecycle lifecycle, - boolean failureStoreEnabled, + DataStreamOptions dataStreamOptions, DataStreamIndices backingIndices, DataStreamIndices failureIndices ) { @@ -177,7 +177,7 @@ public DataStream( this.allowCustomRouting = allowCustomRouting; this.indexMode = indexMode; this.lifecycle = lifecycle; - this.failureStoreEnabled = failureStoreEnabled; + this.dataStreamOptions = dataStreamOptions == null ? DataStreamOptions.EMPTY : dataStreamOptions; assert backingIndices.indices.isEmpty() == false; assert replicated == false || (backingIndices.rolloverOnWrite == false && failureIndices.rolloverOnWrite == false) : "replicated data streams cannot be marked for lazy rollover"; @@ -198,9 +198,11 @@ public static DataStream read(StreamInput in) throws IOException { var lifecycle = in.getTransportVersion().onOrAfter(TransportVersions.V_8_9_X) ? in.readOptionalWriteable(DataStreamLifecycle::new) : null; - var failureStoreEnabled = in.getTransportVersion().onOrAfter(DataStream.ADDED_FAILURE_STORE_TRANSPORT_VERSION) - ? in.readBoolean() - : false; + // This boolean flag has been moved in data stream options + var failureStoreEnabled = in.getTransportVersion() + .between(DataStream.ADDED_FAILURE_STORE_TRANSPORT_VERSION, TransportVersions.ADD_DATA_STREAM_OPTIONS) + ? in.readBoolean() + : false; var failureIndices = in.getTransportVersion().onOrAfter(DataStream.ADDED_FAILURE_STORE_TRANSPORT_VERSION) ? readIndices(in) : List.of(); @@ -213,6 +215,14 @@ public static DataStream read(StreamInput in) throws IOException { failureIndicesBuilder.setRolloverOnWrite(in.readBoolean()) .setAutoShardingEvent(in.readOptionalWriteable(DataStreamAutoShardingEvent::new)); } + DataStreamOptions dataStreamOptions; + if (in.getTransportVersion().onOrAfter(TransportVersions.ADD_DATA_STREAM_OPTIONS)) { + dataStreamOptions = in.readOptionalWriteable(DataStreamOptions::read); + } else { + // We cannot distinguish if failure store was explicitly disabled or not. Given that failure store + // is still behind a feature flag in previous version we use the default value instead of explicitly disabling it. + dataStreamOptions = failureStoreEnabled ? 
DataStreamOptions.FAILURE_STORE_ENABLED : null; + } return new DataStream( name, generation, @@ -224,7 +234,7 @@ public static DataStream read(StreamInput in) throws IOException { allowCustomRouting, indexMode, lifecycle, - failureStoreEnabled, + dataStreamOptions, backingIndicesBuilder.build(), failureIndicesBuilder.build() ); @@ -274,6 +284,10 @@ public boolean isFailureStoreIndex(String indexName) { return failureIndices.containsIndex(indexName); } + public DataStreamOptions getDataStreamOptions() { + return dataStreamOptions; + } + public boolean rolloverOnWrite() { return backingIndices.rolloverOnWrite; } @@ -406,13 +420,12 @@ public boolean isAllowCustomRouting() { } /** - * Determines if this data stream should persist ingest pipeline and mapping failures from bulk requests to a locally - * configured failure store. - * - * @return Whether this data stream should store ingestion failures. + * Determines if this data stream has its failure store enabled or not. Currently, the failure store + * is enabled only when a user has explicitly requested it. + * @return true, if the user has explicitly enabled the failure store. */ public boolean isFailureStoreEnabled() { - return failureStoreEnabled; + return dataStreamOptions.failureStore() != null && dataStreamOptions.failureStore().isExplicitlyEnabled(); } @Nullable @@ -448,43 +461,52 @@ public DataStreamIndices getDataStreamIndices(boolean failureStore) { * Performs a rollover on a {@code DataStream} instance and returns a new instance containing * the updated list of backing indices and incremented generation. * - * @param writeIndex new write index - * @param generation new generation - * @param timeSeries whether the template that created this data stream is in time series mode - * @param autoShardingEvent the auto sharding event this rollover operation is applying - * + * @param writeIndex new write index + * @param generation new generation + * @param indexModeFromTemplate the index mode that originates from the template that created this data stream + * @param autoShardingEvent the auto sharding event this rollover operation is applying * @return new {@code DataStream} instance with the rollover operation applied */ public DataStream rollover( Index writeIndex, long generation, - boolean timeSeries, + IndexMode indexModeFromTemplate, @Nullable DataStreamAutoShardingEvent autoShardingEvent ) { ensureNotReplicated(); - return unsafeRollover(writeIndex, generation, timeSeries, autoShardingEvent); + return unsafeRollover(writeIndex, generation, indexModeFromTemplate, autoShardingEvent); } /** - * Like {@link #rollover(Index, long, boolean, DataStreamAutoShardingEvent)}, but does no validation, use with care only. + * Like {@link #rollover(Index, long, IndexMode, DataStreamAutoShardingEvent)}, but does no validation, use with care only. 
*/ - public DataStream unsafeRollover(Index writeIndex, long generation, boolean timeSeries, DataStreamAutoShardingEvent autoShardingEvent) { - IndexMode indexMode = this.indexMode; - if ((indexMode == null || indexMode == IndexMode.STANDARD) && timeSeries) { + public DataStream unsafeRollover( + Index writeIndex, + long generation, + IndexMode indexModeFromTemplate, + DataStreamAutoShardingEvent autoShardingEvent + ) { + IndexMode dsIndexMode = this.indexMode; + if ((dsIndexMode == null || dsIndexMode == IndexMode.STANDARD) && indexModeFromTemplate == IndexMode.TIME_SERIES) { // This allows for migrating a data stream to be a tsdb data stream: // (only if index_mode=null|standard then allow it to be set to time_series) - indexMode = IndexMode.TIME_SERIES; - } else if (indexMode == IndexMode.TIME_SERIES && timeSeries == false) { + dsIndexMode = IndexMode.TIME_SERIES; + } else if (dsIndexMode == IndexMode.TIME_SERIES && (indexModeFromTemplate == null || indexModeFromTemplate == IndexMode.STANDARD)) { + // Allow downgrading a time series data stream to a regular data stream + dsIndexMode = null; + } else if ((dsIndexMode == null || dsIndexMode == IndexMode.STANDARD) && indexModeFromTemplate == IndexMode.LOGSDB) { + dsIndexMode = IndexMode.LOGSDB; + } else if (dsIndexMode == IndexMode.LOGSDB && (indexModeFromTemplate == null || indexModeFromTemplate == IndexMode.STANDARD)) { // Allow downgrading a time series data stream to a regular data stream - indexMode = null; + dsIndexMode = null; } List backingIndices = new ArrayList<>(this.backingIndices.indices); backingIndices.add(writeIndex); return copy().setBackingIndices( this.backingIndices.copy().setIndices(backingIndices).setAutoShardingEvent(autoShardingEvent).setRolloverOnWrite(false).build() - ).setGeneration(generation).setIndexMode(indexMode).build(); + ).setGeneration(generation).setIndexMode(dsIndexMode).build(); } /** @@ -1054,8 +1076,11 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_9_X)) { out.writeOptionalWriteable(lifecycle); } + if (out.getTransportVersion() + .between(DataStream.ADDED_FAILURE_STORE_TRANSPORT_VERSION, TransportVersions.ADD_DATA_STREAM_OPTIONS)) { + out.writeBoolean(isFailureStoreEnabled()); + } if (out.getTransportVersion().onOrAfter(DataStream.ADDED_FAILURE_STORE_TRANSPORT_VERSION)) { - out.writeBoolean(failureStoreEnabled); out.writeCollection(failureIndices.indices); } if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_13_0)) { @@ -1068,6 +1093,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(failureIndices.rolloverOnWrite); out.writeOptionalWriteable(failureIndices.autoShardingEvent); } + if (out.getTransportVersion().onOrAfter(TransportVersions.ADD_DATA_STREAM_OPTIONS)) { + out.writeOptionalWriteable(dataStreamOptions.isEmpty() ? 
null : dataStreamOptions); + } } public static final ParseField NAME_FIELD = new ParseField("name"); @@ -1087,6 +1115,7 @@ public void writeTo(StreamOutput out) throws IOException { public static final ParseField AUTO_SHARDING_FIELD = new ParseField("auto_sharding"); public static final ParseField FAILURE_ROLLOVER_ON_WRITE_FIELD = new ParseField("failure_rollover_on_write"); public static final ParseField FAILURE_AUTO_SHARDING_FIELD = new ParseField("failure_auto_sharding"); + public static final ParseField DATA_STREAM_OPTIONS_FIELD = new ParseField("options"); @SuppressWarnings("unchecked") private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("data_stream", args -> { @@ -1101,6 +1130,16 @@ public void writeTo(StreamOutput out) throws IOException { (DataStreamAutoShardingEvent) args[15] ) : new DataStreamIndices(FAILURE_STORE_PREFIX, List.of(), false, null); + // We cannot distinguish if failure store was explicitly disabled or not. Given that failure store + // is still behind a feature flag in previous version we use the default value instead of explicitly disabling it. + DataStreamOptions dataStreamOptions = DataStreamOptions.EMPTY; + if (DataStream.isFailureStoreFeatureFlagEnabled()) { + if (args[16] != null) { + dataStreamOptions = (DataStreamOptions) args[16]; + } else if (failureStoreEnabled) { + dataStreamOptions = DataStreamOptions.FAILURE_STORE_ENABLED; + } + } return new DataStream( (String) args[0], (Long) args[2], @@ -1112,7 +1151,7 @@ public void writeTo(StreamOutput out) throws IOException { args[7] != null && (boolean) args[7], args[8] != null ? IndexMode.fromString((String) args[8]) : null, (DataStreamLifecycle) args[9], - failureStoreEnabled, + dataStreamOptions, new DataStreamIndices( BACKING_INDEX_PREFIX, (List) args[1], @@ -1162,6 +1201,11 @@ public void writeTo(StreamOutput out) throws IOException { (p, c) -> DataStreamAutoShardingEvent.fromXContent(p), FAILURE_AUTO_SHARDING_FIELD ); + PARSER.declareObject( + ConstructingObjectParser.optionalConstructorArg(), + (p, c) -> DataStreamOptions.fromXContent(p), + DATA_STREAM_OPTIONS_FIELD + ); } } @@ -1199,7 +1243,6 @@ public XContentBuilder toXContent( builder.field(SYSTEM_FIELD.getPreferredName(), system); builder.field(ALLOW_CUSTOM_ROUTING.getPreferredName(), allowCustomRouting); if (DataStream.isFailureStoreFeatureFlagEnabled()) { - builder.field(FAILURE_STORE_FIELD.getPreferredName(), failureStoreEnabled); if (failureIndices.indices.isEmpty() == false) { builder.xContentList(FAILURE_INDICES_FIELD.getPreferredName(), failureIndices.indices); } @@ -1209,6 +1252,10 @@ public XContentBuilder toXContent( failureIndices.autoShardingEvent.toXContent(builder, params); builder.endObject(); } + if (dataStreamOptions.isEmpty() == false) { + builder.field(DATA_STREAM_OPTIONS_FIELD.getPreferredName()); + dataStreamOptions.toXContent(builder, params); + } } if (indexMode != null) { builder.field(INDEX_MODE.getPreferredName(), indexMode); @@ -1241,7 +1288,7 @@ public boolean equals(Object o) { && allowCustomRouting == that.allowCustomRouting && indexMode == that.indexMode && Objects.equals(lifecycle, that.lifecycle) - && failureStoreEnabled == that.failureStoreEnabled + && Objects.equals(dataStreamOptions, that.dataStreamOptions) && Objects.equals(backingIndices, that.backingIndices) && Objects.equals(failureIndices, that.failureIndices); } @@ -1258,7 +1305,7 @@ public int hashCode() { allowCustomRouting, indexMode, lifecycle, - failureStoreEnabled, + dataStreamOptions, backingIndices, failureIndices 
); @@ -1571,7 +1618,7 @@ public static class Builder { private IndexMode indexMode = null; @Nullable private DataStreamLifecycle lifecycle = null; - private boolean failureStoreEnabled = false; + private DataStreamOptions dataStreamOptions = DataStreamOptions.EMPTY; private DataStreamIndices backingIndices; private DataStreamIndices failureIndices = DataStreamIndices.failureIndicesBuilder(List.of()).build(); @@ -1596,7 +1643,7 @@ private Builder(DataStream dataStream) { allowCustomRouting = dataStream.allowCustomRouting; indexMode = dataStream.indexMode; lifecycle = dataStream.lifecycle; - failureStoreEnabled = dataStream.failureStoreEnabled; + dataStreamOptions = dataStream.dataStreamOptions; backingIndices = dataStream.backingIndices; failureIndices = dataStream.failureIndices; } @@ -1651,8 +1698,8 @@ public Builder setLifecycle(DataStreamLifecycle lifecycle) { return this; } - public Builder setFailureStoreEnabled(boolean failureStoreEnabled) { - this.failureStoreEnabled = failureStoreEnabled; + public Builder setDataStreamOptions(DataStreamOptions dataStreamOptions) { + this.dataStreamOptions = dataStreamOptions; return this; } @@ -1688,7 +1735,7 @@ public DataStream build() { allowCustomRouting, indexMode, lifecycle, - failureStoreEnabled, + dataStreamOptions, backingIndices, failureIndices ); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamFailureStore.java b/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamFailureStore.java index d94a7630eb868..e9d32594fa833 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamFailureStore.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamFailureStore.java @@ -24,38 +24,51 @@ /** * Holds the data stream failure store metadata that enable or disable the failure store of a data stream. Currently, it - * supports the following configurations: - * - enabled + * supports the following configurations only explicitly enabling or disabling the failure store */ -public record DataStreamFailureStore(boolean enabled) implements SimpleDiffable, ToXContentObject { +public record DataStreamFailureStore(Boolean enabled) implements SimpleDiffable, ToXContentObject { public static final ParseField ENABLED_FIELD = new ParseField("enabled"); public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( "failure_store", false, - (args, unused) -> new DataStreamFailureStore(args[0] == null || (Boolean) args[0]) + (args, unused) -> new DataStreamFailureStore((Boolean) args[0]) ); static { - PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED_FIELD); + PARSER.declareBoolean(ConstructingObjectParser.optionalConstructorArg(), ENABLED_FIELD); } - public DataStreamFailureStore() { - this(true); + /** + * @param enabled, true when the failure is enabled, false when it's disabled, null when it depends on other configuration. 
Currently, + * null value is not supported because there are no other arguments + * @throws IllegalArgumentException when all the constructor arguments are null + */ + public DataStreamFailureStore { + if (enabled == null) { + throw new IllegalArgumentException("Failure store configuration should have at least one non-null configuration value."); + } } public DataStreamFailureStore(StreamInput in) throws IOException { - this(in.readBoolean()); + this(in.readOptionalBoolean()); } public static Diff readDiffFrom(StreamInput in) throws IOException { return SimpleDiffable.readDiffFrom(DataStreamFailureStore::new, in); } + /** + * @return iff the user has explicitly enabled the failure store + */ + public boolean isExplicitlyEnabled() { + return enabled != null && enabled; + } + @Override public void writeTo(StreamOutput out) throws IOException { - out.writeBoolean(enabled); + out.writeOptionalBoolean(enabled); } @Override @@ -66,7 +79,9 @@ public String toString() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.field(ENABLED_FIELD.getPreferredName(), enabled); + if (enabled != null) { + builder.field(ENABLED_FIELD.getPreferredName(), enabled); + } builder.endObject(); return builder; } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamOptions.java b/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamOptions.java index 29211e8c1b37b..9cd4e2625e2ba 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamOptions.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamOptions.java @@ -35,6 +35,9 @@ public record DataStreamOptions(@Nullable DataStreamFailureStore failureStore) ToXContentObject { public static final ParseField FAILURE_STORE_FIELD = new ParseField("failure_store"); + public static final DataStreamOptions FAILURE_STORE_ENABLED = new DataStreamOptions(new DataStreamFailureStore(true)); + public static final DataStreamOptions FAILURE_STORE_DISABLED = new DataStreamOptions(new DataStreamFailureStore(false)); + public static final DataStreamOptions EMPTY = new DataStreamOptions(); public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( "options", @@ -59,15 +62,14 @@ public static DataStreamOptions read(StreamInput in) throws IOException { return new DataStreamOptions(in.readOptionalWriteable(DataStreamFailureStore::new)); } - @Nullable - public DataStreamFailureStore getFailureStore() { - return failureStore; - } - public static Diff readDiffFrom(StreamInput in) throws IOException { return SimpleDiffable.readDiffFrom(DataStreamOptions::read, in); } + public boolean isEmpty() { + return this.equals(EMPTY); + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(failureStore); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java index 6e865db0ebb39..2229166a2d779 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java @@ -37,6 +37,7 @@ import org.elasticsearch.indices.InvalidIndexNameException; import org.elasticsearch.indices.SystemIndices; import org.elasticsearch.indices.SystemIndices.SystemIndexAccessLevel; +import 
org.elasticsearch.transport.RemoteClusterAware; import java.time.Instant; import java.time.ZoneId; @@ -1753,7 +1754,7 @@ private static void ensureRemoteIndicesRequireIgnoreUnavailable(IndicesOptions o return; } for (String index : indexExpressions) { - if (index.contains(":")) { + if (RemoteClusterAware.isRemoteIndexName(index)) { failOnRemoteIndicesNotIgnoringUnavailable(indexExpressions); } } @@ -1762,7 +1763,7 @@ private static void ensureRemoteIndicesRequireIgnoreUnavailable(IndicesOptions o private static void failOnRemoteIndicesNotIgnoringUnavailable(List indexExpressions) { List crossClusterIndices = new ArrayList<>(); for (String index : indexExpressions) { - if (index.contains(":")) { + if (RemoteClusterAware.isRemoteIndexName(index)) { crossClusterIndices.add(index); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java b/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java index b88b5086980d1..abc0983ccb2d4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java @@ -28,6 +28,7 @@ public record LifecycleExecutionState( Boolean isAutoRetryableError, Integer failedStepRetryCount, String stepInfo, + String previousStepInfo, String phaseDefinition, Long lifecycleDate, Long phaseTime, @@ -53,6 +54,7 @@ public record LifecycleExecutionState( private static final String IS_AUTO_RETRYABLE_ERROR = "is_auto_retryable_error"; private static final String FAILED_STEP_RETRY_COUNT = "failed_step_retry_count"; private static final String STEP_INFO = "step_info"; + private static final String PREVIOUS_STEP_INFO = "previous_step_info"; private static final String PHASE_DEFINITION = "phase_definition"; private static final String SNAPSHOT_NAME = "snapshot_name"; private static final String SNAPSHOT_REPOSITORY = "snapshot_repository"; @@ -74,6 +76,7 @@ public static Builder builder(LifecycleExecutionState state) { .setIsAutoRetryableError(state.isAutoRetryableError) .setFailedStepRetryCount(state.failedStepRetryCount) .setStepInfo(state.stepInfo) + .setPreviousStepInfo(state.previousStepInfo) .setPhaseDefinition(state.phaseDefinition) .setIndexCreationDate(state.lifecycleDate) .setPhaseTime(state.phaseTime) @@ -116,6 +119,10 @@ public static LifecycleExecutionState fromCustomMetadata(Map cus if (stepInfo != null) { builder.setStepInfo(stepInfo); } + String previousStepInfo = customData.get(PREVIOUS_STEP_INFO); + if (previousStepInfo != null) { + builder.setPreviousStepInfo(previousStepInfo); + } String phaseDefinition = customData.get(PHASE_DEFINITION); if (phaseDefinition != null) { builder.setPhaseDefinition(phaseDefinition); @@ -224,6 +231,9 @@ public Map asMap() { if (stepInfo != null) { result.put(STEP_INFO, stepInfo); } + if (previousStepInfo != null) { + result.put(PREVIOUS_STEP_INFO, previousStepInfo); + } if (lifecycleDate != null) { result.put(INDEX_CREATION_DATE, String.valueOf(lifecycleDate)); } @@ -263,6 +273,7 @@ public static class Builder { private String step; private String failedStep; private String stepInfo; + private String previousStepInfo; private String phaseDefinition; private Long indexCreationDate; private Long phaseTime; @@ -301,6 +312,11 @@ public Builder setStepInfo(String stepInfo) { return this; } + public Builder setPreviousStepInfo(String previousStepInfo) { + this.previousStepInfo = previousStepInfo; + return this; + } + public Builder 
setPhaseDefinition(String phaseDefinition) { this.phaseDefinition = phaseDefinition; return this; @@ -370,6 +386,7 @@ public LifecycleExecutionState build() { isAutoRetryableError, failedStepRetryCount, stepInfo, + previousStepInfo, phaseDefinition, indexCreationDate, phaseTime, diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/Metadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/Metadata.java index d2f5ab5eabaee..566571d82c8ab 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/Metadata.java @@ -855,7 +855,7 @@ public Map> findDataStreamAliases(final String[] a * * @param aliases The aliases to look for. Might contain include or exclude wildcards. * @param possibleMatches The data streams or indices that the aliases must point to in order to be returned - * @param getter A function that is used to get the alises for a given data stream or index + * @param getter A function that is used to get the aliases for a given data stream or index * @param setter A function that is used to keep track of the found aliases */ private void findAliasInfo(final String[] aliases, final String[] possibleMatches, AliasInfoGetter getter, AliasInfoSetter setter) { @@ -881,24 +881,30 @@ private void findAliasInfo(final String[] aliases, final String[] possibleMatche boolean matchAllAliases = patterns.length == 0; + // memoize pattern match against aliases to avoid repeatedly matching when multiple indices share an alias + HashMap seenAliases = new HashMap<>(); + Predicate matcher = alias -> seenAliases.computeIfAbsent(alias, key -> { + boolean matched = matchAllAliases; + for (int i = 0; i < patterns.length; i++) { + if (include[i]) { + if (matched == false) { + String pattern = patterns[i]; + matched = ALL.equals(pattern) || Regex.simpleMatch(pattern, key); + } + } else if (matched) { + matched = Regex.simpleMatch(patterns[i], key) == false; + } + } + + return matched; + }); + for (String index : possibleMatches) { List filteredValues = new ArrayList<>(); List entities = getter.get(index); for (AliasInfo aliasInfo : entities) { - boolean matched = matchAllAliases; - String alias = aliasInfo.getAlias(); - for (int i = 0; i < patterns.length; i++) { - if (include[i]) { - if (matched == false) { - String pattern = patterns[i]; - matched = ALL.equals(pattern) || Regex.simpleMatch(pattern, alias); - } - } else if (matched) { - matched = Regex.simpleMatch(patterns[i], alias) == false; - } - } - if (matched) { + if (matcher.test(aliasInfo.getAlias())) { filteredValues.add(aliasInfo); } } @@ -1305,16 +1311,10 @@ public Map templatesV2() { .orElse(Collections.emptyMap()); } + // TODO: remove this method: public boolean isTimeSeriesTemplate(ComposableIndexTemplate indexTemplate) { - if (indexTemplate.getDataStreamTemplate() == null) { - return false; - } - - var settings = MetadataIndexTemplateService.resolveSettings(indexTemplate, componentTemplates()); - // Not using IndexSettings.MODE.get() to avoid validation that may fail at this point. - var rawIndexMode = settings.get(IndexSettings.MODE.getKey()); - var indexMode = rawIndexMode != null ? 
Enum.valueOf(IndexMode.class, rawIndexMode.toUpperCase(Locale.ROOT)) : null; - if (indexMode == IndexMode.TIME_SERIES) { + var indexModeFromTemplate = retrieveIndexModeFromTemplate(indexTemplate); + if (indexModeFromTemplate == IndexMode.TIME_SERIES) { // No need to check for the existence of index.routing_path here, because index.mode=time_series can't be specified without it. // Setting validation takes care of this. // Also no need to validate that the fields defined in index.routing_path are keyword fields with time_series_dimension @@ -1328,6 +1328,17 @@ public boolean isTimeSeriesTemplate(ComposableIndexTemplate indexTemplate) { return false; } + public IndexMode retrieveIndexModeFromTemplate(ComposableIndexTemplate indexTemplate) { + if (indexTemplate.getDataStreamTemplate() == null) { + return null; + } + + var settings = MetadataIndexTemplateService.resolveSettings(indexTemplate, componentTemplates()); + // Not using IndexSettings.MODE.get() to avoid validation that may fail at this point. + var rawIndexMode = settings.get(IndexSettings.MODE.getKey()); + return rawIndexMode != null ? Enum.valueOf(IndexMode.class, rawIndexMode.toUpperCase(Locale.ROOT)) : null; + } + public Map dataStreams() { return this.custom(DataStreamMetadata.TYPE, DataStreamMetadata.EMPTY).dataStreams(); } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamService.java index 69f753233b418..2df9cf706d892 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamService.java @@ -313,7 +313,7 @@ static ClusterState createDataStream( .collect(Collectors.toCollection(ArrayList::new)); dsBackingIndices.add(writeIndex.getIndex()); boolean hidden = isSystem || template.getDataStreamTemplate().isHidden(); - final IndexMode indexMode = metadata.isTimeSeriesTemplate(template) ? IndexMode.TIME_SERIES : null; + final IndexMode indexMode = metadata.retrieveIndexModeFromTemplate(template); final DataStreamLifecycle lifecycle = isSystem ? MetadataIndexTemplateService.resolveLifecycle(template, systemDataStreamDescriptor.getComponentTemplates()) : MetadataIndexTemplateService.resolveLifecycle(template, metadata.componentTemplates()); @@ -329,7 +329,7 @@ static ClusterState createDataStream( template.getDataStreamTemplate().isAllowCustomRouting(), indexMode, lifecycle == null && isDslOnlyMode ? DataStreamLifecycle.DEFAULT : lifecycle, - template.getDataStreamTemplate().hasFailureStore(), + template.getDataStreamTemplate().hasFailureStore() ? DataStreamOptions.FAILURE_STORE_ENABLED : DataStreamOptions.EMPTY, new DataStream.DataStreamIndices(DataStream.BACKING_INDEX_PREFIX, dsBackingIndices, false, null), // If the failure store shouldn't be initialized on data stream creation, we're marking it for "lazy rollover", which will // initialize the failure store on first write. 
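The hunks above replace the single boolean failureStoreEnabled flag with a DataStreamOptions wrapper around a nullable DataStreamFailureStore, so that "explicitly enabled", "explicitly disabled", and "not configured" become three distinct states on the wire, in XContent, and in the builder. The standalone sketch below models that tri-state behaviour with simplified local records; these are not the actual Elasticsearch classes, and the helper fromLegacyFlag is illustrative only, mirroring the mapping used in the createDataStream and DataStream.read hunks above (an old true maps to the enabled constant, an old false stays unconfigured rather than explicitly disabled).

// A minimal standalone sketch of the tri-state failure-store configuration, assuming
// only what the diff above defines. Simplified local records, not the real classes.
public final class FailureStoreOptionsSketch {

    // Mirrors DataStreamFailureStore: a nullable Boolean where null is rejected,
    // because "enabled" is currently the only configuration value it holds.
    record FailureStore(Boolean enabled) {
        FailureStore {
            if (enabled == null) {
                throw new IllegalArgumentException("at least one non-null configuration value is required");
            }
        }

        boolean isExplicitlyEnabled() {
            return enabled != null && enabled;
        }
    }

    // Mirrors DataStreamOptions: an absent failure store section means "not configured",
    // which is distinct from an explicit enabled=false.
    record Options(FailureStore failureStore) {
        static final Options EMPTY = new Options(null);
        static final Options FAILURE_STORE_ENABLED = new Options(new FailureStore(true));
        static final Options FAILURE_STORE_DISABLED = new Options(new FailureStore(false));

        boolean isEmpty() {
            return this.equals(EMPTY);
        }
    }

    // Illustrative helper (not part of the diff): old boolean flag -> new options.
    // true becomes "explicitly enabled"; false stays "not configured" rather than
    // "explicitly disabled", because the legacy flag could not express an opt-out.
    static Options fromLegacyFlag(boolean failureStoreEnabled) {
        return failureStoreEnabled ? Options.FAILURE_STORE_ENABLED : Options.EMPTY;
    }

    public static void main(String[] args) {
        System.out.println(fromLegacyFlag(true).failureStore().isExplicitlyEnabled()); // true
        System.out.println(fromLegacyFlag(false).isEmpty());                           // true
        System.out.println(Options.FAILURE_STORE_DISABLED.failureStore().isExplicitlyEnabled()); // false
    }
}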
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java index 0b39de4d0afef..c55ad5570e038 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java @@ -115,12 +115,12 @@ public DesiredBalanceReconciler(ClusterSettings clusterSettings, ThreadPool thre undesiredAllocations = LongGaugeMetric.create( meterRegistry, "es.allocator.desired_balance.allocations.undesired.current", - "Total number of shards allocated on undesired nodes", + "Total number of shards allocated on undesired nodes excluding shutting down nodes", "{shard}" ); undesiredAllocationsRatio = meterRegistry.registerDoubleGauge( "es.allocator.desired_balance.allocations.undesired.ratio", - "Ratio of undesired allocations to shard count", + "Ratio of undesired allocations to shard count excluding shutting down nodes", "1", () -> { var total = totalAllocations.get(); @@ -500,7 +500,7 @@ private void balance() { int unassignedShards = routingNodes.unassigned().size() + routingNodes.unassigned().ignored().size(); int totalAllocations = 0; - int undesiredAllocations = 0; + int undesiredAllocationsExcludingShuttingDownNodes = 0; // Iterate over all started shards and try to move any which are on undesired nodes. In the presence of throttling shard // movements, the goal of this iteration order is to achieve a fairer movement of shards from the nodes that are offloading the @@ -526,7 +526,9 @@ private void balance() { continue; } - undesiredAllocations++; + if (allocation.metadata().nodeShutdowns().contains(shardRouting.currentNodeId()) == false) { + undesiredAllocationsExcludingShuttingDownNodes++; + } if (allocation.deciders().canRebalance(shardRouting, allocation).type() != Decision.Type.YES) { // rebalancing disabled for this shard @@ -560,23 +562,23 @@ private void balance() { } DesiredBalanceReconciler.this.unassignedShards.set(unassignedShards); - DesiredBalanceReconciler.this.undesiredAllocations.set(undesiredAllocations); + DesiredBalanceReconciler.this.undesiredAllocations.set(undesiredAllocationsExcludingShuttingDownNodes); DesiredBalanceReconciler.this.totalAllocations.set(totalAllocations); - maybeLogUndesiredAllocationsWarning(totalAllocations, undesiredAllocations, routingNodes.size()); + maybeLogUndesiredAllocationsWarning(totalAllocations, undesiredAllocationsExcludingShuttingDownNodes, routingNodes.size()); } - private void maybeLogUndesiredAllocationsWarning(int allAllocations, int undesiredAllocations, int nodeCount) { + private void maybeLogUndesiredAllocationsWarning(int totalAllocations, int undesiredAllocations, int nodeCount) { // more shards than cluster can relocate with one reroute final boolean nonEmptyRelocationBacklog = undesiredAllocations > 2L * nodeCount; - final boolean warningThresholdReached = undesiredAllocations > undesiredAllocationsLogThreshold * allAllocations; - if (allAllocations > 0 && nonEmptyRelocationBacklog && warningThresholdReached) { + final boolean warningThresholdReached = undesiredAllocations > undesiredAllocationsLogThreshold * totalAllocations; + if (totalAllocations > 0 && nonEmptyRelocationBacklog && warningThresholdReached) { undesiredAllocationLogInterval.maybeExecute( () -> logger.warn( "[{}] of assigned shards ({}/{}) are not on their 
desired nodes, which exceeds the warn threshold of [{}]", - Strings.format1Decimals(100.0 * undesiredAllocations / allAllocations, "%"), + Strings.format1Decimals(100.0 * undesiredAllocations / totalAllocations, "%"), undesiredAllocations, - allAllocations, + totalAllocations, Strings.format1Decimals(100.0 * undesiredAllocationsLogThreshold, "%") ) ); diff --git a/server/src/main/java/org/elasticsearch/features/FeatureData.java b/server/src/main/java/org/elasticsearch/features/FeatureData.java index f2fdac937fc96..991bb4d82be3d 100644 --- a/server/src/main/java/org/elasticsearch/features/FeatureData.java +++ b/server/src/main/java/org/elasticsearch/features/FeatureData.java @@ -11,6 +11,8 @@ import org.elasticsearch.Version; import org.elasticsearch.common.Strings; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import java.util.Collections; import java.util.HashMap; @@ -28,6 +30,16 @@ * features for the consumption of {@link FeatureService} */ public class FeatureData { + + private static final Logger Log = LogManager.getLogger(FeatureData.class); + private static final boolean INCLUDE_TEST_FEATURES = System.getProperty("tests.testfeatures.enabled", "").equals("true"); + + static { + if (INCLUDE_TEST_FEATURES) { + Log.warn("WARNING: Test features are enabled. This should ONLY be used in automated tests."); + } + } + private final NavigableMap> historicalFeatures; private final Map nodeFeatures; @@ -43,7 +55,11 @@ public static FeatureData createFromSpecifications(List> historicalFeatures = new TreeMap<>(Map.of(Version.V_EMPTY, Set.of())); Map nodeFeatures = new HashMap<>(); for (FeatureSpecification spec : specs) { - var specFeatures = spec.getFeatures(); + Set specFeatures = spec.getFeatures(); + if (INCLUDE_TEST_FEATURES) { + specFeatures = new HashSet<>(specFeatures); + specFeatures.addAll(spec.getTestFeatures()); + } for (var hfe : spec.getHistoricalFeatures().entrySet()) { FeatureSpecification existing = allFeatures.putIfAbsent(hfe.getKey().id(), spec); diff --git a/server/src/main/java/org/elasticsearch/features/FeatureInfrastructureFeatures.java b/server/src/main/java/org/elasticsearch/features/FeatureInfrastructureFeatures.java index 53eaef369778f..76afb5eba8a47 100644 --- a/server/src/main/java/org/elasticsearch/features/FeatureInfrastructureFeatures.java +++ b/server/src/main/java/org/elasticsearch/features/FeatureInfrastructureFeatures.java @@ -24,4 +24,9 @@ public class FeatureInfrastructureFeatures implements FeatureSpecification { public Set getFeatures() { return Set.of(FeatureService.FEATURES_SUPPORTED); } + + @Override + public Set getTestFeatures() { + return Set.of(FeatureService.TEST_FEATURES_ENABLED); + } } diff --git a/server/src/main/java/org/elasticsearch/features/FeatureService.java b/server/src/main/java/org/elasticsearch/features/FeatureService.java index 250a4541b0869..1d911a75a4838 100644 --- a/server/src/main/java/org/elasticsearch/features/FeatureService.java +++ b/server/src/main/java/org/elasticsearch/features/FeatureService.java @@ -30,6 +30,7 @@ public class FeatureService { * A feature indicating that node features are supported. 
*/ public static final NodeFeature FEATURES_SUPPORTED = new NodeFeature("features_supported"); + public static final NodeFeature TEST_FEATURES_ENABLED = new NodeFeature("test_features_enabled"); private static final Logger logger = LogManager.getLogger(FeatureService.class); diff --git a/server/src/main/java/org/elasticsearch/features/FeatureSpecification.java b/server/src/main/java/org/elasticsearch/features/FeatureSpecification.java index db69ef00756b8..03f0dd89f172e 100644 --- a/server/src/main/java/org/elasticsearch/features/FeatureSpecification.java +++ b/server/src/main/java/org/elasticsearch/features/FeatureSpecification.java @@ -40,6 +40,16 @@ default Set getFeatures() { return Set.of(); } + /** + * Returns a set of test features that this node supports. + *

+ * These features will only be exposed if the {@code tests.testfeatures.enabled} system property is set. + * This should only be used when deploying test clusters. + */ + default Set getTestFeatures() { + return Set.of(); + } + /** * Returns information on historical features that should be deemed to be present on all nodes * on or above the {@link Version} specified. diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java index b19ed472c6a2e..44108109ad329 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java @@ -30,7 +30,7 @@ public class Elasticsearch814Codec extends CodecService.DeduplicateFieldInfosCod private final StoredFieldsFormat storedFieldsFormat; - private final PostingsFormat defaultPostingsFormat; + private static final PostingsFormat defaultPostingsFormat = new Lucene99PostingsFormat(); private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override public PostingsFormat getPostingsFormatForField(String field) { @@ -38,7 +38,7 @@ public PostingsFormat getPostingsFormatForField(String field) { } }; - private final DocValuesFormat defaultDVFormat; + private static final DocValuesFormat defaultDVFormat = new Lucene90DocValuesFormat(); private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { @@ -46,7 +46,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } }; - private final KnnVectorsFormat defaultKnnVectorsFormat; + private static final KnnVectorsFormat defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() { @Override public KnnVectorsFormat getKnnVectorsFormatForField(String field) { @@ -54,6 +54,8 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { } }; + private static final Lucene99Codec lucene99Codec = new Lucene99Codec(); + /** Public no-arg constructor, needed for SPI loading at read-time. */ public Elasticsearch814Codec() { this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED); @@ -64,11 +66,8 @@ public Elasticsearch814Codec() { * worse space-efficiency or vice-versa. 
*/ public Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode mode) { - super("Elasticsearch814", new Lucene99Codec()); + super("Elasticsearch814", lucene99Codec); this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode); - this.defaultPostingsFormat = new Lucene99PostingsFormat(); - this.defaultDVFormat = new Lucene90DocValuesFormat(); - this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 2b5f34a5772fb..9c2a08a69002c 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -31,19 +31,17 @@ */ public class PerFieldFormatSupplier { - private final MapperService mapperService; - private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); - private final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat(); - private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat; - private final ES87TSDBDocValuesFormat tsdbDocValuesFormat; + private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); + private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat(); + private static final ES87TSDBDocValuesFormat tsdbDocValuesFormat = new ES87TSDBDocValuesFormat(); + private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat(); - private final ES812PostingsFormat es812PostingsFormat; + private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat; + private final MapperService mapperService; public PerFieldFormatSupplier(MapperService mapperService, BigArrays bigArrays) { this.mapperService = mapperService; this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField); - this.tsdbDocValuesFormat = new ES87TSDBDocValuesFormat(); - this.es812PostingsFormat = new ES812PostingsFormat(); } public PostingsFormat getPostingsFormatForField(String field) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 7f9b59d427656..ebe9f27f461cf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -389,14 +389,6 @@ static Mapping createDynamicUpdate(DocumentParserContext context) { rootBuilder.addRuntimeField(runtimeField); } RootObjectMapper root = rootBuilder.build(MapperBuilderContext.root(context.mappingLookup().isSourceSynthetic(), false)); - - // Repeat the check, in case the dynamic mappers don't produce a mapping update. - // For instance, the parsed source may contain intermediate objects that get flattened, - // leading to an empty dynamic update. 
- if (root.mappers.isEmpty() && root.runtimeFields().isEmpty()) { - return null; - } - return context.mappingLookup().getMapping().mappingUpdate(root); } @@ -646,7 +638,7 @@ private static void parseObject(final DocumentParserContext context, String curr private static void doParseObject(DocumentParserContext context, String currentFieldName, Mapper objectMapper) throws IOException { context.path().add(currentFieldName); boolean withinLeafObject = context.path().isWithinLeafObject(); - if (objectMapper instanceof ObjectMapper objMapper && objMapper.subobjects() == ObjectMapper.Subobjects.DISABLED) { + if (objectMapper instanceof ObjectMapper objMapper && objMapper.subobjects() != ObjectMapper.Subobjects.ENABLED) { context.path().setWithinLeafObject(true); } parseObjectOrField(context, objectMapper); @@ -1020,15 +1012,11 @@ private static Mapper getLeafMapper(final DocumentParserContext context, String // don't create a dynamic mapping for it and don't index it. String fieldPath = context.path().pathAsText(fieldName); MappedFieldType fieldType = context.mappingLookup().getFieldType(fieldPath); - - if (fieldType != null && fieldType.hasDocValues() == false && fieldType.isAggregatable() && fieldType.isSearchable()) { - // We haven't found a mapper with this name above, which means it is a runtime field. + if (fieldType != null) { + // we haven't found a mapper with this name above, which means if a field type is found it is for sure a runtime field. + assert fieldType.hasDocValues() == false && fieldType.isAggregatable() && fieldType.isSearchable(); return noopFieldMapper(fieldPath); } - // No match or the matching field type corresponds to a mapper with flattened name (containing dots), - // e.g. for field 'foo.bar' under root there is no 'bar' mapper in object 'bar'. - // Returning null leads to creating a dynamic mapper. In the case of a mapper with flattened name, - // the dynamic mapper later gets deduplicated when building the dynamic update for the doc at hand. return null; } @@ -1172,10 +1160,11 @@ private static class RootDocumentParserContext extends DocumentParserContext { mappingLookup.getMapping().getRoot(), ObjectMapper.Dynamic.getRootDynamic(mappingLookup) ); - // If root supports no subobjects, there's no point in expanding dots in names to subobjects. - this.parser = (mappingLookup.getMapping().getRoot().subobjects() == ObjectMapper.Subobjects.DISABLED) - ? parser - : DotExpandingXContentParser.expandDots(parser, this.path, this); + if (mappingLookup.getMapping().getRoot().subobjects() == ObjectMapper.Subobjects.ENABLED) { + this.parser = DotExpandingXContentParser.expandDots(parser, this.path); + } else { + this.parser = parser; + } this.document = new LuceneDocument(); this.documents.add(document); this.maxAllowedNumNestedDocs = indexSettings().getMappingNestedDocsLimit(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index b8acdb716b467..c2970d8716147 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -123,7 +123,6 @@ public int get() { private Field version; private final SeqNoFieldMapper.SequenceIDFields seqID; private final Set fieldsAppliedFromTemplates; - private final boolean supportsObjectAutoFlattening; /** * Fields that are copied from values of other fields via copy_to. 
@@ -178,7 +177,6 @@ private DocumentParserContext( this.copyToFields = copyToFields; this.dynamicMappersSize = dynamicMapperSize; this.recordedSource = recordedSource; - this.supportsObjectAutoFlattening = checkForAutoFlatteningSupport(); } private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, DocumentParserContext in) { @@ -206,43 +204,6 @@ private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, ); } - private boolean checkForAutoFlatteningSupport() { - if (root().subobjects() != ObjectMapper.Subobjects.ENABLED) { - return true; - } - for (ObjectMapper objectMapper : mappingLookup.objectMappers().values()) { - if (objectMapper.subobjects() != ObjectMapper.Subobjects.ENABLED) { - return true; - } - } - if (root().dynamicTemplates() != null) { - for (DynamicTemplate dynamicTemplate : root().dynamicTemplates()) { - if (findSubobjects(dynamicTemplate.getMapping())) { - return true; - } - } - } - for (ObjectMapper objectMapper : dynamicObjectMappers.values()) { - if (objectMapper.subobjects() != ObjectMapper.Subobjects.ENABLED) { - return true; - } - } - return false; - } - - @SuppressWarnings("unchecked") - private static boolean findSubobjects(Map mapping) { - for (var entry : mapping.entrySet()) { - if (entry.getKey().equals("subobjects") && (entry.getValue() instanceof Boolean || entry.getValue() instanceof String)) { - return true; - } - if (entry.getValue() instanceof Map && findSubobjects((Map) entry.getValue())) { - return true; - } - } - return false; - } - protected DocumentParserContext( MappingLookup mappingLookup, MappingParserContext mappingParserContext, @@ -503,10 +464,6 @@ public Set getCopyToFields() { return copyToFields; } - boolean supportsObjectAutoFlattening() { - return supportsObjectAutoFlattening; - } - /** * Add a new mapper dynamically created while parsing. * @@ -642,25 +599,6 @@ final ObjectMapper getDynamicObjectMapper(String name) { return dynamicObjectMappers.get(name); } - ObjectMapper findObject(String fullName) { - // does the object mapper already exist? if so, use that - ObjectMapper objectMapper = mappingLookup().objectMappers().get(fullName); - if (objectMapper != null) { - return objectMapper; - } - // has the object mapper been added as a dynamic update already? - return getDynamicObjectMapper(fullName); - } - - ObjectMapper.Builder findObjectBuilder(String fullName) { - // does the object mapper already exist? if so, use that - ObjectMapper objectMapper = findObject(fullName); - if (objectMapper != null) { - return objectMapper.newBuilder(indexSettings().getIndexVersionCreated()); - } - return null; - } - /** * Add a new runtime field dynamically created while parsing. 
* We use the same set for both new indexed and new runtime fields, @@ -760,7 +698,7 @@ public LuceneDocument doc() { */ public final DocumentParserContext createCopyToContext(String copyToField, LuceneDocument doc) throws IOException { ContentPath path = new ContentPath(); - XContentParser parser = DotExpandingXContentParser.expandDots(new CopyToParser(copyToField, parser()), path, this); + XContentParser parser = DotExpandingXContentParser.expandDots(new CopyToParser(copyToField, parser()), path); return new Wrapper(root(), this) { @Override public ContentPath path() { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java index 728c7ac6f25ac..fc003e709cbca 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java @@ -18,8 +18,6 @@ import java.io.IOException; import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; import java.util.Deque; import java.util.List; import java.util.Map; @@ -40,13 +38,9 @@ private static final class WrappingParser extends FilterXContentParser { private final ContentPath contentPath; final Deque parsers = new ArrayDeque<>(); - final DocumentParserContext context; - boolean supportsObjectAutoFlattening; - WrappingParser(XContentParser in, ContentPath contentPath, DocumentParserContext context) throws IOException { + WrappingParser(XContentParser in, ContentPath contentPath) throws IOException { this.contentPath = contentPath; - this.context = context; - this.supportsObjectAutoFlattening = (context != null && context.supportsObjectAutoFlattening()); parsers.push(in); if (in.currentToken() == Token.FIELD_NAME) { expandDots(in); @@ -113,7 +107,7 @@ private void doExpandDots(XContentParser delegate, String field, int dotCount) t if (resultSize == 0) { throw new IllegalArgumentException("field name cannot contain only dots"); } - String[] subpaths; + final String[] subpaths; if (resultSize == list.length) { for (String part : list) { // check if the field name contains only whitespace @@ -132,9 +126,6 @@ private void doExpandDots(XContentParser delegate, String field, int dotCount) t } subpaths = extractAndValidateResults(field, list, resultSize); } - if (supportsObjectAutoFlattening && subpaths.length > 1) { - subpaths = maybeFlattenPaths(Arrays.asList(subpaths), context, contentPath).toArray(String[]::new); - } pushSubParser(delegate, subpaths); } @@ -244,13 +235,11 @@ public List listOrderedMap() throws IOException { /** * Wraps an XContentParser such that it re-interprets dots in field names as an object structure - * @param in the parser to wrap - * @param contentPath the starting path to expand, can be empty - * @param context provides mapping context to check for objects supporting sub-object auto-flattening - * @return the wrapped XContentParser + * @param in the parser to wrap + * @return the wrapped XContentParser */ - static XContentParser expandDots(XContentParser in, ContentPath contentPath, DocumentParserContext context) throws IOException { - return new WrappingParser(in, contentPath, context); + static XContentParser expandDots(XContentParser in, ContentPath contentPath) throws IOException { + return new WrappingParser(in, contentPath); } private enum State { @@ -421,49 +410,4 @@ public Token nextToken() throws IOException { return null; } } - - static List 
maybeFlattenPaths(List subpaths, DocumentParserContext context, ContentPath contentPath) { - String prefixWithDots = contentPath.pathAsText(""); - ObjectMapper parent = contentPath.length() == 0 - ? context.root() - : context.findObject(prefixWithDots.substring(0, prefixWithDots.length() - 1)); - List result = new ArrayList<>(subpaths.size()); - for (int i = 0; i < subpaths.size(); i++) { - String fullPath = prefixWithDots + String.join(".", subpaths.subList(0, i)); - if (i > 0) { - parent = context.findObject(fullPath); - } - boolean match = false; - StringBuilder path = new StringBuilder(subpaths.get(i)); - if (parent == null) { - // We get here for dynamic objects, which always get parsed with subobjects and may get flattened later. - match = true; - } else if (parent.subobjects() == ObjectMapper.Subobjects.ENABLED) { - match = true; - } else if (parent.subobjects() == ObjectMapper.Subobjects.AUTO) { - // Check if there's any subobject in the remaining path. - for (int j = i; j < subpaths.size() - 1; j++) { - if (j > i) { - path.append(".").append(subpaths.get(j)); - } - Mapper mapper = parent.mappers.get(path.toString()); - if (mapper instanceof ObjectMapper objectMapper - && (ObjectMapper.isFlatteningCandidate(objectMapper.subobjects, objectMapper) - || objectMapper.checkFlattenable(null).isPresent())) { - i = j; - match = true; - break; - } - } - } - if (match) { - result.add(path.toString()); - } else { - // We only get here if parent has subobjects set to false, or set to auto with no non-flattenable object in the sub-path. - result.add(String.join(".", subpaths.subList(i, subpaths.size()))); - return result; - } - } - return result; - } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java b/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java index cf810e278782a..4b6419b85e155 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.time.DateTimeException; import java.util.Map; -import java.util.Optional; /** * Encapsulates the logic for dynamically creating fields as part of document parsing. @@ -163,9 +162,7 @@ static Mapper createDynamicObjectMapper(DocumentParserContext context, String na Mapper mapper = createObjectMapperFromTemplate(context, name); return mapper != null ? mapper - // Dynamic objects are configured with subobject support, otherwise they can't get auto-flattened - // even if they otherwise qualify. 
- : new ObjectMapper.Builder(name, Optional.empty()).enabled(ObjectMapper.Defaults.ENABLED) + : new ObjectMapper.Builder(name, context.parent().subobjects).enabled(ObjectMapper.Defaults.ENABLED) .build(context.createDynamicMapperBuilderContext()); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 46b1dbdce4c4b..529ff19bfffd7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -1093,7 +1093,7 @@ protected BytesRef preserve(BytesRef value) { }); } - if (fieldType().ignoreAbove != ignoreAboveDefault) { + if (fieldType().ignoreAbove != Integer.MAX_VALUE) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index b9b611d8c62f9..40019566adaa8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -177,84 +177,42 @@ public final void addDynamic(String name, String prefix, Mapper mapper, Document // If the mapper to add has no dots, or the current object mapper has subobjects set to false, // we just add it as it is for sure a leaf mapper if (name.contains(".") == false || (subobjects.isPresent() && (subobjects.get() == Subobjects.DISABLED))) { - if (mapper instanceof ObjectMapper objectMapper - && isFlatteningCandidate(subobjects, objectMapper) - && objectMapper.checkFlattenable(null).isEmpty()) { - // Subobjects auto and false don't allow adding subobjects dynamically. - return; - } add(name, mapper); - return; - } - if (subobjects.isPresent() && subobjects.get() == Subobjects.AUTO) { - // Check if there's an existing field with the sanme, to avoid no-op dynamic updates. - ObjectMapper objectMapper = (prefix == null) ? context.root() : context.mappingLookup().objectMappers().get(prefix); - if (objectMapper != null && objectMapper.mappers.containsKey(name)) { - return; - } - - // Check for parent objects. Due to auto-flattening, names with dots are allowed so we need to check for all possible - // object names. For instance, for mapper 'foo.bar.baz.bad', we have the following options: - // -> object 'foo' found => call addDynamic on 'bar.baz.bad' - // ---> object 'bar' found => call addDynamic on 'baz.bad' - // -----> object 'baz' found => add field 'bad' to it - // -----> no match found => add field 'baz.bad' to 'bar' - // ---> object 'bar.baz' found => add field 'bad' to it - // ---> no match found => add field 'bar.baz.bad' to 'foo' - // -> object 'foo.bar' found => call addDynamic on 'baz.bad' - // ---> object 'baz' found => add field 'bad' to it - // ---> no match found=> add field 'baz.bad' to 'foo.bar' - // -> object 'foo.bar.baz' found => add field 'bad' to it - // -> no match found => add field 'foo.bar.baz.bad' to parent - String fullPathToMapper = name.substring(0, name.lastIndexOf(mapper.leafName())); - String[] fullPathTokens = fullPathToMapper.split("\\."); - StringBuilder candidateObject = new StringBuilder(); - String candidateObjectPrefix = prefix == null ? 
"" : prefix + "."; - for (int i = 0; i < fullPathTokens.length; i++) { - if (candidateObject.isEmpty() == false) { - candidateObject.append("."); - } - candidateObject.append(fullPathTokens[i]); - String candidateFullObject = candidateObjectPrefix.isEmpty() - ? candidateObject.toString() - : candidateObjectPrefix + candidateObject.toString(); - ObjectMapper parent = context.findObject(candidateFullObject); - if (parent != null) { - var parentBuilder = parent.newBuilder(context.indexSettings().getIndexVersionCreated()); - parentBuilder.addDynamic(name.substring(candidateObject.length() + 1), candidateFullObject, mapper, context); - if (parentBuilder.mappersBuilders.isEmpty() == false) { - add(parentBuilder); - } - return; - } - } - - // No matching parent object was found, the mapper is added as a leaf - similar to subobjects false. - // This only applies to field mappers, as subobjects get auto-flattened. - if (mapper instanceof FieldMapper fieldMapper) { - FieldMapper.Builder fieldBuilder = fieldMapper.getMergeBuilder(); - fieldBuilder.setLeafName(name); // Update to reflect the current, possibly flattened name. - add(fieldBuilder); + } else { + // We strip off the first object path of the mapper name, load or create + // the relevant object mapper, and then recurse down into it, passing the remainder + // of the mapper name. So for a mapper 'foo.bar.baz', we locate 'foo' and then + // call addDynamic on it with the name 'bar.baz', and next call addDynamic on 'bar' with the name 'baz'. + int firstDotIndex = name.indexOf('.'); + String immediateChild = name.substring(0, firstDotIndex); + String immediateChildFullName = prefix == null ? immediateChild : prefix + "." + immediateChild; + Builder parentBuilder = findObjectBuilder(immediateChildFullName, context); + if (parentBuilder != null) { + parentBuilder.addDynamic(name.substring(firstDotIndex + 1), immediateChildFullName, mapper, context); + add(parentBuilder); + } else if (subobjects.isPresent() && subobjects.get() == Subobjects.AUTO) { + // No matching parent object was found, the mapper is added as a leaf - similar to subobjects false. + add(name, mapper); + } else { + // Expected to find a matching parent object but got null. + throw new IllegalStateException("Missing intermediate object " + immediateChildFullName); } - return; } + } - // We strip off the first object path of the mapper name, load or create - // the relevant object mapper, and then recurse down into it, passing the remainder - // of the mapper name. So for a mapper 'foo.bar.baz', we locate 'foo' and then - // call addDynamic on it with the name 'bar.baz', and next call addDynamic on 'bar' with the name 'baz'. - int firstDotIndex = name.indexOf('.'); - String immediateChild = name.substring(0, firstDotIndex); - String immediateChildFullName = prefix == null ? immediateChild : prefix + "." + immediateChild; - Builder parentBuilder = context.findObjectBuilder(immediateChildFullName); - if (parentBuilder != null) { - parentBuilder.addDynamic(name.substring(firstDotIndex + 1), immediateChildFullName, mapper, context); - add(parentBuilder); - } else { - // Expected to find a matching parent object but got null. - throw new IllegalStateException("Missing intermediate object " + immediateChildFullName); + private static Builder findObjectBuilder(String fullName, DocumentParserContext context) { + // does the object mapper already exist? 
if so, use that + ObjectMapper objectMapper = context.mappingLookup().objectMappers().get(fullName); + if (objectMapper != null) { + return objectMapper.newBuilder(context.indexSettings().getIndexVersionCreated()); } - + // has the object mapper been added as a dynamic update already? + objectMapper = context.getDynamicObjectMapper(fullName); + if (objectMapper != null) { + return objectMapper.newBuilder(context.indexSettings().getIndexVersionCreated()); + } + // no object mapper found + return null; } protected final Map buildMappers(MapperBuilderContext mapperBuilderContext) { @@ -270,10 +228,9 @@ protected final Map buildMappers(MapperBuilderContext mapperBuil // mix of object notation and dot notation. mapper = existing.merge(mapper, MapperMergeContext.from(mapperBuilderContext, Long.MAX_VALUE)); } - if (mapper instanceof ObjectMapper objectMapper && isFlatteningCandidate(subobjects, objectMapper)) { - // We're parsing a mapping that has defined sub-objects, may need to flatten them. - objectMapper.asFlattenedFieldMappers(mapperBuilderContext, throwOnFlattenableError(subobjects)) - .forEach(m -> mappers.put(m.leafName(), m)); + if (subobjects.isPresent() && subobjects.get() == Subobjects.DISABLED && mapper instanceof ObjectMapper objectMapper) { + // We're parsing a mapping that has set `subobjects: false` but has defined sub-objects + objectMapper.asFlattenedFieldMappers(mapperBuilderContext).forEach(m -> mappers.put(m.leafName(), m)); } else { mappers.put(mapper.leafName(), mapper); } @@ -668,11 +625,12 @@ private static Map buildMergedMappers( Optional subobjects ) { Map mergedMappers = new HashMap<>(); - var context = objectMergeContext.getMapperBuilderContext(); for (Mapper childOfExistingMapper : existing.mappers.values()) { - if (childOfExistingMapper instanceof ObjectMapper objectMapper && isFlatteningCandidate(subobjects, objectMapper)) { - // An existing mapping with sub-objects is merged with a mapping that has `subobjects` set to false or auto. 
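The recursion described in the comment above ("for a mapper 'foo.bar.baz', we locate 'foo' and then call addDynamic on it with the name 'bar.baz', and next call addDynamic on 'bar' with the name 'baz'") can be pictured with a small standalone sketch. The class and method below are a hypothetical illustration of that traversal only, not the ObjectMapper.Builder API:

    // Peels one object name off the front of a dotted field name per recursion step.
    class DottedNameWalk {
        static void addDynamic(String name, String prefix) {
            int firstDotIndex = name.indexOf('.');
            if (firstDotIndex < 0) {
                System.out.println("add leaf [" + name + "] under [" + (prefix == null ? "<root>" : prefix) + "]");
                return;
            }
            String immediateChild = name.substring(0, firstDotIndex);
            String immediateChildFullName = prefix == null ? immediateChild : prefix + "." + immediateChild;
            System.out.println("descend into object [" + immediateChildFullName + "]");
            addDynamic(name.substring(firstDotIndex + 1), immediateChildFullName);
        }

        public static void main(String[] args) {
            addDynamic("foo.bar.baz", null);
            // prints:
            //   descend into object [foo]
            //   descend into object [foo.bar]
            //   add leaf [baz] under [foo.bar]
        }
    }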
- objectMapper.asFlattenedFieldMappers(context, throwOnFlattenableError(subobjects)) + if (subobjects.isPresent() + && subobjects.get() == Subobjects.DISABLED + && childOfExistingMapper instanceof ObjectMapper objectMapper) { + // An existing mapping with sub-objects is merged with a mapping that has set `subobjects: false` + objectMapper.asFlattenedFieldMappers(objectMergeContext.getMapperBuilderContext()) .forEach(m -> mergedMappers.put(m.leafName(), m)); } else { putMergedMapper(mergedMappers, childOfExistingMapper); @@ -681,9 +639,11 @@ private static Map buildMergedMappers( for (Mapper mergeWithMapper : mergeWithObject) { Mapper mergeIntoMapper = mergedMappers.get(mergeWithMapper.leafName()); if (mergeIntoMapper == null) { - if (mergeWithMapper instanceof ObjectMapper objectMapper && isFlatteningCandidate(subobjects, objectMapper)) { - // An existing mapping with `subobjects` set to false or auto is merged with a mapping with sub-objects - objectMapper.asFlattenedFieldMappers(context, throwOnFlattenableError(subobjects)) + if (subobjects.isPresent() + && subobjects.get() == Subobjects.DISABLED + && mergeWithMapper instanceof ObjectMapper objectMapper) { + // An existing mapping that has set `subobjects: false` is merged with a mapping with sub-objects + objectMapper.asFlattenedFieldMappers(objectMergeContext.getMapperBuilderContext()) .stream() .filter(m -> objectMergeContext.decrementFieldBudgetIfPossible(m.getTotalFieldsCount())) .forEach(m -> putMergedMapper(mergedMappers, m)); @@ -740,83 +700,57 @@ private static ObjectMapper truncateObjectMapper(MapperMergeContext context, Obj * * @throws IllegalArgumentException if the mapper cannot be flattened */ - List asFlattenedFieldMappers(MapperBuilderContext context, boolean throwOnFlattenableError) { - List flattenedMappers = new ArrayList<>(); + List asFlattenedFieldMappers(MapperBuilderContext context) { + List flattenedMappers = new ArrayList<>(); ContentPath path = new ContentPath(); - asFlattenedFieldMappers(context, flattenedMappers, path, throwOnFlattenableError); + asFlattenedFieldMappers(context, flattenedMappers, path); return flattenedMappers; } - static boolean isFlatteningCandidate(Optional subobjects, ObjectMapper mapper) { - return subobjects.isPresent() && subobjects.get() != Subobjects.ENABLED && mapper instanceof NestedObjectMapper == false; - } - - private static boolean throwOnFlattenableError(Optional subobjects) { - return subobjects.isPresent() && subobjects.get() == Subobjects.DISABLED; - } - - private void asFlattenedFieldMappers( - MapperBuilderContext context, - List flattenedMappers, - ContentPath path, - boolean throwOnFlattenableError - ) { - var error = checkFlattenable(context); - if (error.isPresent()) { - if (throwOnFlattenableError) { - throw new IllegalArgumentException( - "Object mapper [" - + path.pathAsText(leafName()) - + "] was found in a context where subobjects is set to false. " - + "Auto-flattening [" - + path.pathAsText(leafName()) - + "] failed because " - + error.get() - ); - } - // The object can't be auto-flattened under the parent object, so it gets added at the current level. - // [subobjects=auto] applies auto-flattening to names, so the leaf name may need to change. - // Since mapper objects are immutable, we create a clone of the current one with the updated leaf name. - flattenedMappers.add( - path.pathAsText("").isEmpty() - ? 
this - : new ObjectMapper(path.pathAsText(leafName()), fullPath, enabled, subobjects, storeArraySource, dynamic, mappers) - ); - return; - } + private void asFlattenedFieldMappers(MapperBuilderContext context, List flattenedMappers, ContentPath path) { + ensureFlattenable(context, path); path.add(leafName()); for (Mapper mapper : mappers.values()) { if (mapper instanceof FieldMapper fieldMapper) { FieldMapper.Builder fieldBuilder = fieldMapper.getMergeBuilder(); fieldBuilder.setLeafName(path.pathAsText(mapper.leafName())); flattenedMappers.add(fieldBuilder.build(context)); - } else if (mapper instanceof ObjectMapper objectMapper && mapper instanceof NestedObjectMapper == false) { - objectMapper.asFlattenedFieldMappers(context, flattenedMappers, path, throwOnFlattenableError); + } else if (mapper instanceof ObjectMapper objectMapper) { + objectMapper.asFlattenedFieldMappers(context, flattenedMappers, path); } } path.remove(); } - Optional checkFlattenable(MapperBuilderContext context) { - if (dynamic != null && (context == null || context.getDynamic() != dynamic)) { - return Optional.of( + private void ensureFlattenable(MapperBuilderContext context, ContentPath path) { + if (dynamic != null && context.getDynamic() != dynamic) { + throwAutoFlatteningException( + path, "the value of [dynamic] (" + dynamic + ") is not compatible with the value from its parent context (" - + (context != null ? context.getDynamic() : "") + + context.getDynamic() + ")" ); } - if (storeArraySource()) { - return Optional.of("the value of [store_array_source] is [true]"); - } if (isEnabled() == false) { - return Optional.of("the value of [enabled] is [false]"); + throwAutoFlatteningException(path, "the value of [enabled] is [false]"); } - if (subobjects.isPresent() && subobjects.get() != Subobjects.DISABLED) { - return Optional.of("the value of [subobjects] is [" + subobjects().printedValue + "]"); + if (subobjects.isPresent() && subobjects.get() == Subobjects.ENABLED) { + throwAutoFlatteningException(path, "the value of [subobjects] is [true]"); } - return Optional.empty(); + } + + private void throwAutoFlatteningException(ContentPath path, String reason) { + throw new IllegalArgumentException( + "Object mapper [" + + path.pathAsText(leafName()) + + "] was found in a context where subobjects is set to false. 
" + + "Auto-flattening [" + + path.pathAsText(leafName()) + + "] failed because " + + reason + ); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/query/InnerHitBuilder.java b/server/src/main/java/org/elasticsearch/index/query/InnerHitBuilder.java index 4c861c2320ea5..806f28d72647a 100644 --- a/server/src/main/java/org/elasticsearch/index/query/InnerHitBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/InnerHitBuilder.java @@ -50,9 +50,9 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject { public static final ParseField COLLAPSE_FIELD = new ParseField("collapse"); public static final ParseField FIELD_FIELD = new ParseField("field"); + public static final int DEFAULT_FROM = 0; + public static final int DEFAULT_SIZE = 3; private static final boolean DEFAULT_IGNORE_UNAMPPED = false; - private static final int DEFAULT_FROM = 0; - private static final int DEFAULT_SIZE = 3; private static final boolean DEFAULT_VERSION = false; private static final boolean DEFAULT_SEQ_NO_AND_PRIMARY_TERM = false; private static final boolean DEFAULT_EXPLAIN = false; diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchIndexNameMatcher.java b/server/src/main/java/org/elasticsearch/index/query/SearchIndexNameMatcher.java index 9e34093776fb2..6799895d8e278 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SearchIndexNameMatcher.java +++ b/server/src/main/java/org/elasticsearch/index/query/SearchIndexNameMatcher.java @@ -53,14 +53,12 @@ public SearchIndexNameMatcher( * the separator ':', and must match on both the cluster alias and index name. */ public boolean test(String pattern) { - int separatorIndex = pattern.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR); - if (separatorIndex < 0) { + String[] splitIndex = RemoteClusterAware.splitIndexName(pattern); + + if (splitIndex[0] == null) { return clusterAlias == null && matchesIndex(pattern); } else { - String clusterPattern = pattern.substring(0, separatorIndex); - String indexPattern = pattern.substring(separatorIndex + 1); - - return Regex.simpleMatch(clusterPattern, clusterAlias) && matchesIndex(indexPattern); + return Regex.simpleMatch(splitIndex[0], clusterAlias) && matchesIndex(splitIndex[1]); } } diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStream.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStream.java index 92e71c08cf056..1edd69a6443a7 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStream.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStream.java @@ -24,12 +24,12 @@ public abstract class SlicedInputStream extends InputStream { private int nextSlice = 0; private InputStream currentStream; - private int currentSliceOffset = 0; + private long currentSliceOffset = 0; private final int numSlices; private boolean closed = false; private boolean initialized = false; private int markedSlice = -1; - private int markedSliceOffset = -1; + private long markedSliceOffset = -1; /** * Creates a new SlicedInputStream @@ -98,6 +98,30 @@ public final int read(byte[] buffer, int offset, int length) throws IOException return read; } + @Override + public long skip(long n) throws IOException { + long remaining = n; + while (remaining > 0) { + final InputStream stream = currentStream(); + if (stream == null) { + break; + } + long skipped = stream.skip(remaining); + currentSliceOffset += skipped; + if (skipped < 
remaining) { + // read one more byte to see if we reached EOF in order to proceed to the next stream. + if (stream.read() < 0) { + nextStream(); + } else { + currentSliceOffset++; + skipped++; + } + } + remaining -= skipped; + } + return n - remaining; + } + @Override public void close() throws IOException { closed = true; @@ -129,7 +153,7 @@ public void mark(int readLimit) { // According to JDK documentation, marking a closed InputStream should have no effect. if (markSupported() && isClosed() == false && numSlices > 0) { if (initialized) { - markedSlice = nextSlice - 1; + markedSlice = (currentStream == null) ? numSlices : nextSlice - 1; markedSliceOffset = currentSliceOffset; } else { markedSlice = 0; @@ -148,12 +172,16 @@ public void reset() throws IOException { throw new IOException("Mark has not been set"); } - // We do not call the SlicedInputStream's skipNBytes but call skipNBytes directly on the returned stream, to ensure that - // the skip is performed on the marked slice and no other slices are involved. This may help uncover any bugs. nextSlice = markedSlice; - final InputStream stream = nextStream(); - if (stream != null) { - stream.skipNBytes(markedSliceOffset); + initialized = true; + IOUtils.close(currentStream); + if (nextSlice < numSlices) { + currentStream = openSlice(nextSlice++); + // We do not call the SlicedInputStream's skipNBytes but call skipNBytes directly on the returned stream, to ensure that + // the skip is performed on the marked slice and no other slices are involved. This may help uncover any bugs. + currentStream.skipNBytes(markedSliceOffset); + } else { + currentStream = null; } currentSliceOffset = markedSliceOffset; } diff --git a/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java b/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java new file mode 100644 index 0000000000000..17e290283d5e0 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java @@ -0,0 +1,177 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.monitor.metrics; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.AlreadyClosedException; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.common.util.SingleObjectCache; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.shard.IllegalIndexShardStateException; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.telemetry.metric.LongWithAttributes; +import org.elasticsearch.telemetry.metric.MeterRegistry; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; + +/** + * {@link IndicesMetrics} monitors index statistics on an Elasticsearch node and exposes them as metrics + * through the provided {@link MeterRegistry}. It tracks the current total number of indices, document count, and + * store size (in bytes) for each index mode. + */ +public class IndicesMetrics extends AbstractLifecycleComponent { + private final Logger logger = LogManager.getLogger(IndicesMetrics.class); + private final MeterRegistry registry; + private final List metrics = new ArrayList<>(); + private final IndicesStatsCache stateCache; + + public IndicesMetrics(MeterRegistry meterRegistry, IndicesService indicesService, TimeValue metricsInterval) { + this.registry = meterRegistry; + // Use half of the update interval to ensure that results aren't cached across updates, + // while preventing the cache from expiring when reading different gauges within the same update. + var cacheExpiry = new TimeValue(metricsInterval.getMillis() / 2); + this.stateCache = new IndicesStatsCache(indicesService, cacheExpiry); + } + + private static List registerAsyncMetrics(MeterRegistry registry, IndicesStatsCache cache) { + List metrics = new ArrayList<>(IndexMode.values().length * 3); + assert IndexMode.values().length == 3 : "index modes have changed"; + for (IndexMode indexMode : IndexMode.values()) { + String name = indexMode.getName(); + metrics.add( + registry.registerLongGauge( + "es.indices." + name + ".total", + "total number of " + name + " indices", + "unit", + () -> new LongWithAttributes(cache.getOrRefresh().get(indexMode).numIndices) + ) + ); + metrics.add( + registry.registerLongGauge( + "es.indices." + name + ".docs.total", + "total documents of " + name + " indices", + "unit", + () -> new LongWithAttributes(cache.getOrRefresh().get(indexMode).numDocs) + ) + ); + metrics.add( + registry.registerLongGauge( + "es.indices." 
+ name + ".bytes.total", + "total size in bytes of " + name + " indices", + "unit", + () -> new LongWithAttributes(cache.getOrRefresh().get(indexMode).numBytes) + ) + ); + } + return metrics; + } + + @Override + protected void doStart() { + metrics.addAll(registerAsyncMetrics(registry, stateCache)); + } + + @Override + protected void doStop() { + stateCache.stopRefreshing(); + } + + @Override + protected void doClose() throws IOException { + metrics.forEach(metric -> { + try { + metric.close(); + } catch (Exception e) { + logger.warn("metrics close() method should not throw Exception", e); + } + }); + } + + static class IndexStats { + int numIndices = 0; + long numDocs = 0; + long numBytes = 0; + } + + private static class IndicesStatsCache extends SingleObjectCache> { + private static final Map MISSING_STATS; + static { + MISSING_STATS = new EnumMap<>(IndexMode.class); + for (IndexMode value : IndexMode.values()) { + MISSING_STATS.put(value, new IndexStats()); + } + } + + private boolean refresh; + private final IndicesService indicesService; + + IndicesStatsCache(IndicesService indicesService, TimeValue interval) { + super(interval, MISSING_STATS); + this.indicesService = indicesService; + this.refresh = true; + } + + private Map internalGetIndicesStats() { + Map stats = new EnumMap<>(IndexMode.class); + for (IndexMode mode : IndexMode.values()) { + stats.put(mode, new IndexStats()); + } + for (IndexService indexService : indicesService) { + for (IndexShard indexShard : indexService) { + if (indexShard.isSystem()) { + continue; // skip system indices + } + final ShardRouting shardRouting = indexShard.routingEntry(); + if (shardRouting.primary() == false) { + continue; // count primaries only + } + if (shardRouting.recoverySource() != null) { + continue; // exclude relocating shards + } + final IndexMode indexMode = indexShard.indexSettings().getMode(); + final IndexStats indexStats = stats.get(indexMode); + if (shardRouting.shardId().id() == 0) { + indexStats.numIndices++; + } + try { + indexStats.numDocs += indexShard.commitStats().getNumDocs(); + indexStats.numBytes += indexShard.storeStats().sizeInBytes(); + } catch (IllegalIndexShardStateException | AlreadyClosedException ignored) { + // ignored + } + } + } + return stats; + } + + @Override + protected Map refresh() { + return refresh ? 
internalGetIndicesStats() : getNoRefresh(); + } + + @Override + protected boolean needsRefresh() { + return getNoRefresh() == MISSING_STATS || super.needsRefresh(); + } + + void stopRefreshing() { + this.refresh = false; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 1447ac1c5b59b..5024cc5468866 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -66,6 +66,7 @@ import org.elasticsearch.injection.guice.Injector; import org.elasticsearch.monitor.fs.FsHealthService; import org.elasticsearch.monitor.jvm.JvmInfo; +import org.elasticsearch.monitor.metrics.IndicesMetrics; import org.elasticsearch.monitor.metrics.NodeMetrics; import org.elasticsearch.node.internal.TerminationHandler; import org.elasticsearch.plugins.ClusterCoordinationPlugin; @@ -441,6 +442,7 @@ public void onTimeout(TimeValue timeout) { } injector.getInstance(NodeMetrics.class).start(); + injector.getInstance(IndicesMetrics.class).start(); injector.getInstance(HealthPeriodicLogger.class).start(); logger.info("started {}", transportService.getLocalNode()); @@ -489,6 +491,7 @@ private void stop() { stopIfStarted(SearchService.class); stopIfStarted(TransportService.class); stopIfStarted(NodeMetrics.class); + stopIfStarted(IndicesMetrics.class); pluginLifecycleComponents.forEach(Node::stopIfStarted); // we should stop this last since it waits for resources to get released @@ -558,6 +561,7 @@ public synchronized void close() throws IOException { toClose.add(() -> stopWatch.stop().start("transport")); toClose.add(injector.getInstance(TransportService.class)); toClose.add(injector.getInstance(NodeMetrics.class)); + toClose.add(injector.getInstance(IndicesService.class)); if (ReadinessService.enabled(environment)) { toClose.add(injector.getInstance(ReadinessService.class)); } diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index c4816b440f568..b3c95186b6037 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -141,6 +141,7 @@ import org.elasticsearch.monitor.MonitorService; import org.elasticsearch.monitor.fs.FsHealthService; import org.elasticsearch.monitor.jvm.JvmInfo; +import org.elasticsearch.monitor.metrics.IndicesMetrics; import org.elasticsearch.monitor.metrics.NodeMetrics; import org.elasticsearch.node.internal.TerminationHandler; import org.elasticsearch.node.internal.TerminationHandlerProvider; @@ -1063,6 +1064,7 @@ private void construct( final TimeValue metricsInterval = settings.getAsTime("telemetry.agent.metrics_interval", TimeValue.timeValueSeconds(10)); final NodeMetrics nodeMetrics = new NodeMetrics(telemetryProvider.getMeterRegistry(), nodeService, metricsInterval); + final IndicesMetrics indicesMetrics = new IndicesMetrics(telemetryProvider.getMeterRegistry(), indicesService, metricsInterval); final SearchService searchService = serviceProvider.newSearchService( pluginsService, @@ -1162,6 +1164,7 @@ private void construct( b.bind(Transport.class).toInstance(transport); b.bind(TransportService.class).toInstance(transportService); b.bind(NodeMetrics.class).toInstance(nodeMetrics); + b.bind(IndicesMetrics.class).toInstance(indicesMetrics); b.bind(NetworkService.class).toInstance(networkService); 
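For context on the IndicesStatsCache registered above: every gauge callback reads from one cached stats map, and the cache expiry is set to half the metrics interval so a single expensive walk over the shards serves all gauges of one collection cycle without being reused in the next. A minimal, self-contained sketch of that caching idea (the IntervalCache class is hypothetical, not the actual SingleObjectCache):

    import java.time.Duration;
    import java.util.function.Supplier;

    final class IntervalCache<T> {
        private final long intervalNanos;
        private final Supplier<T> loader;
        private T value;
        private long lastRefreshNanos;

        IntervalCache(Duration interval, Supplier<T> loader) {
            this.intervalNanos = interval.toNanos();
            this.loader = loader;
        }

        // Every gauge calls this; the loader runs at most once per interval.
        synchronized T getOrRefresh() {
            long now = System.nanoTime();
            if (value == null || now - lastRefreshNanos >= intervalNanos) {
                value = loader.get();
                lastRefreshNanos = now;
            }
            return value;
        }
    }

With a 10s metrics interval this would be constructed roughly as new IntervalCache<>(Duration.ofSeconds(5), this::computeIndexStats), mirroring the metricsInterval.getMillis() / 2 expiry used in IndicesMetrics above (computeIndexStats is a placeholder name).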
b.bind(IndexMetadataVerifier.class).toInstance(indexMetadataVerifier); b.bind(ClusterInfoService.class).toInstance(clusterInfoService); diff --git a/server/src/main/java/org/elasticsearch/rest/RestHandler.java b/server/src/main/java/org/elasticsearch/rest/RestHandler.java index b7dbe09db4a39..ede295fee9f4d 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestHandler.java +++ b/server/src/main/java/org/elasticsearch/rest/RestHandler.java @@ -195,8 +195,9 @@ private RouteBuilder(Method method, String path) { } /** - * Marks that the route being built has been deprecated (for some reason -- the deprecationMessage), and notes the major - * version in which that deprecation occurred. + * Marks that the route being built has been deprecated (for some reason -- the deprecationMessage) for removal. Notes the last + * major version in which the path is fully supported without compatibility headers. If this path is being replaced by another + * then use {@link #replaces(Method, String, RestApiVersion)} instead. *

              * For example:
              * {@code
@@ -205,55 +206,57 @@ private RouteBuilder(Method method, String path) {
              *  .build()}
* * @param deprecationMessage the user-visible explanation of this deprecation - * @param deprecatedInVersion the major version in which the deprecation occurred + * @param lastFullySupportedVersion the last {@link RestApiVersion} (i.e. 7) for which this route is fully supported. + * The next major version (i.e. 8) will require compatibility header(s). (;compatible-with=7) + * The next major version (i.e. 9) will have no support whatsoever for this route. * @return a reference to this object. */ - public RouteBuilder deprecated(String deprecationMessage, RestApiVersion deprecatedInVersion) { + public RouteBuilder deprecated(String deprecationMessage, RestApiVersion lastFullySupportedVersion) { assert this.replacedRoute == null; - this.restApiVersion = Objects.requireNonNull(deprecatedInVersion); + this.restApiVersion = Objects.requireNonNull(lastFullySupportedVersion); this.deprecationMessage = Objects.requireNonNull(deprecationMessage); return this; } /** - * Marks that the route being built has been deprecated (for some reason -- the deprecationMessage), and notes the major - * version in which that deprecation occurred. + * Marks that the route being built replaces another route, and notes the last major version in which the path is fully + * supported without compatibility headers. *

              * For example:
              * {@code
-             * Route.builder(GET, "_upgrade")
-             *  .deprecated("The _upgrade API is no longer useful and will be removed.", RestApiVersion.V_7)
-             *  .build()}
+ * Route.builder(GET, "/_security/user/") + * .replaces(GET, "/_xpack/security/user/", RestApiVersion.V_7).build()} * - * @param deprecationMessage the user-visible explanation of this deprecation - * @param deprecationLevel the level at which to log the deprecation - * @param deprecatedInVersion the major version in which the deprecation occurred + * @param method the method being replaced + * @param path the path being replaced + * @param lastFullySupportedVersion the last {@link RestApiVersion} (i.e. 7) for which this route is fully supported. + * The next major version (i.e. 8) will require compatibility header(s). (;compatible-with=7) + * The next major version (i.e. 9) will have no support whatsoever for this route. * @return a reference to this object. */ - public RouteBuilder deprecated(String deprecationMessage, Level deprecationLevel, RestApiVersion deprecatedInVersion) { - assert this.replacedRoute == null; - this.restApiVersion = Objects.requireNonNull(deprecatedInVersion); - this.deprecationMessage = Objects.requireNonNull(deprecationMessage); - this.deprecationLevel = deprecationLevel; + public RouteBuilder replaces(Method method, String path, RestApiVersion lastFullySupportedVersion) { + assert this.deprecationMessage == null; + this.replacedRoute = new Route(method, path, lastFullySupportedVersion, null, null, null); return this; } /** - * Marks that the route being built replaces another route, and notes the major version in which that replacement occurred. + * Marks that the route being built has been deprecated (for some reason -- the deprecationMessage), but will not be removed. *

              * For example:
              * {@code
-             * Route.builder(GET, "/_security/user/")
-             *   .replaces(GET, "/_xpack/security/user/", RestApiVersion.V_7).build()}
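Taken together, the three builder variants documented here cover the different deprecation paths: deprecated(message, lastFullySupportedVersion) and replaces(method, path, lastFullySupportedVersion) for routes that will eventually require compatibility headers and then disappear, and deprecateAndKeep(message) for routes that only warn. A condensed, hypothetical routes() method using the signatures from this diff (the paths are made up; real usages are updated in RestPutIndexTemplateAction and RestKnnSearchAction later in this change):

    @Override
    public List<Route> routes() {
        return List.of(
            // Fully supported through V_7, needs a compatible-with=7 header in 8, removed in 9.
            Route.builder(GET, "/_old_thing/{name}")
                .deprecated("The _old_thing API is deprecated.", RestApiVersion.V_7)
                .build(),
            // Same lifecycle, expressed as a replacement of an older path.
            Route.builder(GET, "/_new_thing/{name}")
                .replaces(GET, "/_old_thing/{name}", RestApiVersion.V_7)
                .build(),
            // Emits a deprecation warning on every call but stays available in future versions.
            Route.builder(GET, "/_legacy_thing/{name}")
                .deprecateAndKeep("The _legacy_thing API is deprecated but will not be removed.")
                .build()
        );
    }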
+ * Route.builder(GET, "_upgrade") + * .deprecated("The _upgrade API is no longer useful but will not be removed.") + * .build()} * - * @param method the method being replaced - * @param path the path being replaced - * @param replacedInVersion the major version in which the replacement occurred + * @param deprecationMessage the user-visible explanation of this deprecation * @return a reference to this object. */ - public RouteBuilder replaces(Method method, String path, RestApiVersion replacedInVersion) { - assert this.deprecationMessage == null; - this.replacedRoute = new Route(method, path, replacedInVersion, null, null, null); + public RouteBuilder deprecateAndKeep(String deprecationMessage) { + assert this.replacedRoute == null; + this.restApiVersion = RestApiVersion.current(); + this.deprecationMessage = Objects.requireNonNull(deprecationMessage); + this.deprecationLevel = Level.WARN; return this; } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java index 603dcdba86730..53ae50bc0b75f 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java @@ -11,6 +11,8 @@ import org.elasticsearch.action.admin.cluster.stats.ClusterStatsRequest; import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; @@ -29,6 +31,8 @@ public class RestClusterStatsAction extends BaseRestHandler { private static final Set SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size"); + private static final Set SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats")); + public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry"); @Override public List routes() { @@ -40,9 +44,17 @@ public String getName() { return "cluster_stats_action"; } + @Override + public Set supportedQueryParameters() { + return Set.of("include_remotes", "nodeId"); + } + @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { - ClusterStatsRequest clusterStatsRequest = new ClusterStatsRequest(request.paramAsStringArray("nodeId", null)); + ClusterStatsRequest clusterStatsRequest = new ClusterStatsRequest( + request.paramAsBoolean("include_remotes", false), + request.paramAsStringArray("nodeId", null) + ); clusterStatsRequest.timeout(getTimeout(request)); return channel -> new RestCancellableNodeClient(client, request.getHttpChannel()).admin() .cluster() @@ -56,6 +68,6 @@ public boolean canTripCircuitBreaker() { @Override public Set supportedCapabilities() { - return SUPPORTED_CAPABILITIES; + return CCS_TELEMETRY_FEATURE_FLAG.isEnabled() ? 
SUPPORTED_CAPABILITIES_CCS_STATS : SUPPORTED_CAPABILITIES; } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesCapabilitiesAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesCapabilitiesAction.java index ad405656631b0..5c8e5928678c3 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesCapabilitiesAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesCapabilitiesAction.java @@ -54,9 +54,12 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli ? new NodesCapabilitiesRequest(client.getLocalNodeId()) : new NodesCapabilitiesRequest(); + // Handle the 'path' parameter, use "/" as default if not provided + String path = URLDecoder.decode(request.param("path", "/"), StandardCharsets.UTF_8); + NodesCapabilitiesRequest r = requestNodes.timeout(getTimeout(request)) .method(RestRequest.Method.valueOf(request.param("method", "GET"))) - .path(URLDecoder.decode(request.param("path"), StandardCharsets.UTF_8)) + .path(path) .parameters(request.paramAsStringArray("parameters", Strings.EMPTY_ARRAY)) .capabilities(request.paramAsStringArray("capabilities", Strings.EMPTY_ARRAY)) .restApiVersion(request.getRestApiVersion()); diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestPutIndexTemplateAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestPutIndexTemplateAction.java index 362713a8f48cb..f70d9351e69c9 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestPutIndexTemplateAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestPutIndexTemplateAction.java @@ -9,7 +9,6 @@ package org.elasticsearch.rest.action.admin.indices; -import org.apache.logging.log4j.Level; import org.elasticsearch.action.admin.indices.template.put.PutIndexTemplateRequest; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.common.Strings; @@ -41,8 +40,8 @@ public class RestPutIndexTemplateAction extends BaseRestHandler { @Override public List routes() { return List.of( - Route.builder(POST, "/_template/{name}").deprecated(DEPRECATION_WARNING, Level.WARN, DEPRECATION_VERSION).build(), - Route.builder(PUT, "/_template/{name}").deprecated(DEPRECATION_WARNING, Level.WARN, DEPRECATION_VERSION).build() + Route.builder(POST, "/_template/{name}").deprecateAndKeep(DEPRECATION_WARNING).build(), + Route.builder(PUT, "/_template/{name}").deprecateAndKeep(DEPRECATION_WARNING).build() ); } diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestKnnSearchAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestKnnSearchAction.java index dd868b8321f1d..9b9be199eedae 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestKnnSearchAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestKnnSearchAction.java @@ -11,7 +11,7 @@ import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.RestCancellableNodeClient; @@ -34,11 +34,18 @@ public class RestKnnSearchAction extends BaseRestHandler { public RestKnnSearchAction() {} + @UpdateForV9 // these routes were ".deprecated" in 
RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // this API in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. The original code remains commented out below for reference. @Override public List routes() { + return List.of( - Route.builder(GET, "{index}/_knn_search").deprecated(DEPRECATION_MESSAGE, RestApiVersion.V_8).build(), - Route.builder(POST, "{index}/_knn_search").deprecated(DEPRECATION_MESSAGE, RestApiVersion.V_8).build() + // Route.builder(GET, "{index}/_knn_search").deprecated(DEPRECATION_MESSAGE, RestApiVersion.V_8).build(), + // Route.builder(POST, "{index}/_knn_search").deprecated(DEPRECATION_MESSAGE, RestApiVersion.V_8).build() + Route.builder(GET, "{index}/_knn_search").deprecateAndKeep(DEPRECATION_MESSAGE).build(), + Route.builder(POST, "{index}/_knn_search").deprecateAndKeep(DEPRECATION_MESSAGE).build() ); } diff --git a/server/src/main/java/org/elasticsearch/search/SearchSortValues.java b/server/src/main/java/org/elasticsearch/search/SearchSortValues.java index eb9fa935d2374..d134d0664e3c7 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchSortValues.java +++ b/server/src/main/java/org/elasticsearch/search/SearchSortValues.java @@ -13,11 +13,9 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.search.SearchHit.Fields; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.Arrays; @@ -84,11 +82,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } - public static SearchSortValues fromXContent(XContentParser parser) throws IOException { - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); - return new SearchSortValues(parser.list().toArray()); - } - /** * Returns the formatted version of the values that sorting was performed against */ diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java b/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java index a5a7cdeaaae5c..9ed62add775c0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java @@ -8,8 +8,6 @@ */ package org.elasticsearch.search.aggregations; -import org.apache.lucene.util.SetOnce; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.DelayableWriteable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -22,21 +20,18 @@ import org.elasticsearch.search.sort.SortValue; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.AbstractList; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.Locale; import 
java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; import static java.util.Collections.unmodifiableMap; -import static org.elasticsearch.common.xcontent.XContentParserUtils.parseTypedKeysObject; /** * Represents a set of {@link InternalAggregation}s @@ -52,7 +47,7 @@ public final class InternalAggregations implements Iterable /** * Constructs a new aggregation. */ - private InternalAggregations(List aggregations) { + public InternalAggregations(List aggregations) { this.aggregations = aggregations; if (aggregations.isEmpty()) { aggregationsAsMap = Map.of(); @@ -126,27 +121,6 @@ public XContentBuilder toXContentInternal(XContentBuilder builder, Params params return builder; } - public static InternalAggregations fromXContent(XContentParser parser) throws IOException { - final List aggregations = new ArrayList<>(); - XContentParser.Token token; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.START_OBJECT) { - SetOnce typedAgg = new SetOnce<>(); - String currentField = parser.currentName(); - parseTypedKeysObject(parser, Aggregation.TYPED_KEYS_DELIMITER, InternalAggregation.class, typedAgg::set); - if (typedAgg.get() != null) { - aggregations.add(typedAgg.get()); - } else { - throw new ParsingException( - parser.getTokenLocation(), - String.format(Locale.ROOT, "Could not parse aggregation keyed as [%s]", currentField) - ); - } - } - } - return new InternalAggregations(aggregations); - } - public static InternalAggregations from(List aggregations) { if (aggregations.isEmpty()) { return EMPTY; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java index d163cafaffe2e..8326342df09f2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.xcontent.ContextParser; import org.elasticsearch.xcontent.ParseField; @@ -32,6 +33,7 @@ * * @deprecated Only for 7.x rest compat */ +@UpdateForV9 // remove this since it's only for 7.x compat and 7.x compat will be removed in 9.0 @Deprecated public class MovAvgPipelineAggregationBuilder extends AbstractPipelineAggregationBuilder { private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonTermsQueryBuilder.class); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java index 7aa4126cf9b35..9516465309c39 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java @@ -9,23 +9,17 @@ package org.elasticsearch.search.fetch.subphase.highlight; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import 
org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.text.Text; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; import java.util.Objects; -import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; - /** * A field highlighted with its highlighted fragments. */ @@ -74,25 +68,6 @@ public void writeTo(StreamOutput out) throws IOException { } } - public static HighlightField fromXContent(XContentParser parser) throws IOException { - ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); - String fieldName = parser.currentName(); - Text[] fragments; - XContentParser.Token token = parser.nextToken(); - if (token == XContentParser.Token.START_ARRAY) { - List values = new ArrayList<>(); - while (parser.nextToken() != XContentParser.Token.END_ARRAY) { - values.add(new Text(parser.text())); - } - fragments = values.toArray(Text.EMPTY_ARRAY); - } else if (token == XContentParser.Token.VALUE_NULL) { - fragments = null; - } else { - throw new ParsingException(parser.getTokenLocation(), "unexpected token type [" + token + "]"); - } - return new HighlightField(fieldName, fragments); - } - @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(name); diff --git a/server/src/main/java/org/elasticsearch/search/profile/ProfileResult.java b/server/src/main/java/org/elasticsearch/search/profile/ProfileResult.java index c8af8671ec232..34049fc027ff0 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/ProfileResult.java +++ b/server/src/main/java/org/elasticsearch/search/profile/ProfileResult.java @@ -14,11 +14,9 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.core.TimeValue; -import org.elasticsearch.xcontent.InstantiatingObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.Collections; @@ -27,22 +25,18 @@ import java.util.Objects; import java.util.concurrent.TimeUnit; -import static java.util.stream.Collectors.toMap; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; -import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; - /** * The result of a profiled *thing*, like a query or an aggregation. See * {@link AbstractProfiler} for the statistic collection framework. 
*/ public final class ProfileResult implements Writeable, ToXContentObject { - static final ParseField TYPE = new ParseField("type"); - static final ParseField DESCRIPTION = new ParseField("description"); - static final ParseField BREAKDOWN = new ParseField("breakdown"); - static final ParseField DEBUG = new ParseField("debug"); + public static final ParseField TYPE = new ParseField("type"); + public static final ParseField DESCRIPTION = new ParseField("description"); + public static final ParseField BREAKDOWN = new ParseField("breakdown"); + public static final ParseField DEBUG = new ParseField("debug"); static final ParseField NODE_TIME = new ParseField("time"); - static final ParseField NODE_TIME_RAW = new ParseField("time_in_nanos"); - static final ParseField CHILDREN = new ParseField("children"); + public static final ParseField NODE_TIME_RAW = new ParseField("time_in_nanos"); + public static final ParseField CHILDREN = new ParseField("children"); private final String type; private final String description; @@ -181,28 +175,4 @@ public int hashCode() { public String toString() { return Strings.toString(this); } - - private static final InstantiatingObjectParser PARSER; - static { - InstantiatingObjectParser.Builder parser = InstantiatingObjectParser.builder( - "profile_result", - true, - ProfileResult.class - ); - parser.declareString(constructorArg(), TYPE); - parser.declareString(constructorArg(), DESCRIPTION); - parser.declareObject( - constructorArg(), - (p, c) -> p.map().entrySet().stream().collect(toMap(Map.Entry::getKey, e -> ((Number) e.getValue()).longValue())), - BREAKDOWN - ); - parser.declareObject(optionalConstructorArg(), (p, c) -> p.map(), DEBUG); - parser.declareLong(constructorArg(), NODE_TIME_RAW); - parser.declareObjectArray(optionalConstructorArg(), (p, c) -> fromXContent(p), CHILDREN); - PARSER = parser.build(); - } - - public static ProfileResult fromXContent(XContentParser p) throws IOException { - return PARSER.parse(p, null); - } } diff --git a/server/src/main/java/org/elasticsearch/search/profile/SearchProfileResults.java b/server/src/main/java/org/elasticsearch/search/profile/SearchProfileResults.java index 5c4c7d2ea5574..8227cb5674809 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/SearchProfileResults.java +++ b/server/src/main/java/org/elasticsearch/search/profile/SearchProfileResults.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.core.Nullable; +import org.elasticsearch.transport.RemoteClusterAware; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; @@ -143,15 +144,10 @@ static ShardProfileId parseCompositeProfileShardId(String compositeId) { Matcher m = SHARD_ID_DECOMPOSITION.matcher(compositeId); if (m.find()) { String nodeId = m.group(1); - String indexName = m.group(2); + String[] tokens = RemoteClusterAware.splitIndexName(m.group(2)); + String cluster = tokens[0]; + String indexName = tokens[1]; int shardId = Integer.parseInt(m.group(3)); - String cluster = null; - if (indexName.contains(":")) { - // index names and cluster names cannot contain a ':', so this split should be accurate - String[] tokens = indexName.split(":", 2); - cluster = tokens[0]; - indexName = tokens[1]; - } return new ShardProfileId(nodeId, indexName, shardId, cluster); } else { assert false : "Unable to match input against expected pattern of [nodeId][indexName][shardId]. 
Input: " + compositeId; diff --git a/server/src/main/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResult.java b/server/src/main/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResult.java index 5223cf969ee04..9d309d34e34eb 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResult.java +++ b/server/src/main/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResult.java @@ -16,15 +16,12 @@ import org.elasticsearch.search.profile.ProfileResult; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; - /** * A container class to hold the profile results for a single shard in the request. * Contains a list of query profiles, a collector tree and a total rewrite tree. @@ -87,13 +84,4 @@ public String toString() { return Strings.toString(this); } - public static AggregationProfileShardResult fromXContent(XContentParser parser) throws IOException { - XContentParser.Token token = parser.currentToken(); - ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); - List aggProfileResults = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - aggProfileResults.add(ProfileResult.fromXContent(parser)); - } - return new AggregationProfileShardResult(aggProfileResults); - } } diff --git a/server/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java b/server/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java index 9b30b7e16b25d..637b2dbe0ba49 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java +++ b/server/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java @@ -19,7 +19,6 @@ import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.ArrayList; @@ -27,8 +26,6 @@ import java.util.Objects; import java.util.concurrent.TimeUnit; -import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; - /** * Public interface and serialization container for profiled timings of the * Collectors used in the search. 
Children CollectorResult's may be @@ -43,11 +40,11 @@ public class CollectorResult extends ProfilerCollectorResult implements ToXConte public static final String REASON_AGGREGATION = "aggregation"; public static final String REASON_AGGREGATION_GLOBAL = "aggregation_global"; - private static final ParseField NAME = new ParseField("name"); - private static final ParseField REASON = new ParseField("reason"); - private static final ParseField TIME = new ParseField("time"); - private static final ParseField TIME_NANOS = new ParseField("time_in_nanos"); - private static final ParseField CHILDREN = new ParseField("children"); + public static final ParseField NAME = new ParseField("name"); + public static final ParseField REASON = new ParseField("reason"); + public static final ParseField TIME = new ParseField("time"); + public static final ParseField TIME_NANOS = new ParseField("time_in_nanos"); + public static final ParseField CHILDREN = new ParseField("children"); public CollectorResult(String collectorName, String reason, long time, List children) { super(collectorName, reason, time, new ArrayList<>(children)); @@ -119,41 +116,4 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par return builder; } - public static CollectorResult fromXContent(XContentParser parser) throws IOException { - XContentParser.Token token = parser.currentToken(); - ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser); - String currentFieldName = null; - String name = null, reason = null; - long time = -1; - List children = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (token.isValue()) { - if (NAME.match(currentFieldName, parser.getDeprecationHandler())) { - name = parser.text(); - } else if (REASON.match(currentFieldName, parser.getDeprecationHandler())) { - reason = parser.text(); - } else if (TIME.match(currentFieldName, parser.getDeprecationHandler())) { - // we need to consume this value, but we use the raw nanosecond value - parser.text(); - } else if (TIME_NANOS.match(currentFieldName, parser.getDeprecationHandler())) { - time = parser.longValue(); - } else { - parser.skipChildren(); - } - } else if (token == XContentParser.Token.START_ARRAY) { - if (CHILDREN.match(currentFieldName, parser.getDeprecationHandler())) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - children.add(CollectorResult.fromXContent(parser)); - } - } else { - parser.skipChildren(); - } - } else { - parser.skipChildren(); - } - } - return new CollectorResult(name, reason, time, children); - } } diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java index 76b93a2f802ec..0d6b2cf45138b 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java @@ -53,6 +53,45 @@ protected static Set getEnabledRemoteClusters(final Settings settings) { return RemoteConnectionStrategy.getRemoteClusters(settings); } + /** + * Check whether the index expression represents remote index or not. + * The index name is assumed to be individual index (no commas) but can contain `-`, wildcards, + * datemath, remote cluster name and any other syntax permissible in index expression component. 
+ */ + public static boolean isRemoteIndexName(String indexExpression) { + if (indexExpression.isEmpty() || indexExpression.charAt(0) == '<' || indexExpression.startsWith("-<")) { + // This is date math, but even if it is not, the remote can't start with '<'. + // Thus, whatever it is, this is definitely not a remote index. + return false; + } + // Note remote index name also can not start with ':' + return indexExpression.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR) > 0; + } + + /** + * Split the index name into remote cluster alias and index name. + * The index expression is assumed to be individual index (no commas) but can contain `-`, wildcards, + * datemath, remote cluster name and any other syntax permissible in index expression component. + * There's no guarantee the components actually represent existing remote cluster or index, only + * rudimentary checks are done on the syntax. + */ + public static String[] splitIndexName(String indexExpression) { + if (indexExpression.isEmpty() || indexExpression.charAt(0) == '<' || indexExpression.startsWith("-<")) { + // This is date math, but even if it is not, the remote can't start with '<'. + // Thus, whatever it is, this is definitely not a remote index. + return new String[] { null, indexExpression }; + } + int i = indexExpression.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR); + if (i == 0) { + throw new IllegalArgumentException("index name [" + indexExpression + "] is invalid because the remote part is empty"); + } + if (i < 0) { + return new String[] { null, indexExpression }; + } else { + return new String[] { indexExpression.substring(0, i), indexExpression.substring(i + 1) }; + } + } + /** * Groups indices per cluster by splitting remote cluster-alias, index-name pairs on {@link #REMOTE_CLUSTER_INDEX_SEPARATOR}. All * indices per cluster are collected as a list in the returned map keyed by the cluster alias. Local indices are grouped under @@ -77,18 +116,20 @@ protected Map> groupClusterIndices(Set remoteCluste for (String index : requestIndices) { // ensure that `index` is a remote name and not a datemath expression which includes ':' symbol // Remote names can not start with '<' so we are assuming that if the first character is '<' then it is a datemath expression. - boolean isDateMathExpression = (index.charAt(0) == '<' || index.startsWith("-<")); - int i = index.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR); - if (isDateMathExpression == false && i >= 0) { + String[] split = splitIndexName(index); + if (split[0] != null) { if (isRemoteClusterClientEnabled == false) { assert remoteClusterNames.isEmpty() : remoteClusterNames; throw new IllegalArgumentException("node [" + nodeName + "] does not have the remote cluster client role enabled"); } - int startIdx = index.charAt(0) == '-' ? 1 : 0; - String remoteClusterName = index.substring(startIdx, i); - List clusters = ClusterNameExpressionResolver.resolveClusterNames(remoteClusterNames, remoteClusterName); - String indexName = index.substring(i + 1); - if (startIdx == 1) { + String remoteClusterName = split[0]; + String indexName = split[1]; + boolean isNegative = remoteClusterName.startsWith("-"); + List clusters = ClusterNameExpressionResolver.resolveClusterNames( + remoteClusterNames, + isNegative ? 
remoteClusterName.substring(1) : remoteClusterName + ); + if (isNegative) { if (indexName.equals("*") == false) { throw new IllegalArgumentException( Strings.format( diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java index d0638fcf7a2de..f0cafb956457e 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java @@ -43,7 +43,7 @@ * {@link SniffConnectionStrategy#REMOTE_CONNECTIONS_PER_CLUSTER} until either all eligible nodes are exhausted or the maximum number of * connections per cluster has been reached. */ -final class RemoteClusterConnection implements Closeable { +public final class RemoteClusterConnection implements Closeable { private final TransportService transportService; private final RemoteConnectionManager remoteConnectionManager; @@ -99,7 +99,7 @@ void setSkipUnavailable(boolean skipUnavailable) { /** * Returns whether this cluster is configured to be skipped when unavailable */ - boolean isSkipUnavailable() { + public boolean isSkipUnavailable() { return skipUnavailable; } diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index f1afdfe1f186b..620b80e91cb45 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -277,7 +277,7 @@ public void maybeEnsureConnectedAndGetConnection( } } - RemoteClusterConnection getRemoteClusterConnection(String cluster) { + public RemoteClusterConnection getRemoteClusterConnection(String cluster) { if (enabled == false) { throw new IllegalArgumentException( "this node does not have the " + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE.roleName() + " role" diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/ClusterUpdateSettingsResponseTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/ClusterUpdateSettingsResponseTests.java index 4eed3a642ca4d..f5d76dbc2bd2d 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/ClusterUpdateSettingsResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/ClusterUpdateSettingsResponseTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings.Builder; import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.test.rest.TestResponseParsers; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.XContentParser; @@ -22,7 +23,6 @@ import java.util.Set; import java.util.function.Predicate; -import static org.elasticsearch.action.support.master.AcknowledgedResponse.declareAcknowledgedField; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; public class ClusterUpdateSettingsResponseTests extends AbstractXContentSerializingTestCase { @@ -33,7 +33,7 @@ public class ClusterUpdateSettingsResponseTests extends AbstractXContentSerializ args -> new ClusterUpdateSettingsResponse((boolean) args[0], (Settings) args[1], (Settings) args[2]) ); static { - declareAcknowledgedField(PARSER); + TestResponseParsers.declareAcknowledgedField(PARSER); PARSER.declareObject(constructorArg(), 
(p, c) -> Settings.fromXContent(p), ClusterUpdateSettingsResponse.TRANSIENT); PARSER.declareObject(constructorArg(), (p, c) -> Settings.fromXContent(p), ClusterUpdateSettingsResponse.PERSISTENT); } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponseTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponseTests.java index 8a9b7abc348a6..b22884de16a65 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/settings/RestClusterGetSettingsResponseTests.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.AbstractXContentTestCase; +import org.elasticsearch.test.rest.TestResponseParsers; import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; @@ -20,7 +21,7 @@ public class RestClusterGetSettingsResponseTests extends AbstractXContentTestCas @Override protected RestClusterGetSettingsResponse doParseInstance(XContentParser parser) throws IOException { - return RestClusterGetSettingsResponse.fromXContent(parser); + return TestResponseParsers.parseClusterSettingsResponse(parser); } @Override diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponseTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponseTests.java index 279ba31267fd0..cee67e9efa024 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponseTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.test.rest.TestResponseParsers; import org.elasticsearch.xcontent.XContentParser; public class CreateIndexResponseTests extends AbstractXContentSerializingTestCase { @@ -52,7 +53,7 @@ protected CreateIndexResponse mutateInstance(CreateIndexResponse response) { @Override protected CreateIndexResponse doParseInstance(XContentParser parser) { - return CreateIndexResponse.fromXContent(parser); + return TestResponseParsers.parseCreateIndexResponse(parser); } public void testToXContent() { diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/open/OpenIndexResponseTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/open/OpenIndexResponseTests.java index 5f0382f284e49..424be2eb20e37 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/open/OpenIndexResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/open/OpenIndexResponseTests.java @@ -11,11 +11,10 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.test.rest.TestResponseParsers; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.XContentParser; -import static org.elasticsearch.action.support.master.ShardsAcknowledgedResponse.declareAcknowledgedAndShardsAcknowledgedFields; - public class OpenIndexResponseTests extends AbstractXContentSerializingTestCase { private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -25,7 +24,7 @@ public class 
OpenIndexResponseTests extends AbstractXContentSerializingTestCase< ); static { - declareAcknowledgedAndShardsAcknowledgedFields(PARSER); + TestResponseParsers.declareAcknowledgedAndShardsAcknowledgedFields(PARSER); } @Override diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java index dc66bae52dc2a..2c618f19a3c75 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java @@ -88,9 +88,10 @@ public void writeTo(StreamOutput out) throws IOException { null ); - final var ex = asInstanceOf( + final var ex = safeAwaitFailure( IllegalArgumentException.class, - safeAwaitFailure(ResolveClusterActionResponse.class, listener -> action.doExecute(null, request, listener)) + ResolveClusterActionResponse.class, + listener -> action.doExecute(null, request, listener) ); assertThat(ex.getMessage(), containsString("not compatible with version")); diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequestTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequestTests.java index 1f0e3c52d55bb..a7fa81eb24a57 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequestTests.java @@ -21,7 +21,6 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.indices.IndicesModule; @@ -30,9 +29,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParseException; -import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.xcontent.json.JsonXContent; import org.junit.Before; import java.io.IOException; @@ -276,72 +273,4 @@ public void testValidation() { ); } } - - public void testParsingWithType() throws Exception { - final XContentBuilder builder = XContentFactory.jsonBuilder() - .startObject() - .startObject("conditions") - .field("max_age", "10d") - .field("max_docs", 100) - .endObject() - .startObject("mappings") - .startObject("type1") - .startObject("properties") - .startObject("field1") - .field("type", "string") - .field("index", "not_analyzed") - .endObject() - .endObject() - .endObject() - .endObject() - .startObject("settings") - .field("number_of_shards", 10) - .endObject() - .startObject("aliases") - .startObject("alias1") - .endObject() - .endObject() - .endObject(); - - try ( - XContentParser parser = createParserWithCompatibilityFor( - JsonXContent.jsonXContent, - BytesReference.bytes(builder).utf8ToString(), - RestApiVersion.V_7 - ) - ) { - final RolloverRequest request = new RolloverRequest(randomAlphaOfLength(10), randomAlphaOfLength(10)); - request.fromXContent(true, parser); - Map> conditions = request.getConditions().getConditions(); - assertThat(conditions.size(), equalTo(2)); - 
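
The call site above (and several more below, e.g. in TransportBroadcastByNodeActionTests and ParentTaskAssigningClientTests) folds asInstanceOf into a new safeAwaitFailure overload. A hedged sketch of the shape that overload appears to have, inferred purely from these usages; the actual helper lives in ESTestCase and may differ, and the listener-consuming functional interface is written as java.util.function.Consumer here only for illustration.

    // Inferred shape only — not copied from ESTestCase.
    public static <E extends Exception, R> E safeAwaitFailure(
        Class<E> expectedExceptionType,      // the exception the listener is expected to fail with
        Class<R> responseType,               // pins the listener's response generic; never instantiated
        Consumer<ActionListener<R>> consumer
    ) {
        // presumably: await the failure from the listener, then assert/cast it to the expected type,
        // composing the pre-existing single-exception helper with asInstanceOf
        return asInstanceOf(expectedExceptionType, safeAwaitFailure(responseType, consumer));
    }
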
assertThat(request.getCreateIndexRequest().mappings(), equalTo(""" - {"_doc":{"properties":{"field1":{"index":"not_analyzed","type":"string"}}}}""")); - } - } - - public void testTypedRequestWithoutIncludeTypeName() throws IOException { - final XContentBuilder builder = XContentFactory.jsonBuilder() - .startObject() - .startObject("mappings") - .startObject("_doc") - .startObject("properties") - .startObject("field1") - .field("type", "string") - .field("index", "not_analyzed") - .endObject() - .endObject() - .endObject() - .endObject() - .endObject(); - try ( - XContentParser parser = createParserWithCompatibilityFor( - JsonXContent.jsonXContent, - BytesReference.bytes(builder).utf8ToString(), - RestApiVersion.V_7 - ) - ) { - final RolloverRequest request = new RolloverRequest(randomAlphaOfLength(10), randomAlphaOfLength(10)); - expectThrows(IllegalArgumentException.class, () -> request.fromXContent(false, parser)); - } - } } diff --git a/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java b/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java index b87dfd07181dc..c39be42f96150 100644 --- a/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java +++ b/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.cluster.coordination.NoMasterBlockService; import org.elasticsearch.cluster.metadata.ComposableIndexTemplate; import org.elasticsearch.cluster.metadata.DataStream; +import org.elasticsearch.cluster.metadata.DataStreamOptions; import org.elasticsearch.cluster.metadata.DataStreamTestHelper; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; @@ -130,7 +131,7 @@ public class BulkOperationTests extends ESTestCase { ); private final DataStream dataStream3 = DataStream.builder(fsRolloverDataStreamName, List.of(ds3BackingIndex1.getIndex())) .setGeneration(1) - .setFailureStoreEnabled(true) + .setDataStreamOptions(DataStreamOptions.FAILURE_STORE_ENABLED) .setFailureIndices( DataStream.DataStreamIndices.failureIndicesBuilder(List.of(ds3FailureStore1.getIndex())).setRolloverOnWrite(true).build() ) diff --git a/server/src/test/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesActionTests.java b/server/src/test/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesActionTests.java index 0c07896986022..3031f17cccc62 100644 --- a/server/src/test/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesActionTests.java @@ -87,9 +87,10 @@ protected void doWriteTo(StreamOutput out) throws IOException { null ); - IllegalArgumentException ex = asInstanceOf( + IllegalArgumentException ex = safeAwaitFailure( IllegalArgumentException.class, - safeAwaitFailure(FieldCapabilitiesResponse.class, l -> action.doExecute(null, fieldCapsRequest, l)) + FieldCapabilitiesResponse.class, + l -> action.doExecute(null, fieldCapsRequest, l) ); assertThat( diff --git a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java index 8211fc8dfa4c6..f2bc561792991 100644 --- a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java @@ -471,52 +471,6 @@ public void 
testWritingExpandWildcards() throws IOException { ); } - public void testEmptyFirstLine1() throws Exception { - MultiSearchRequest request = parseMultiSearchRequestFromString(""" - - - { "query": {"match_all": {}}} - {} - { "query": {"match_all": {}}} - - { "query": {"match_all": {}}} - {} - { "query": {"match_all": {}}} - """, RestApiVersion.V_7); - assertThat(request.requests().size(), equalTo(4)); - for (SearchRequest searchRequest : request.requests()) { - assertThat(searchRequest.indices().length, equalTo(0)); - assertThat(searchRequest.source().query(), instanceOf(MatchAllQueryBuilder.class)); - } - assertCriticalWarnings( - "support for empty first line before any action metadata in msearch API is deprecated and will be removed " - + "in the next major version" - ); - } - - public void testEmptyFirstLine2() throws Exception { - MultiSearchRequest request = parseMultiSearchRequestFromString(""" - - {} - { "query": {"match_all": {}}} - - { "query": {"match_all": {}}} - {} - { "query": {"match_all": {}}} - - { "query": {"match_all": {}}} - """, RestApiVersion.V_7); - assertThat(request.requests().size(), equalTo(4)); - for (SearchRequest searchRequest : request.requests()) { - assertThat(searchRequest.indices().length, equalTo(0)); - assertThat(searchRequest.source().query(), instanceOf(MatchAllQueryBuilder.class)); - } - assertCriticalWarnings( - "support for empty first line before any action metadata in msearch API is deprecated and will be removed " - + "in the next major version" - ); - } - public void testTaskDescription() { MultiSearchRequest request = new MultiSearchRequest(); request.add(new SearchRequest().preference("abc")); diff --git a/server/src/test/java/org/elasticsearch/action/search/ShardSearchFailureTests.java b/server/src/test/java/org/elasticsearch/action/search/ShardSearchFailureTests.java index 9d40911059b87..87f5df26d2b5d 100644 --- a/server/src/test/java/org/elasticsearch/action/search/ShardSearchFailureTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/ShardSearchFailureTests.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.search.SearchResponseUtils; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TransportVersionUtils; @@ -70,7 +71,7 @@ private void doFromXContentTestWithRandomFields(boolean addRandomFields) throws ShardSearchFailure parsed; try (XContentParser parser = createParser(xContentType.xContent(), mutated)) { assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); - parsed = ShardSearchFailure.fromXContent(parser); + parsed = SearchResponseUtils.parseShardSearchFailure(parser); assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken()); assertNull(parser.nextToken()); } diff --git a/server/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java b/server/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java index cd568ab1571f5..e0cd8d8390c74 100644 --- a/server/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java @@ -344,10 +344,8 @@ public void testGlobalBlock() { assertEquals( "blocked by: [SERVICE_UNAVAILABLE/1/test-block];", - 
asInstanceOf( - ClusterBlockException.class, - safeAwaitFailure(Response.class, listener -> action.doExecute(null, request, listener)) - ).getMessage() + safeAwaitFailure(ClusterBlockException.class, Response.class, listener -> action.doExecute(null, request, listener)) + .getMessage() ); } @@ -362,10 +360,8 @@ public void testRequestBlock() { setState(clusterService, ClusterState.builder(clusterService.state()).blocks(block)); assertEquals( "index [" + TEST_INDEX + "] blocked by: [SERVICE_UNAVAILABLE/1/test-block];", - asInstanceOf( - ClusterBlockException.class, - safeAwaitFailure(Response.class, listener -> action.doExecute(null, request, listener)) - ).getMessage() + safeAwaitFailure(ClusterBlockException.class, Response.class, listener -> action.doExecute(null, request, listener)) + .getMessage() ); } diff --git a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java index 9a51757189f8b..8c3749dbd3a45 100644 --- a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java +++ b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java @@ -389,8 +389,8 @@ public void testMaxFileSizeCheck() throws NodeValidationException { final AtomicLong maxFileSize = new AtomicLong(randomIntBetween(0, Integer.MAX_VALUE)); final BootstrapChecks.MaxFileSizeCheck check = new BootstrapChecks.MaxFileSizeCheck() { @Override - long getMaxFileSize() { - return maxFileSize.get(); + protected ProcessLimits getProcessLimits() { + return new ProcessLimits(ProcessLimits.UNKNOWN, ProcessLimits.UNKNOWN, maxFileSize.get()); } }; diff --git a/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java b/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java index b067509f4668c..7736ae63724f6 100644 --- a/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java +++ b/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java @@ -114,31 +114,27 @@ public void getConnection( ); assertEquals( "fake remote-cluster client", - asInstanceOf( + safeAwaitFailure( UnsupportedOperationException.class, - safeAwaitFailure( - ClusterStateResponse.class, - listener -> remoteClusterClient.execute( - ClusterStateAction.REMOTE_TYPE, - new ClusterStateRequest(TEST_REQUEST_TIMEOUT), - listener - ) + ClusterStateResponse.class, + listener -> remoteClusterClient.execute( + ClusterStateAction.REMOTE_TYPE, + new ClusterStateRequest(TEST_REQUEST_TIMEOUT), + listener ) ).getMessage() ); assertEquals( "fake remote-cluster client", - asInstanceOf( + safeAwaitFailure( UnsupportedOperationException.class, - safeAwaitFailure( - ClusterStateResponse.class, - listener -> remoteClusterClient.execute( - null, - ClusterStateAction.REMOTE_TYPE, - new ClusterStateRequest(TEST_REQUEST_TIMEOUT), - listener - ) + ClusterStateResponse.class, + listener -> remoteClusterClient.execute( + null, + ClusterStateAction.REMOTE_TYPE, + new ClusterStateRequest(TEST_REQUEST_TIMEOUT), + listener ) ).getMessage() ); diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamFailureStoreTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamFailureStoreTests.java index 4a9f13170f694..ffd703048dbd3 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamFailureStoreTests.java +++ 
b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamFailureStoreTests.java @@ -15,6 +15,8 @@ import java.io.IOException; +import static org.hamcrest.Matchers.containsString; + public class DataStreamFailureStoreTests extends AbstractXContentSerializingTestCase { @Override @@ -40,4 +42,9 @@ protected DataStreamFailureStore doParseInstance(XContentParser parser) throws I static DataStreamFailureStore randomFailureStore() { return new DataStreamFailureStore(randomBoolean()); } + + public void testInvalidEmptyConfiguration() { + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> new DataStreamFailureStore((Boolean) null)); + assertThat(exception.getMessage(), containsString("at least one non-null configuration value")); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamOptionsTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamOptionsTests.java index 764c02d7fcec6..020955d226a0f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamOptionsTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamOptionsTests.java @@ -29,11 +29,11 @@ protected DataStreamOptions createTestInstance() { @Override protected DataStreamOptions mutateInstance(DataStreamOptions instance) throws IOException { - var failureStore = instance.getFailureStore(); + var failureStore = instance.failureStore(); if (failureStore == null) { failureStore = DataStreamFailureStoreTests.randomFailureStore(); } else { - failureStore = randomBoolean() ? null : new DataStreamFailureStore(failureStore.enabled() == false); + failureStore = randomBoolean() ? null : randomValueOtherThan(failureStore, DataStreamFailureStoreTests::randomFailureStore); } return new DataStreamOptions(failureStore); } diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamTests.java index 76573049c6962..cd9113ee551c7 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamTests.java @@ -94,13 +94,13 @@ protected DataStream mutateInstance(DataStream instance) { var allowsCustomRouting = instance.isAllowCustomRouting(); var indexMode = instance.getIndexMode(); var lifecycle = instance.getLifecycle(); - var failureStore = instance.isFailureStoreEnabled(); + var dataStreamOptions = instance.getDataStreamOptions(); var failureIndices = instance.getFailureIndices().getIndices(); var rolloverOnWrite = instance.rolloverOnWrite(); var autoShardingEvent = instance.getAutoShardingEvent(); var failureRolloverOnWrite = instance.getFailureIndices().isRolloverOnWrite(); var failureAutoShardingEvent = instance.getBackingIndices().getAutoShardingEvent(); - switch (between(0, 14)) { + switch (between(0, 15)) { case 0 -> name = randomAlphaOfLength(10); case 1 -> indices = randomNonEmptyIndexInstances(); case 2 -> generation = instance.getGeneration() + randomIntBetween(1, 10); @@ -134,23 +134,23 @@ protected DataStream mutateInstance(DataStream instance) { case 9 -> lifecycle = randomBoolean() && lifecycle != null ? 
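
Taken together, the data-stream test changes in this area replace the old boolean failure-store flag with a DataStreamOptions value. A hedged sketch of the model as these call sites use it; constants and accessors are taken from the diff itself, while backingIndices is a placeholder List<Index> that is not part of the original.

    // Shapes below are inferred from the call sites in this diff, not from the class itself.
    DataStreamOptions on  = DataStreamOptions.FAILURE_STORE_ENABLED;                 // failure store on
    DataStreamOptions off = DataStreamOptions.FAILURE_STORE_DISABLED;                // failure store off
    DataStreamOptions custom = new DataStreamOptions(new DataStreamFailureStore(true));
    boolean enabled = custom.failureStore().enabled();                               // -> true
    // new DataStreamFailureStore((Boolean) null) is rejected: it must carry
    // "at least one non-null configuration value" (see testInvalidEmptyConfiguration above).

    // Builders now take the options object where they previously took a boolean:
    DataStream dataStream = DataStream.builder("logs-app", backingIndices)           // backingIndices: placeholder
        .setGeneration(1)
        .setDataStreamOptions(DataStreamOptions.FAILURE_STORE_ENABLED)               // was: setFailureStoreEnabled(true)
        .build();
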
null : DataStreamLifecycle.newBuilder().dataRetention(randomMillisUpToYear9999()).build(); - case 10 -> { - failureIndices = randomValueOtherThan(failureIndices, DataStreamTestHelper::randomIndexInstances); - failureStore = failureIndices.isEmpty() == false; - } - case 11 -> { + case 10 -> failureIndices = randomValueOtherThan(failureIndices, DataStreamTestHelper::randomIndexInstances); + case 11 -> dataStreamOptions = dataStreamOptions.isEmpty() ? new DataStreamOptions(new DataStreamFailureStore(randomBoolean())) + : randomBoolean() ? (randomBoolean() ? null : DataStreamOptions.EMPTY) + : new DataStreamOptions(new DataStreamFailureStore(dataStreamOptions.failureStore().enabled() == false)); + case 12 -> { rolloverOnWrite = rolloverOnWrite == false; isReplicated = rolloverOnWrite == false && isReplicated; } - case 12 -> { + case 13 -> { if (randomBoolean() || autoShardingEvent == null) { // If we're mutating the auto sharding event of the failure store, we need to ensure there's at least one failure index. if (failureIndices.isEmpty()) { failureIndices = DataStreamTestHelper.randomNonEmptyIndexInstances(); - failureStore = true; + dataStreamOptions = DataStreamOptions.FAILURE_STORE_ENABLED; } autoShardingEvent = new DataStreamAutoShardingEvent( - failureIndices.get(failureIndices.size() - 1).getName(), + failureIndices.getLast().getName(), randomIntBetween(1, 10), randomMillisUpToYear9999() ); @@ -158,19 +158,13 @@ protected DataStream mutateInstance(DataStream instance) { autoShardingEvent = null; } } - case 13 -> { + case 14 -> { failureRolloverOnWrite = failureRolloverOnWrite == false; isReplicated = failureRolloverOnWrite == false && isReplicated; } - case 14 -> { - failureAutoShardingEvent = randomBoolean() && failureAutoShardingEvent != null - ? null - : new DataStreamAutoShardingEvent( - indices.get(indices.size() - 1).getName(), - randomIntBetween(1, 10), - randomMillisUpToYear9999() - ); - } + case 15 -> failureAutoShardingEvent = randomBoolean() && failureAutoShardingEvent != null + ? 
null + : new DataStreamAutoShardingEvent(indices.getLast().getName(), randomIntBetween(1, 10), randomMillisUpToYear9999()); } return new DataStream( @@ -184,7 +178,7 @@ protected DataStream mutateInstance(DataStream instance) { allowsCustomRouting, indexMode, lifecycle, - failureStore, + dataStreamOptions, new DataStream.DataStreamIndices(DataStream.BACKING_INDEX_PREFIX, indices, rolloverOnWrite, autoShardingEvent), new DataStream.DataStreamIndices( DataStream.BACKING_INDEX_PREFIX, @@ -198,7 +192,7 @@ protected DataStream mutateInstance(DataStream instance) { public void testRollover() { DataStream ds = DataStreamTestHelper.randomInstance().promoteDataStream(); Tuple newCoordinates = ds.nextWriteIndexAndGeneration(Metadata.EMPTY_METADATA, ds.getBackingIndices()); - final DataStream rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), false, null); + final DataStream rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), null, null); assertThat(rolledDs.getName(), equalTo(ds.getName())); assertThat(rolledDs.getGeneration(), equalTo(ds.getGeneration() + 1)); assertThat(rolledDs.getIndices().size(), equalTo(ds.getIndices().size() + 1)); @@ -225,7 +219,7 @@ public void testRolloverWithConflictingBackingIndexName() { } final Tuple newCoordinates = ds.nextWriteIndexAndGeneration(builder.build(), ds.getBackingIndices()); - final DataStream rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), false, null); + final DataStream rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), null, null); assertThat(rolledDs.getName(), equalTo(ds.getName())); assertThat(rolledDs.getGeneration(), equalTo(ds.getGeneration() + numConflictingIndices + 1)); assertThat(rolledDs.getIndices().size(), equalTo(ds.getIndices().size() + 1)); @@ -242,7 +236,12 @@ public void testRolloverUpgradeToTsdbDataStream() { .build(); var newCoordinates = ds.nextWriteIndexAndGeneration(Metadata.EMPTY_METADATA, ds.getBackingIndices()); - var rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), true, null); + var rolledDs = ds.rollover( + new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), + newCoordinates.v2(), + IndexMode.TIME_SERIES, + null + ); assertThat(rolledDs.getName(), equalTo(ds.getName())); assertThat(rolledDs.getGeneration(), equalTo(ds.getGeneration() + 1)); assertThat(rolledDs.getIndices().size(), equalTo(ds.getIndices().size() + 1)); @@ -251,11 +250,41 @@ public void testRolloverUpgradeToTsdbDataStream() { assertThat(rolledDs.getIndexMode(), equalTo(IndexMode.TIME_SERIES)); } - public void testRolloverDowngradeToRegularDataStream() { + public void testRolloverUpgradeToLogsdbDataStream() { + DataStream ds = DataStreamTestHelper.randomInstance() + .copy() + .setReplicated(false) + .setIndexMode(randomBoolean() ? 
IndexMode.STANDARD : null) + .build(); + var newCoordinates = ds.nextWriteIndexAndGeneration(Metadata.EMPTY_METADATA, ds.getBackingIndices()); + + var rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), IndexMode.LOGSDB, null); + assertThat(rolledDs.getName(), equalTo(ds.getName())); + assertThat(rolledDs.getGeneration(), equalTo(ds.getGeneration() + 1)); + assertThat(rolledDs.getIndices().size(), equalTo(ds.getIndices().size() + 1)); + assertTrue(rolledDs.getIndices().containsAll(ds.getIndices())); + assertTrue(rolledDs.getIndices().contains(rolledDs.getWriteIndex())); + assertThat(rolledDs.getIndexMode(), equalTo(IndexMode.LOGSDB)); + } + + public void testRolloverDowngradeFromTsdbToRegularDataStream() { DataStream ds = DataStreamTestHelper.randomInstance().copy().setReplicated(false).setIndexMode(IndexMode.TIME_SERIES).build(); var newCoordinates = ds.nextWriteIndexAndGeneration(Metadata.EMPTY_METADATA, ds.getBackingIndices()); - var rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), false, null); + var rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), null, null); + assertThat(rolledDs.getName(), equalTo(ds.getName())); + assertThat(rolledDs.getGeneration(), equalTo(ds.getGeneration() + 1)); + assertThat(rolledDs.getIndices().size(), equalTo(ds.getIndices().size() + 1)); + assertTrue(rolledDs.getIndices().containsAll(ds.getIndices())); + assertTrue(rolledDs.getIndices().contains(rolledDs.getWriteIndex())); + assertThat(rolledDs.getIndexMode(), nullValue()); + } + + public void testRolloverDowngradeFromLogsdbToRegularDataStream() { + DataStream ds = DataStreamTestHelper.randomInstance().copy().setReplicated(false).setIndexMode(IndexMode.LOGSDB).build(); + var newCoordinates = ds.nextWriteIndexAndGeneration(Metadata.EMPTY_METADATA, ds.getBackingIndices()); + + var rolledDs = ds.rollover(new Index(newCoordinates.v1(), UUIDs.randomBase64UUID()), newCoordinates.v2(), null, null); assertThat(rolledDs.getName(), equalTo(ds.getName())); assertThat(rolledDs.getGeneration(), equalTo(ds.getGeneration() + 1)); assertThat(rolledDs.getIndices().size(), equalTo(ds.getIndices().size() + 1)); @@ -1879,7 +1908,7 @@ public void testXContentSerializationWithRolloverAndEffectiveRetention() throws randomBoolean(), randomBoolean() ? IndexMode.STANDARD : null, // IndexMode.TIME_SERIES triggers validation that many unit tests doesn't pass lifecycle, - failureStore, + new DataStreamOptions(new DataStreamFailureStore(failureStore)), failureIndices, false, null @@ -2067,7 +2096,7 @@ public void testWriteFailureIndex() { randomBoolean(), randomBoolean() ? IndexMode.STANDARD : IndexMode.TIME_SERIES, DataStreamLifecycleTests.randomLifecycle(), - false, + DataStreamOptions.FAILURE_STORE_DISABLED, List.of(), replicated == false && randomBoolean(), null @@ -2085,7 +2114,7 @@ public void testWriteFailureIndex() { randomBoolean(), randomBoolean() ? IndexMode.STANDARD : IndexMode.TIME_SERIES, DataStreamLifecycleTests.randomLifecycle(), - true, + DataStreamOptions.FAILURE_STORE_ENABLED, List.of(), replicated == false && randomBoolean(), null @@ -2110,7 +2139,7 @@ public void testWriteFailureIndex() { randomBoolean(), randomBoolean() ? 
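
The rollover tests above now pass a target IndexMode (or null) where the old signature took a boolean TSDB flag. A minimal sketch of the call shape as used in these tests; the final null argument mirrors the tests and is assumed to be the auto-sharding event.

    // Shape inferred from the test call sites above.
    var next = ds.nextWriteIndexAndGeneration(Metadata.EMPTY_METADATA, ds.getBackingIndices());
    Index writeIndex = new Index(next.v1(), UUIDs.randomBase64UUID());
    DataStream tsdb     = ds.rollover(writeIndex, next.v2(), IndexMode.TIME_SERIES, null); // upgrade to TSDB
    DataStream logsdb   = ds.rollover(writeIndex, next.v2(), IndexMode.LOGSDB, null);      // upgrade to logsdb
    DataStream standard = ds.rollover(writeIndex, next.v2(), null, null);                  // drop the index mode
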
IndexMode.STANDARD : IndexMode.TIME_SERIES, DataStreamLifecycleTests.randomLifecycle(), - true, + DataStreamOptions.FAILURE_STORE_ENABLED, failureIndices, replicated == false && randomBoolean(), null @@ -2134,7 +2163,7 @@ public void testIsFailureIndex() { randomBoolean(), randomBoolean() ? IndexMode.STANDARD : IndexMode.TIME_SERIES, DataStreamLifecycleTests.randomLifecycle(), - false, + DataStreamOptions.FAILURE_STORE_DISABLED, List.of(), replicated == false && randomBoolean(), null @@ -2156,7 +2185,7 @@ public void testIsFailureIndex() { randomBoolean(), randomBoolean() ? IndexMode.STANDARD : IndexMode.TIME_SERIES, DataStreamLifecycleTests.randomLifecycle(), - true, + DataStreamOptions.FAILURE_STORE_ENABLED, List.of(), replicated == false && randomBoolean(), null @@ -2187,7 +2216,7 @@ public void testIsFailureIndex() { randomBoolean(), randomBoolean() ? IndexMode.STANDARD : IndexMode.TIME_SERIES, DataStreamLifecycleTests.randomLifecycle(), - true, + DataStreamOptions.FAILURE_STORE_ENABLED, failureIndices, replicated == false && randomBoolean(), null diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamServiceTests.java index bbcf1ca33a0c2..6c76abf7cebe3 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateDataStreamServiceTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.indices.ExecutorNames; @@ -77,6 +78,43 @@ public void testCreateDataStream() throws Exception { assertThat(newState.metadata().dataStreams().get(dataStreamName).isHidden(), is(false)); assertThat(newState.metadata().dataStreams().get(dataStreamName).isReplicated(), is(false)); assertThat(newState.metadata().dataStreams().get(dataStreamName).getLifecycle(), equalTo(DataStreamLifecycle.DEFAULT)); + assertThat(newState.metadata().dataStreams().get(dataStreamName).getIndexMode(), nullValue()); + assertThat(newState.metadata().index(DataStream.getDefaultBackingIndexName(dataStreamName, 1)), notNullValue()); + assertThat( + newState.metadata().index(DataStream.getDefaultBackingIndexName(dataStreamName, 1)).getSettings().get("index.hidden"), + equalTo("true") + ); + assertThat(newState.metadata().index(DataStream.getDefaultBackingIndexName(dataStreamName, 1)).isSystem(), is(false)); + } + + public void testCreateDataStreamLogsdb() throws Exception { + final MetadataCreateIndexService metadataCreateIndexService = getMetadataCreateIndexService(); + final String dataStreamName = "my-data-stream"; + ComposableIndexTemplate template = ComposableIndexTemplate.builder() + .indexPatterns(List.of(dataStreamName + "*")) + .template(new Template(Settings.builder().put("index.mode", "logsdb").build(), null, null)) + .dataStreamTemplate(new DataStreamTemplate()) + .build(); + ClusterState cs = ClusterState.builder(new ClusterName("_name")) + .metadata(Metadata.builder().put("template", template).build()) + .build(); + CreateDataStreamClusterStateUpdateRequest req = new CreateDataStreamClusterStateUpdateRequest(dataStreamName); + ClusterState newState = MetadataCreateDataStreamService.createDataStream( + 
metadataCreateIndexService, + Settings.EMPTY, + cs, + true, + req, + ActionListener.noop(), + false + ); + assertThat(newState.metadata().dataStreams().size(), equalTo(1)); + assertThat(newState.metadata().dataStreams().get(dataStreamName).getName(), equalTo(dataStreamName)); + assertThat(newState.metadata().dataStreams().get(dataStreamName).isSystem(), is(false)); + assertThat(newState.metadata().dataStreams().get(dataStreamName).isHidden(), is(false)); + assertThat(newState.metadata().dataStreams().get(dataStreamName).isReplicated(), is(false)); + assertThat(newState.metadata().dataStreams().get(dataStreamName).getIndexMode(), equalTo(IndexMode.LOGSDB)); + assertThat(newState.metadata().dataStreams().get(dataStreamName).getLifecycle(), equalTo(DataStreamLifecycle.DEFAULT)); assertThat(newState.metadata().index(DataStream.getDefaultBackingIndexName(dataStreamName, 1)), notNullValue()); assertThat( newState.metadata().index(DataStream.getDefaultBackingIndexName(dataStreamName, 1)).getSettings().get("index.hidden"), diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataTests.java index 7d329432cf9f8..284bb870652c0 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataTests.java @@ -112,6 +112,14 @@ public void testFindAliases() { .putAlias(AliasMetadata.builder("alias1").build()) .putAlias(AliasMetadata.builder("alias2").build()) ) + .put( + IndexMetadata.builder("index2") + .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())) + .numberOfShards(1) + .numberOfReplicas(0) + .putAlias(AliasMetadata.builder("alias2").build()) + .putAlias(AliasMetadata.builder("alias3").build()) + ) .build(); { @@ -135,10 +143,12 @@ public void testFindAliases() { } { GetAliasesRequest request = new GetAliasesRequest("alias*"); - Map> aliases = metadata.findAliases(request.aliases(), new String[] { "index" }); - assertThat(aliases, aMapWithSize(1)); - List aliasMetadataList = aliases.get("index"); - assertThat(aliasMetadataList, transformedItemsMatch(AliasMetadata::alias, contains("alias1", "alias2"))); + Map> aliases = metadata.findAliases(request.aliases(), new String[] { "index", "index2" }); + assertThat(aliases, aMapWithSize(2)); + List indexAliasMetadataList = aliases.get("index"); + assertThat(indexAliasMetadataList, transformedItemsMatch(AliasMetadata::alias, contains("alias1", "alias2"))); + List index2AliasMetadataList = aliases.get("index2"); + assertThat(index2AliasMetadataList, transformedItemsMatch(AliasMetadata::alias, contains("alias2", "alias3"))); } { GetAliasesRequest request = new GetAliasesRequest("alias1"); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java index 1f80160c92ffd..1ae73c9c08137 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java @@ -1258,8 +1258,8 @@ public void testShouldLogOnTooManyUndesiredAllocations() { final int shardCount = 5; - final var dataNode1Assignments = Maps.newMapWithExpectedSize(shardCount); - final var dataNode2Assignments = 
Maps.newMapWithExpectedSize(shardCount); + final var allShardsDesiredOnDataNode1 = Maps.newMapWithExpectedSize(shardCount); + final var allShardsDesiredOnDataNode2 = Maps.newMapWithExpectedSize(shardCount); final var metadataBuilder = Metadata.builder(); final var routingTableBuilder = RoutingTable.builder(); @@ -1270,10 +1270,23 @@ public void testShouldLogOnTooManyUndesiredAllocations() { metadataBuilder.put(indexMetadata, false); routingTableBuilder.add(IndexRoutingTable.builder(index).addShard(newShardRouting(shardId, "data-node-1", true, STARTED))); - dataNode1Assignments.put(shardId, new ShardAssignment(Set.of("data-node-1"), 1, 0, 0)); - dataNode2Assignments.put(shardId, new ShardAssignment(Set.of("data-node-2"), 1, 0, 0)); + allShardsDesiredOnDataNode1.put(shardId, new ShardAssignment(Set.of("data-node-1"), 1, 0, 0)); + allShardsDesiredOnDataNode2.put(shardId, new ShardAssignment(Set.of("data-node-2"), 1, 0, 0)); } + final var node1ShuttingDown = randomBoolean(); + if (node1ShuttingDown) { + var type = randomFrom(SingleNodeShutdownMetadata.Type.SIGTERM, SingleNodeShutdownMetadata.Type.REMOVE); + var builder = SingleNodeShutdownMetadata.builder() + .setType(type) + .setNodeId("data-node-1") + .setStartedAtMillis(randomNonNegativeLong()) + .setReason("test"); + if (type.equals(SingleNodeShutdownMetadata.Type.SIGTERM)) { + builder.setGracePeriod(randomPositiveTimeValue()); + } + metadataBuilder.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(Map.of("data-node-1", builder.build()))); + } final var clusterState = ClusterState.builder(ClusterName.DEFAULT) .nodes(DiscoveryNodes.builder().add(newNode("data-node-1")).add(newNode("data-node-2"))) .metadata(metadataBuilder) @@ -1293,8 +1306,9 @@ public void testShouldLogOnTooManyUndesiredAllocations() { + "/" + shardCount + ") are not on their desired nodes, which exceeds the warn threshold of [10%]"; + // Desired assignment matches current routing table assertThatLogger( - () -> reconciler.reconcile(new DesiredBalance(1, dataNode1Assignments), createRoutingAllocationFrom(clusterState)), + () -> reconciler.reconcile(new DesiredBalance(1, allShardsDesiredOnDataNode1), createRoutingAllocationFrom(clusterState)), DesiredBalanceReconciler.class, new MockLog.UnseenEventExpectation( "Should not log if all shards on desired location", @@ -1304,17 +1318,24 @@ public void testShouldLogOnTooManyUndesiredAllocations() { ) ); assertThatLogger( - () -> reconciler.reconcile(new DesiredBalance(1, dataNode2Assignments), createRoutingAllocationFrom(clusterState)), + () -> reconciler.reconcile(new DesiredBalance(1, allShardsDesiredOnDataNode2), createRoutingAllocationFrom(clusterState)), DesiredBalanceReconciler.class, - new MockLog.SeenEventExpectation( - "Should log first too many shards on undesired locations", - DesiredBalanceReconciler.class.getCanonicalName(), - Level.WARN, - expectedWarningMessage - ) + node1ShuttingDown + ? 
new MockLog.UnseenEventExpectation( + "Should not log first too many shards on undesired locations", + DesiredBalanceReconciler.class.getCanonicalName(), + Level.WARN, + expectedWarningMessage + ) + : new MockLog.SeenEventExpectation( + "Should log first too many shards on undesired locations", + DesiredBalanceReconciler.class.getCanonicalName(), + Level.WARN, + expectedWarningMessage + ) ); assertThatLogger( - () -> reconciler.reconcile(new DesiredBalance(1, dataNode2Assignments), createRoutingAllocationFrom(clusterState)), + () -> reconciler.reconcile(new DesiredBalance(1, allShardsDesiredOnDataNode2), createRoutingAllocationFrom(clusterState)), DesiredBalanceReconciler.class, new MockLog.UnseenEventExpectation( "Should not log immediate second too many shards on undesired locations", diff --git a/server/src/test/java/org/elasticsearch/http/AbstractHttpServerTransportTests.java b/server/src/test/java/org/elasticsearch/http/AbstractHttpServerTransportTests.java index 981eae9d60694..77133516f37d5 100644 --- a/server/src/test/java/org/elasticsearch/http/AbstractHttpServerTransportTests.java +++ b/server/src/test/java/org/elasticsearch/http/AbstractHttpServerTransportTests.java @@ -420,15 +420,15 @@ protected void populatePerRequestThreadContext(RestRequest restRequest, ThreadCo } public void testHandlingCompatibleVersionParsingErrors() { - // a compatible version exception (v7 on accept and v8 on content-type) should be handled gracefully + // a compatible version exception (v8 on accept and v9 on content-type) should be handled gracefully final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); try ( AbstractHttpServerTransport transport = failureAssertingtHttpServerTransport(clusterSettings, Set.of("Accept", "Content-Type")) ) { Map> headers = new HashMap<>(); - headers.put("Accept", Collections.singletonList("aaa/bbb;compatible-with=7")); - headers.put("Content-Type", Collections.singletonList("aaa/bbb;compatible-with=8")); + headers.put("Accept", Collections.singletonList("aaa/bbb;compatible-with=8")); + headers.put("Content-Type", Collections.singletonList("aaa/bbb;compatible-with=9")); FakeRestRequest.FakeHttpRequest fakeHttpRequest = new FakeRestRequest.FakeHttpRequest( RestRequest.Method.GET, diff --git a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java index d2aa11f9f3866..13c58fed1c5ad 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java @@ -27,6 +27,7 @@ import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; public class CustomNormalizerTests extends ESTokenStreamTestCase { private static final AnalysisPlugin MOCK_ANALYSIS_PLUGIN = new MockAnalysisPlugin(); diff --git a/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java b/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java index 3cfdbdcdf37da..0aa7652e5a5f6 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java @@ -24,6 +24,7 @@ import java.io.IOException; import 
java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; @ThreadLeakScope(Scope.NONE) diff --git a/server/src/test/java/org/elasticsearch/index/analysis/StopTokenFilterTests.java b/server/src/test/java/org/elasticsearch/index/analysis/StopTokenFilterTests.java index b02e05ae704ef..fc0e6cfab7a37 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/StopTokenFilterTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/StopTokenFilterTests.java @@ -29,11 +29,13 @@ public class StopTokenFilterTests extends ESTokenStreamTestCase { public void testPositionIncrementSetting() throws IOException { + boolean versionSet = false; Builder builder = Settings.builder() .put("index.analysis.filter.my_stop.type", "stop") .put("index.analysis.filter.my_stop.enable_position_increments", false); if (random().nextBoolean()) { builder.put("index.analysis.filter.my_stop.version", "5.0"); + versionSet = true; } builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()); Settings settings = builder.build(); @@ -43,14 +45,17 @@ public void testPositionIncrementSetting() throws IOException { } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("enable_position_increments is not supported anymore")); } + if (versionSet) { + assertWarnings("Setting [version] on analysis component [my_stop] has no effect and is deprecated"); + } } public void testCorrectPositionIncrementSetting() throws IOException { Builder builder = Settings.builder().put("index.analysis.filter.my_stop.type", "stop"); + boolean versionSet = false; if (random().nextBoolean()) { builder.put("index.analysis.filter.my_stop.version", Version.LATEST); - } else { - // don't specify + versionSet = true; } builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()); ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(builder.build()); @@ -60,6 +65,9 @@ public void testCorrectPositionIncrementSetting() throws IOException { tokenizer.setReader(new StringReader("foo bar")); TokenStream create = tokenFilter.create(tokenizer); assertThat(create, instanceOf(StopFilter.class)); + if (versionSet) { + assertWarnings("Setting [version] on analysis component [my_stop] has no effect and is deprecated"); + } } public void testThatSuggestStopFilterWorks() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index d50bea693cb6e..883723de31d46 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -3206,9 +3206,10 @@ public void testCurrentTranslogUUIIDIsCommitted() throws IOException { engine.syncTranslog(); // to advance persisted local checkpoint assertEquals(engine.getProcessedLocalCheckpoint(), engine.getPersistedLocalCheckpoint()); globalCheckpoint.set(engine.getPersistedLocalCheckpoint()); - asInstanceOf( + safeAwaitFailure( IllegalStateException.class, - safeAwaitFailure(Void.class, listener -> engine.recoverFromTranslog(translogHandler, Long.MAX_VALUE, listener)) + Void.class, + listener -> engine.recoverFromTranslog(translogHandler, Long.MAX_VALUE, listener) ); Map userData = engine.getLastCommittedSegmentInfos().getUserData(); 
assertEquals(engine.getTranslog().getTranslogUUID(), userData.get(Translog.TRANSLOG_UUID_KEY)); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java index 93f546eb288b9..71b52dc41705b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java @@ -2307,60 +2307,6 @@ public void testSubobjectsFalseFlattened() throws Exception { assertNotNull(doc.rootDoc().getField("attributes.simple.attribute")); } - public void testSubobjectsAutoFlattened() throws Exception { - DocumentMapper mapper = createDocumentMapper(mapping(b -> { - b.startObject("attributes"); - { - b.field("dynamic", false); - b.field("subobjects", "auto"); - b.startObject("properties"); - { - b.startObject("simple.attribute").field("type", "keyword").endObject(); - b.startObject("complex.attribute").field("type", "flattened").endObject(); - b.startObject("path").field("type", "object"); - { - b.field("store_array_source", "true").field("subobjects", "auto"); - b.startObject("properties"); - { - b.startObject("nested.attribute").field("type", "keyword").endObject(); - } - b.endObject(); - } - b.endObject(); - b.startObject("flattened_object").field("type", "object"); - { - b.startObject("properties"); - { - b.startObject("nested.attribute").field("type", "keyword").endObject(); - } - b.endObject(); - } - b.endObject(); - } - b.endObject(); - } - b.endObject(); - })); - ParsedDocument doc = mapper.parse(source(""" - { - "attributes": { - "complex.attribute": { - "foo" : "bar" - }, - "simple.attribute": "sa", - "path": { - "nested.attribute": "na" - }, - "flattened_object.nested.attribute": "fna" - } - } - """)); - assertNotNull(doc.rootDoc().getField("attributes.complex.attribute")); - assertNotNull(doc.rootDoc().getField("attributes.simple.attribute")); - assertNotNull(doc.rootDoc().getField("attributes.path.nested.attribute")); - assertNotNull(doc.rootDoc().getField("attributes.flattened_object.nested.attribute")); - } - public void testWriteToFieldAlias() throws Exception { DocumentMapper mapper = createDocumentMapper(mapping(b -> { b.startObject("alias-field"); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java index c4e223a4d1b77..b38c65c1710d6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java @@ -13,12 +13,9 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; -import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; -import org.hamcrest.Matchers; import java.io.IOException; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -29,7 +26,7 @@ private void assertXContentMatches(String dotsExpanded, String withDots) throws final ContentPath contentPath = new ContentPath(); try ( XContentParser inputParser = createParser(JsonXContent.jsonXContent, withDots); - XContentParser expandedParser = DotExpandingXContentParser.expandDots(inputParser, contentPath, null) + XContentParser expandedParser = 
DotExpandingXContentParser.expandDots(inputParser, contentPath) ) { expandedParser.allowDuplicateKeys(true); @@ -40,7 +37,7 @@ private void assertXContentMatches(String dotsExpanded, String withDots) throws expectedParser.allowDuplicateKeys(true); try ( var p = createParser(JsonXContent.jsonXContent, withDots); - XContentParser actualParser = DotExpandingXContentParser.expandDots(p, contentPath, null) + XContentParser actualParser = DotExpandingXContentParser.expandDots(p, contentPath) ) { XContentParser.Token currentToken; while ((currentToken = actualParser.nextToken()) != null) { @@ -130,7 +127,7 @@ public void testDuplicateKeys() throws IOException { public void testDotsCollapsingFlatPaths() throws IOException { ContentPath contentPath = new ContentPath(); XContentParser parser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, """ - {"metrics.service.time": 10, "metrics.service.time.max": 500, "metrics.foo": "value"}"""), contentPath, null); + {"metrics.service.time": 10, "metrics.service.time.max": 500, "metrics.foo": "value"}"""), contentPath); parser.nextToken(); assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); assertEquals("metrics", parser.currentName()); @@ -200,7 +197,7 @@ public void testDotsCollapsingStructuredPath() throws IOException { }, "foo" : "value" } - }"""), contentPath, null); + }"""), contentPath); parser.nextToken(); assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); assertEquals("metrics", parser.currentName()); @@ -238,7 +235,7 @@ public void testDotsCollapsingStructuredPath() throws IOException { public void testSkipChildren() throws IOException { XContentParser parser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, """ - { "test.with.dots" : "value", "nodots" : "value2" }"""), new ContentPath(), null); + { "test.with.dots" : "value", "nodots" : "value2" }"""), new ContentPath()); parser.nextToken(); // start object assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); assertEquals("test", parser.currentName()); @@ -261,7 +258,7 @@ public void testSkipChildren() throws IOException { public void testSkipChildrenWithinInnerObject() throws IOException { XContentParser parser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, """ - { "test.with.dots" : {"obj" : {"field":"value"}}, "nodots" : "value2" }"""), new ContentPath(), null); + { "test.with.dots" : {"obj" : {"field":"value"}}, "nodots" : "value2" }"""), new ContentPath()); parser.nextToken(); // start object assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); @@ -309,8 +306,7 @@ public void testGetTokenLocation() throws IOException { XContentParser expectedParser = createParser(JsonXContent.jsonXContent, jsonInput); XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, jsonInput), - new ContentPath(), - null + new ContentPath() ); assertEquals(expectedParser.getTokenLocation(), dotExpandedParser.getTokenLocation()); @@ -368,8 +364,7 @@ public void testGetTokenLocation() throws IOException { public void testParseMapUOE() throws Exception { XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, ""), - new ContentPath(), - null + new ContentPath() ); expectThrows(UnsupportedOperationException.class, dotExpandedParser::map); } @@ -377,8 +372,7 @@ public void testParseMapUOE() throws Exception { public void testParseMapOrderedUOE() throws 
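
For orientation, a small sketch of what the two-argument expandDots being threaded through these tests does: dotted keys are expanded into nested objects as the parser is consumed. Token-level behaviour is inferred from the assertions above; createParser is assumed to be the usual test-case helper.

    // Illustrative only.
    // Input   : { "metrics.service.time": 10 }
    // Exposed : { "metrics": { "service": { "time": 10 } } }
    XContentParser expanded = DotExpandingXContentParser.expandDots(
        createParser(JsonXContent.jsonXContent, """
            {"metrics.service.time": 10}"""),
        new ContentPath()
    );
    expanded.nextToken();                                  // START_OBJECT
    expanded.nextToken();                                  // FIELD_NAME
    assert "metrics".equals(expanded.currentName());       // then "service", then "time" = 10
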
Exception { XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, ""), - new ContentPath(), - null + new ContentPath() ); expectThrows(UnsupportedOperationException.class, dotExpandedParser::mapOrdered); } @@ -386,8 +380,7 @@ public void testParseMapOrderedUOE() throws Exception { public void testParseMapStringsUOE() throws Exception { XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, ""), - new ContentPath(), - null + new ContentPath() ); expectThrows(UnsupportedOperationException.class, dotExpandedParser::mapStrings); } @@ -395,8 +388,7 @@ public void testParseMapStringsUOE() throws Exception { public void testParseMapSupplierUOE() throws Exception { XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, ""), - new ContentPath(), - null + new ContentPath() ); expectThrows(UnsupportedOperationException.class, () -> dotExpandedParser.map(HashMap::new, XContentParser::text)); } @@ -411,8 +403,7 @@ public void testParseMap() throws Exception { contentPath.setWithinLeafObject(true); XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, jsonInput), - contentPath, - null + contentPath ); assertEquals(XContentParser.Token.START_OBJECT, dotExpandedParser.nextToken()); assertEquals(XContentParser.Token.FIELD_NAME, dotExpandedParser.nextToken()); @@ -427,8 +418,7 @@ public void testParseMap() throws Exception { public void testParseListUOE() throws Exception { XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, ""), - new ContentPath(), - null + new ContentPath() ); expectThrows(UnsupportedOperationException.class, dotExpandedParser::list); } @@ -436,8 +426,7 @@ public void testParseListUOE() throws Exception { public void testParseListOrderedUOE() throws Exception { XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, ""), - new ContentPath(), - null + new ContentPath() ); expectThrows(UnsupportedOperationException.class, dotExpandedParser::listOrderedMap); } @@ -451,8 +440,7 @@ public void testParseList() throws Exception { contentPath.setWithinLeafObject(true); XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( createParser(JsonXContent.jsonXContent, jsonInput), - contentPath, - null + contentPath ); assertEquals(XContentParser.Token.START_OBJECT, dotExpandedParser.nextToken()); assertEquals(XContentParser.Token.FIELD_NAME, dotExpandedParser.nextToken()); @@ -462,104 +450,4 @@ public void testParseList() throws Exception { assertEquals("one", list.get(0)); assertEquals("two", list.get(1)); } - - private static DocumentParserContext createContext(XContentBuilder builder) throws IOException { - var documentMapper = new MapperServiceTestCase() { - }.createDocumentMapper(builder); - return new TestDocumentParserContext(documentMapper.mappers(), null); - } - - private static List getSubPaths(XContentBuilder builder, String... 
path) throws IOException { - DocumentParserContext context = createContext(builder); - return DotExpandingXContentParser.maybeFlattenPaths(Arrays.stream(path).toList(), context, new ContentPath()); - } - - private static List getSubPaths(XContentBuilder builder, List contentPath, List path) throws IOException { - DocumentParserContext context = createContext(builder); - ContentPath content = new ContentPath(); - for (String c : contentPath) { - content.add(c); - } - return DotExpandingXContentParser.maybeFlattenPaths(path, context, content); - } - - public void testAutoFlattening() throws Exception { - var b = XContentBuilder.builder(XContentType.JSON.xContent()); - b.startObject().startObject("_doc"); - { - b.field("subobjects", "auto"); - b.startObject("properties"); - { - b.startObject("path").startObject("properties"); - { - b.startObject("to").startObject("properties"); - { - b.startObject("field").field("type", "integer").endObject(); - } - b.endObject().endObject(); - } - b.endObject().endObject(); - b.startObject("path.auto").field("subobjects", "auto").startObject("properties"); - { - b.startObject("to").startObject("properties"); - { - b.startObject("some.field").field("type", "integer").endObject(); - } - b.endObject().endObject(); - b.startObject("inner.enabled").field("dynamic", "false").startObject("properties"); - { - b.startObject("field").field("type", "integer").endObject(); - } - b.endObject().endObject(); - } - b.endObject().endObject(); - b.startObject("path.disabled").field("subobjects", "false").startObject("properties"); - { - b.startObject("to").startObject("properties"); - { - b.startObject("some.field").field("type", "integer").endObject(); - } - b.endObject().endObject(); - } - b.endObject().endObject(); - } - b.endObject(); - } - b.endObject().endObject(); - - // inner [subobjects:enabled] gets flattened - assertThat(getSubPaths(b, "field"), Matchers.contains("field")); - assertThat(getSubPaths(b, "path", "field"), Matchers.contains("path.field")); - assertThat(getSubPaths(b, "path", "to", "field"), Matchers.contains("path.to.field")); - assertThat(getSubPaths(b, "path", "to", "any"), Matchers.contains("path.to.any")); - - // inner [subobjects:auto] does not get flattened - assertThat(getSubPaths(b, "path", "auto", "field"), Matchers.contains("path.auto", "field")); - assertThat(getSubPaths(b, "path", "auto", "some", "field"), Matchers.contains("path.auto", "some.field")); - assertThat(getSubPaths(b, "path", "auto", "to", "some", "field"), Matchers.contains("path.auto", "to.some.field")); - assertThat(getSubPaths(b, "path", "auto", "to", "some", "other"), Matchers.contains("path.auto", "to.some.other")); - assertThat(getSubPaths(b, "path", "auto", "inner", "enabled", "field"), Matchers.contains("path.auto", "inner.enabled", "field")); - assertThat( - getSubPaths(b, "path", "auto", "inner", "enabled", "to", "some", "field"), - Matchers.contains("path.auto", "inner.enabled", "to", "some", "field") - ); - - // inner [subobjects:disabled] gets flattened - assertThat(getSubPaths(b, "path", "disabled", "field"), Matchers.contains("path.disabled.field")); - assertThat(getSubPaths(b, "path", "disabled", "some", "field"), Matchers.contains("path.disabled.some.field")); - assertThat(getSubPaths(b, "path", "disabled", "to", "some", "field"), Matchers.contains("path.disabled.to.some.field")); - assertThat(getSubPaths(b, "path", "disabled", "to", "some", "other"), Matchers.contains("path.disabled.to.some.other")); - - // Non-empty content path. 
- assertThat(getSubPaths(b, List.of("path"), List.of("field")), Matchers.contains("field")); - assertThat(getSubPaths(b, List.of("path"), List.of("to", "field")), Matchers.contains("to", "field")); - assertThat(getSubPaths(b, List.of("path", "to"), List.of("field")), Matchers.contains("field")); - assertThat(getSubPaths(b, List.of("path"), List.of("auto", "field")), Matchers.contains("auto", "field")); - assertThat(getSubPaths(b, List.of("path", "auto"), List.of("to", "some", "field")), Matchers.contains("to.some.field")); - assertThat( - getSubPaths(b, List.of("path", "auto"), List.of("inner", "enabled", "to", "some", "field")), - Matchers.contains("inner.enabled", "to", "some", "field") - ); - assertThat(getSubPaths(b, List.of("path", "disabled"), List.of("to", "some", "field")), Matchers.contains("to", "some", "field")); - } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DynamicTemplatesTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DynamicTemplatesTests.java index 43ee47245f492..7f430cf676809 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DynamicTemplatesTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DynamicTemplatesTests.java @@ -1619,9 +1619,10 @@ public void testSubobjectsAutoWithInnerNestedFromDynamicTemplate() throws IOExce assertNotNull(doc.rootDoc().get("metrics.time.max")); assertNotNull(doc.docs().get(0).get("metrics.time.foo")); - var metrics = ((ObjectMapper) doc.dynamicMappingsUpdate().getRoot().getMapper("metrics")); - assertThat(metrics.getMapper("time"), instanceOf(NestedObjectMapper.class)); - assertThat(metrics.getMapper("time.max"), instanceOf(NumberFieldMapper.class)); + assertThat( + ((ObjectMapper) doc.dynamicMappingsUpdate().getRoot().getMapper("metrics")).getMapper("time"), + instanceOf(NestedObjectMapper.class) + ); } public void testDynamicSubobject() throws IOException { @@ -2056,7 +2057,7 @@ public void testSubobjectsAutoFlattened() throws IOException { "dynamic_templates": [ { "test": { - "path_match": "attributes.*", + "path_match": "attributes.resource.*", "match_mapping_type": "object", "mapping": { "type": "flattened" @@ -2069,7 +2070,7 @@ public void testSubobjectsAutoFlattened() throws IOException { """; String docJson = """ { - "attributes": { + "attributes.resource": { "complex.attribute": { "a": "b" }, @@ -2082,67 +2083,14 @@ public void testSubobjectsAutoFlattened() throws IOException { ParsedDocument parsedDoc = mapperService.documentMapper().parse(source(docJson)); merge(mapperService, dynamicMapping(parsedDoc.dynamicMappingsUpdate())); - Mapper fooBarMapper = mapperService.documentMapper().mappers().getMapper("attributes.foo.bar"); + Mapper fooBarMapper = mapperService.documentMapper().mappers().getMapper("attributes.resource.foo.bar"); assertNotNull(fooBarMapper); assertEquals("text", fooBarMapper.typeName()); - Mapper fooStructuredMapper = mapperService.documentMapper().mappers().getMapper("attributes.complex.attribute"); + Mapper fooStructuredMapper = mapperService.documentMapper().mappers().getMapper("attributes.resource.complex.attribute"); assertNotNull(fooStructuredMapper); assertEquals("flattened", fooStructuredMapper.typeName()); } - public void testSubobjectsAutoWithObjectInDynamicTemplate() throws IOException { - String mapping = """ - { - "_doc": { - "properties": { - "attributes": { - "type": "object", - "subobjects": "auto" - } - }, - "dynamic_templates": [ - { - "test": { - "path_match": "attributes.*", - "match_mapping_type": "object", - "mapping": { - 
"type": "object", - "dynamic": "false", - "properties": { - "id": { - "type": "integer" - } - } - } - } - } - ] - } - } - """; - String docJson = """ - { - "attributes": { - "to": { - "id": 10 - }, - "foo.bar": "baz" - } - } - """; - - MapperService mapperService = createMapperService(mapping); - ParsedDocument parsedDoc = mapperService.documentMapper().parse(source(docJson)); - merge(mapperService, dynamicMapping(parsedDoc.dynamicMappingsUpdate())); - - Mapper fooBarMapper = mapperService.documentMapper().mappers().getMapper("attributes.foo.bar"); - assertNotNull(fooBarMapper); - assertEquals("text", fooBarMapper.typeName()); - Mapper innerObject = mapperService.documentMapper().mappers().objectMappers().get("attributes.to"); - assertNotNull(innerObject); - assertEquals("integer", mapperService.documentMapper().mappers().getMapper("attributes.to.id").typeName()); - } - public void testMatchWithArrayOfFieldNames() throws IOException { String mapping = """ { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index 5d5273f0fc788..eaa7bf6528203 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -1549,66 +1549,6 @@ public void testCopyToLogicInsideObject() throws IOException { assertEquals("{\"path\":{\"at\":\"A\"}}", syntheticSource); } - public void testCopyToRootWithSubobjectFlattening() throws IOException { - DocumentMapper documentMapper = createMapperService(topMapping(b -> { - b.startObject("_source").field("mode", "synthetic").endObject(); - b.field("subobjects", randomFrom("false", "auto")); - b.startObject("properties"); - { - b.startObject("k").field("type", "keyword").field("copy_to", "a.b.c").endObject(); - b.startObject("a").startObject("properties"); - { - b.startObject("b").startObject("properties"); - { - b.startObject("c").field("type", "keyword").endObject(); - } - b.endObject().endObject(); - } - b.endObject().endObject(); - } - b.endObject(); - })).documentMapper(); - - CheckedConsumer document = b -> b.field("k", "hey"); - - var doc = documentMapper.parse(source(document)); - assertNotNull(doc.docs().get(0).getField("a.b.c")); - - var syntheticSource = syntheticSource(documentMapper, document); - assertEquals("{\"k\":\"hey\"}", syntheticSource); - } - - public void testCopyToObjectWithSubobjectFlattening() throws IOException { - DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { - b.startObject("path").field("subobjects", randomFrom("false", "auto")).startObject("properties"); - { - b.startObject("k").field("type", "keyword").field("copy_to", "path.a.b.c").endObject(); - b.startObject("a").startObject("properties"); - { - b.startObject("b").startObject("properties"); - { - b.startObject("c").field("type", "keyword").endObject(); - } - b.endObject().endObject(); - } - b.endObject().endObject(); - } - b.endObject().endObject(); - })).documentMapper(); - - CheckedConsumer document = b -> { - b.startObject("path"); - b.field("k", "hey"); - b.endObject(); - }; - - var doc = documentMapper.parse(source(document)); - assertNotNull(doc.docs().get(0).getField("path.a.b.c")); - - var syntheticSource = syntheticSource(documentMapper, document); - assertEquals("{\"path\":{\"k\":\"hey\"}}", syntheticSource); - } - protected void validateRoundTripReader(String syntheticSource, 
DirectoryReader reader, DirectoryReader roundTripReader) throws IOException { // We exclude ignored source field since in some cases it contains an exact copy of a part of document source. diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java index 4bc91b793d049..3312c94e8a0e1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java @@ -354,8 +354,12 @@ public void testSubobjectsFalse() throws Exception { b.field("subobjects", false); b.startObject("properties"); { - b.startObject("time").field("type", "long").endObject(); - b.startObject("time.max").field("type", "long").endObject(); + b.startObject("time"); + b.field("type", "long"); + b.endObject(); + b.startObject("time.max"); + b.field("type", "long"); + b.endObject(); } b.endObject(); } @@ -376,7 +380,9 @@ public void testSubobjectsFalseWithInnerObject() throws IOException { { b.startObject("properties"); { - b.startObject("max").field("type", "long").endObject(); + b.startObject("max"); + b.field("type", "long"); + b.endObject(); } b.endObject(); } @@ -397,7 +403,9 @@ public void testSubobjectsFalseWithInnerNested() { b.field("subobjects", false); b.startObject("properties"); { - b.startObject("time").field("type", "nested").endObject(); + b.startObject("time"); + b.field("type", "nested"); + b.endObject(); } b.endObject(); } @@ -411,8 +419,12 @@ public void testSubobjectsFalseWithInnerNested() { public void testSubobjectsFalseRoot() throws Exception { MapperService mapperService = createMapperService(mappingNoSubobjects(b -> { - b.startObject("metrics.service.time").field("type", "long").endObject(); - b.startObject("metrics.service.time.max").field("type", "long").endObject(); + b.startObject("metrics.service.time"); + b.field("type", "long"); + b.endObject(); + b.startObject("metrics.service.time.max"); + b.field("type", "long"); + b.endObject(); })); assertNotNull(mapperService.fieldType("metrics.service.time")); assertNotNull(mapperService.fieldType("metrics.service.time.max")); @@ -429,7 +441,9 @@ public void testSubobjectsFalseRootWithInnerObject() throws IOException { { b.startObject("properties"); { - b.startObject("max").field("type", "long").endObject(); + b.startObject("max"); + b.field("type", "long"); + b.endObject(); } b.endObject(); } @@ -441,7 +455,9 @@ public void testSubobjectsFalseRootWithInnerObject() throws IOException { public void testSubobjectsFalseRootWithInnerNested() { MapperParsingException exception = expectThrows(MapperParsingException.class, () -> createMapperService(mappingNoSubobjects(b -> { - b.startObject("metrics.service").field("type", "nested").endObject(); + b.startObject("metrics.service"); + b.field("type", "nested"); + b.endObject(); }))); assertEquals( "Failed to parse mapping: Tried to add nested object [metrics.service] to object [_doc] which does not support subobjects", @@ -457,7 +473,8 @@ public void testSubobjectsCannotBeUpdated() throws IOException { "_doc", MergeReason.MAPPING_UPDATE, new CompressedXContent(BytesReference.bytes(fieldMapping(b -> { - b.field("type", "object").field("subobjects", "false"); + b.field("type", "object"); + b.field("subobjects", "false"); }))) ); MapperException exception = expectThrows( @@ -492,8 +509,12 @@ public void testSubobjectsAuto() throws Exception { b.field("subobjects", "auto"); b.startObject("properties"); { - 
b.startObject("time").field("type", "long").endObject(); - b.startObject("time.max").field("type", "long").endObject(); + b.startObject("time"); + b.field("type", "long"); + b.endObject(); + b.startObject("time.max"); + b.field("type", "long"); + b.endObject(); b.startObject("attributes"); { b.field("type", "object"); @@ -510,7 +531,7 @@ public void testSubobjectsAuto() throws Exception { assertNotNull(mapperService.documentMapper().mappers().objectMappers().get("metrics.service.attributes")); } - public void testSubobjectsAutoWithInnerFlattenableObject() throws IOException { + public void testSubobjectsAutoWithInnerObject() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { b.startObject("metrics.service"); { @@ -521,42 +542,16 @@ public void testSubobjectsAutoWithInnerFlattenableObject() throws IOException { { b.startObject("properties"); { - b.startObject("max").field("type", "long").endObject(); + b.startObject("max"); + b.field("type", "long"); + b.endObject(); } b.endObject(); } b.endObject(); - b.startObject("foo").field("type", "keyword").endObject(); - } - b.endObject(); - } - b.endObject(); - })); - assertNull(mapperService.fieldType("metrics.service.time")); - assertNotNull(mapperService.fieldType("metrics.service.time.max")); - assertNotNull(mapperService.fieldType("metrics.service.foo")); - assertNull(mapperService.documentMapper().mappers().objectMappers().get("metrics.service.time")); // Gets flattened. - assertNotNull(mapperService.documentMapper().mappers().getMapper("metrics.service.foo")); - } - - public void testSubobjectsAutoWithInnerNonFlattenableObject() throws IOException { - MapperService mapperService = createMapperService(mapping(b -> { - b.startObject("metrics.service"); - { - b.field("subobjects", "auto"); - b.startObject("properties"); - { - b.startObject("time"); - { - b.field(ObjectMapper.STORE_ARRAY_SOURCE_PARAM, true); - b.startObject("properties"); - { - b.startObject("max").field("type", "long").endObject(); - } - b.endObject(); - } + b.startObject("foo"); + b.field("type", "keyword"); b.endObject(); - b.startObject("foo").field("type", "keyword").endObject(); } b.endObject(); } @@ -565,7 +560,7 @@ public void testSubobjectsAutoWithInnerNonFlattenableObject() throws IOException assertNull(mapperService.fieldType("metrics.service.time")); assertNotNull(mapperService.fieldType("metrics.service.time.max")); assertNotNull(mapperService.fieldType("metrics.service.foo")); - assertNotNull(mapperService.documentMapper().mappers().objectMappers().get("metrics.service.time")); // Not flattened. 
+ assertNotNull(mapperService.documentMapper().mappers().objectMappers().get("metrics.service.time")); assertNotNull(mapperService.documentMapper().mappers().getMapper("metrics.service.foo")); } @@ -576,7 +571,9 @@ public void testSubobjectsAutoWithInnerNested() throws IOException { b.field("subobjects", "auto"); b.startObject("properties"); { - b.startObject("time").field("type", "nested").endObject(); + b.startObject("time"); + b.field("type", "nested"); + b.endObject(); } b.endObject(); } @@ -590,8 +587,12 @@ public void testSubobjectsAutoWithInnerNested() throws IOException { public void testSubobjectsAutoRoot() throws Exception { MapperService mapperService = createMapperService(mappingWithSubobjects(b -> { - b.startObject("metrics.service.time").field("type", "long").endObject(); - b.startObject("metrics.service.time.max").field("type", "long").endObject(); + b.startObject("metrics.service.time"); + b.field("type", "long"); + b.endObject(); + b.startObject("metrics.service.time.max"); + b.field("type", "long"); + b.endObject(); b.startObject("metrics.attributes"); { b.field("type", "object"); @@ -604,13 +605,15 @@ public void testSubobjectsAutoRoot() throws Exception { assertNotNull(mapperService.documentMapper().mappers().objectMappers().get("metrics.attributes")); } - public void testSubobjectsAutoRootWithInnerFlattenableObject() throws IOException { + public void testSubobjectsAutoRootWithInnerObject() throws IOException { MapperService mapperService = createMapperService(mappingWithSubobjects(b -> { b.startObject("metrics.service.time"); { b.startObject("properties"); { - b.startObject("max").field("type", "long").endObject(); + b.startObject("max"); + b.field("type", "long"); + b.endObject(); } b.endObject(); } @@ -618,48 +621,8 @@ public void testSubobjectsAutoRootWithInnerFlattenableObject() throws IOExceptio }, "auto")); assertNull(mapperService.fieldType("metrics.service.time")); assertNotNull(mapperService.fieldType("metrics.service.time.max")); - assertNull(mapperService.documentMapper().mappers().objectMappers().get("metrics.service.time")); // Gets flattened. - - Mapper innerField = mapperService.documentMapper().mappers().getMapper("metrics.service.time.max"); - assertNotNull(innerField); - assertEquals("metrics.service.time.max", innerField.leafName()); - } - - public void testSubobjectsAutoRootWithInnerNonFlattenableObject() throws IOException { - MapperService mapperService = createMapperService(mappingWithSubobjects(b -> { - b.startObject("metrics").startObject("properties"); - { - b.startObject("service.time"); - { - b.field("subobjects", "auto"); - b.startObject("properties"); - { - b.startObject("path").startObject("properties"); - { - b.startObject("to").startObject("properties"); - { - b.startObject("max").field("type", "long").endObject(); - } - b.endObject().endObject(); - } - b.endObject().endObject(); - } - b.endObject(); - } - b.endObject(); - } - b.endObject().endObject(); - }, "auto")); - assertNull(mapperService.fieldType("metrics.service.time")); - assertNotNull(mapperService.fieldType("metrics.service.time.path.to.max")); - - ObjectMapper innerObject = mapperService.documentMapper().mappers().objectMappers().get("metrics.service.time"); // Not flattened. 
- assertNotNull(innerObject); - assertEquals("metrics.service.time", innerObject.leafName()); - - Mapper innerField = mapperService.documentMapper().mappers().getMapper("metrics.service.time.path.to.max"); - assertNotNull(innerField); - assertEquals("path.to.max", innerField.leafName()); + assertNotNull(mapperService.documentMapper().mappers().objectMappers().get("metrics.service.time")); + assertNotNull(mapperService.documentMapper().mappers().getMapper("metrics.service.time.max")); } public void testSubobjectsAutoRootWithInnerNested() throws IOException { @@ -779,7 +742,16 @@ public void testFlatten() { ObjectMapper objectMapper = new ObjectMapper.Builder("parent", Optional.empty()).add( new ObjectMapper.Builder("child", Optional.empty()).add(new KeywordFieldMapper.Builder("keyword2", IndexVersion.current())) ).add(new KeywordFieldMapper.Builder("keyword1", IndexVersion.current())).build(rootContext); - List fields = objectMapper.asFlattenedFieldMappers(rootContext, true).stream().map(Mapper::fullPath).toList(); + List fields = objectMapper.asFlattenedFieldMappers(rootContext).stream().map(FieldMapper::fullPath).toList(); + assertThat(fields, containsInAnyOrder("parent.keyword1", "parent.child.keyword2")); + } + + public void testFlattenSubobjectsAuto() { + MapperBuilderContext rootContext = MapperBuilderContext.root(false, false); + ObjectMapper objectMapper = new ObjectMapper.Builder("parent", Optional.of(ObjectMapper.Subobjects.AUTO)).add( + new ObjectMapper.Builder("child", Optional.empty()).add(new KeywordFieldMapper.Builder("keyword2", IndexVersion.current())) + ).add(new KeywordFieldMapper.Builder("keyword1", IndexVersion.current())).build(rootContext); + List fields = objectMapper.asFlattenedFieldMappers(rootContext).stream().map(FieldMapper::fullPath).toList(); assertThat(fields, containsInAnyOrder("parent.keyword1", "parent.child.keyword2")); } @@ -788,7 +760,7 @@ public void testFlattenSubobjectsFalse() { ObjectMapper objectMapper = new ObjectMapper.Builder("parent", Optional.of(ObjectMapper.Subobjects.DISABLED)).add( new ObjectMapper.Builder("child", Optional.empty()).add(new KeywordFieldMapper.Builder("keyword2", IndexVersion.current())) ).add(new KeywordFieldMapper.Builder("keyword1", IndexVersion.current())).build(rootContext); - List fields = objectMapper.asFlattenedFieldMappers(rootContext, true).stream().map(Mapper::fullPath).toList(); + List fields = objectMapper.asFlattenedFieldMappers(rootContext).stream().map(FieldMapper::fullPath).toList(); assertThat(fields, containsInAnyOrder("parent.keyword1", "parent.child.keyword2")); } @@ -800,7 +772,7 @@ public void testFlattenDynamicIncompatible() { IllegalArgumentException exception = expectThrows( IllegalArgumentException.class, - () -> objectMapper.asFlattenedFieldMappers(rootContext, true) + () -> objectMapper.asFlattenedFieldMappers(rootContext) ); assertEquals( "Object mapper [parent.child] was found in a context where subobjects is set to false. " @@ -816,7 +788,7 @@ public void testFlattenEnabledFalse() { IllegalArgumentException exception = expectThrows( IllegalArgumentException.class, - () -> objectMapper.asFlattenedFieldMappers(rootContext, true) + () -> objectMapper.asFlattenedFieldMappers(rootContext) ); assertEquals( "Object mapper [parent] was found in a context where subobjects is set to false. 
" @@ -825,30 +797,13 @@ public void testFlattenEnabledFalse() { ); } - public void testFlattenSubobjectsAuto() { - MapperBuilderContext rootContext = MapperBuilderContext.root(false, false); - ObjectMapper objectMapper = new ObjectMapper.Builder("parent", Optional.of(ObjectMapper.Subobjects.AUTO)).add( - new ObjectMapper.Builder("child", Optional.empty()).add(new KeywordFieldMapper.Builder("keyword2", IndexVersion.current())) - ).add(new KeywordFieldMapper.Builder("keyword1", IndexVersion.current())).build(rootContext); - - IllegalArgumentException exception = expectThrows( - IllegalArgumentException.class, - () -> objectMapper.asFlattenedFieldMappers(rootContext, true) - ); - assertEquals( - "Object mapper [parent] was found in a context where subobjects is set to false. " - + "Auto-flattening [parent] failed because the value of [subobjects] is [auto]", - exception.getMessage() - ); - } - public void testFlattenExplicitSubobjectsTrue() { MapperBuilderContext rootContext = MapperBuilderContext.root(false, false); ObjectMapper objectMapper = new ObjectMapper.Builder("parent", Optional.of(ObjectMapper.Subobjects.ENABLED)).build(rootContext); IllegalArgumentException exception = expectThrows( IllegalArgumentException.class, - () -> objectMapper.asFlattenedFieldMappers(rootContext, true) + () -> objectMapper.asFlattenedFieldMappers(rootContext) ); assertEquals( "Object mapper [parent] was found in a context where subobjects is set to false. " diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java index ba4d09566ef63..cb9927be732f6 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java @@ -565,9 +565,10 @@ public void testAsyncBlockOperationsOnTimeout() { assertEquals( "timeout while blocking operations after [0s]", - asInstanceOf( + safeAwaitFailure( ElasticsearchTimeoutException.class, - safeAwaitFailure(Releasable.class, f -> permits.blockOperations(f, 0, TimeUnit.SECONDS, threadPool.generic())) + Releasable.class, + f -> permits.blockOperations(f, 0, TimeUnit.SECONDS, threadPool.generic()) ).getMessage() ); diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index cddda8a76ae60..f15506676dc39 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -2169,16 +2169,14 @@ public void testShardCanNotBeMarkedAsRelocatedIfRelocationCancelled() throws IOE final ShardRouting relocationRouting = ShardRoutingHelper.relocate(originalRouting, "other_node"); IndexShardTestCase.updateRoutingEntry(shard, relocationRouting); IndexShardTestCase.updateRoutingEntry(shard, originalRouting); - asInstanceOf( + safeAwaitFailure( IllegalIndexShardStateException.class, - safeAwaitFailure( - Void.class, - listener -> shard.relocated( - relocationRouting.relocatingNodeId(), - relocationRouting.getTargetRelocatingShard().allocationId().getId(), - (primaryContext, l) -> fail("should not be called"), - listener - ) + Void.class, + listener -> shard.relocated( + relocationRouting.relocatingNodeId(), + relocationRouting.getTargetRelocatingShard().allocationId().getId(), + (primaryContext, l) -> fail("should not be called"), + listener ) 
); closeShards(shard); @@ -2263,16 +2261,14 @@ public void testRelocateMismatchedTarget() throws Exception { final AtomicBoolean relocated = new AtomicBoolean(); - final IllegalIndexShardStateException wrongNodeException = asInstanceOf( + final IllegalIndexShardStateException wrongNodeException = safeAwaitFailure( IllegalIndexShardStateException.class, - safeAwaitFailure( - Void.class, - listener -> shard.relocated( - wrongTargetNodeShardRouting.relocatingNodeId(), - wrongTargetNodeShardRouting.getTargetRelocatingShard().allocationId().getId(), - (ctx, l) -> relocated.set(true), - listener - ) + Void.class, + listener -> shard.relocated( + wrongTargetNodeShardRouting.relocatingNodeId(), + wrongTargetNodeShardRouting.getTargetRelocatingShard().allocationId().getId(), + (ctx, l) -> relocated.set(true), + listener ) ); assertThat( @@ -2281,16 +2277,14 @@ public void testRelocateMismatchedTarget() throws Exception { ); assertFalse(relocated.get()); - final IllegalStateException wrongTargetIdException = asInstanceOf( + final IllegalStateException wrongTargetIdException = safeAwaitFailure( IllegalStateException.class, - safeAwaitFailure( - Void.class, - listener -> shard.relocated( - wrongTargetAllocationIdShardRouting.relocatingNodeId(), - wrongTargetAllocationIdShardRouting.getTargetRelocatingShard().allocationId().getId(), - (ctx, l) -> relocated.set(true), - listener - ) + Void.class, + listener -> shard.relocated( + wrongTargetAllocationIdShardRouting.relocatingNodeId(), + wrongTargetAllocationIdShardRouting.getTargetRelocatingShard().allocationId().getId(), + (ctx, l) -> relocated.set(true), + listener ) ); assertThat( diff --git a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStreamTests.java b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStreamTests.java index 88e74c2bc5151..c31a68f36de71 100644 --- a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStreamTests.java +++ b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/SlicedInputStreamTests.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomNumbers; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.test.ESTestCase; import java.io.ByteArrayInputStream; @@ -18,11 +19,15 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Random; +import java.util.function.Consumer; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.in; public class SlicedInputStreamTests extends ESTestCase { + public void testReadRandom() throws IOException { int parts = randomIntBetween(1, 20); ByteArrayOutputStream stream = new ByteArrayOutputStream(); @@ -79,6 +84,42 @@ protected InputStream openSlice(int slice) throws IOException { } } + public void testSkip() throws IOException { + final int slices = randomIntBetween(1, 20); + final var bytes = randomByteArrayOfLength(randomIntBetween(1000, 10000)); + final int sliceSize = bytes.length / slices; + + final var streamsOpened = new ArrayList(); + SlicedInputStream input = new SlicedInputStream(slices) { + @Override + protected InputStream openSlice(int slice) throws IOException { + final int sliceOffset = slice * sliceSize; + final int length = slice == slices - 1 ? 
bytes.length - sliceOffset : sliceSize; + final var stream = new CheckClosedInputStream(new ByteArrayInputStream(bytes, sliceOffset, length)); + streamsOpened.add(stream); + return stream; + } + }; + + // Skip up to a random point + final int skip = randomIntBetween(0, bytes.length); + input.skipNBytes(skip); + + // Read all remaining bytes, which should be the bytes from skip up to the end + final int remainingBytes = bytes.length - skip; + if (remainingBytes > 0) { + final var remainingBytesRead = new byte[remainingBytes]; + input.readNBytes(remainingBytesRead, 0, remainingBytes); + final var expectedRemainingBytes = Arrays.copyOfRange(bytes, skip, bytes.length); + assertArrayEquals(expectedRemainingBytes, remainingBytesRead); + } + + // Confirm we reached the end and close the stream + assertThat(input.read(), equalTo(-1)); + input.close(); + streamsOpened.forEach(stream -> assertTrue(stream.closed)); + } + public void testRandomMarkReset() throws IOException { final int slices = randomIntBetween(1, 20); final var bytes = randomByteArrayOfLength(randomIntBetween(1000, 10000)); @@ -96,13 +137,17 @@ protected InputStream openSlice(int slice) throws IOException { } }; - // Read up to a random point + // Read or skip up to a random point final int mark = randomIntBetween(0, bytes.length); if (mark > 0) { - final var bytesReadUntilMark = new byte[mark]; - input.readNBytes(bytesReadUntilMark, 0, mark); - final var expectedBytesUntilMark = new ByteArrayInputStream(bytes, 0, mark).readAllBytes(); - assertArrayEquals(expectedBytesUntilMark, bytesReadUntilMark); + if (randomBoolean()) { + final var bytesReadUntilMark = new byte[mark]; + input.readNBytes(bytesReadUntilMark, 0, mark); + final var expectedBytesUntilMark = Arrays.copyOfRange(bytes, 0, mark); + assertArrayEquals(expectedBytesUntilMark, bytesReadUntilMark); + } else { + input.skipNBytes(mark); + } } // Reset should throw since there is no mark @@ -111,13 +156,22 @@ protected InputStream openSlice(int slice) throws IOException { // Mark input.mark(randomNonNegativeInt()); - // Read up to another random point + // Read or skip up to another random point final int moreBytes = randomIntBetween(0, bytes.length - mark); if (moreBytes > 0) { - final var moreBytesRead = new byte[moreBytes]; - input.readNBytes(moreBytesRead, 0, moreBytes); - final var expectedMoreBytes = new ByteArrayInputStream(bytes, mark, moreBytes).readAllBytes(); - assertArrayEquals(expectedMoreBytes, moreBytesRead); + if (randomBoolean()) { + final var moreBytesRead = new byte[moreBytes]; + input.readNBytes(moreBytesRead, 0, moreBytes); + final var expectedMoreBytes = Arrays.copyOfRange(bytes, mark, mark + moreBytes); + assertArrayEquals(expectedMoreBytes, moreBytesRead); + } else { + input.skipNBytes(moreBytes); + } + } + + // Randomly read to EOF + if (randomBoolean()) { + input.readAllBytes(); } // Reset @@ -128,7 +182,7 @@ protected InputStream openSlice(int slice) throws IOException { if (remainingBytes > 0) { final var remainingBytesRead = new byte[remainingBytes]; input.readNBytes(remainingBytesRead, 0, remainingBytes); - final var expectedRemainingBytes = new ByteArrayInputStream(bytes, mark, remainingBytes).readAllBytes(); + final var expectedRemainingBytes = Arrays.copyOfRange(bytes, mark, bytes.length); assertArrayEquals(expectedRemainingBytes, remainingBytesRead); } @@ -138,6 +192,67 @@ protected InputStream openSlice(int slice) throws IOException { streamsOpened.forEach(stream -> assertTrue(stream.closed)); } + public void testMarkSkipResetInBigSlice() throws 
IOException { + SlicedInputStream input = new SlicedInputStream(1) { + @Override + protected InputStream openSlice(int slice) throws IOException { + assertThat(slice, equalTo(0)); + return new IncreasingBytesUnlimitedInputStream(); + } + }; + + // Buffer to use for reading a few KiB from a start byte of IncreasingBytesUnlimitedInputStream, to verify expected bytes. + final byte[] buffer = new byte[Math.toIntExact(ByteSizeValue.ofKb(randomIntBetween(1, 8)).getBytes())]; + Consumer readAndAssert = (start) -> { + try { + final int read = input.read(buffer); + assertThat("Unexpected number of bytes read", read, equalTo(buffer.length)); + for (int i = 0; i < read; i++) { + assertThat("Unexpected value for startByte=" + start + " and i=" + i, buffer[i], equalTo((byte) ((start + i) % 255))); + } + } catch (IOException e) { + throw new AssertionError(e); + } + }; + + // Skip up to a random point that is larger than 2GiB so that the marked offset is larger than an int (ES-9639). + final long mark = randomLongBetween(Integer.MAX_VALUE, Long.MAX_VALUE - buffer.length); + input.skipNBytes(mark); + + // Mark + input.mark(randomNonNegativeInt()); + + // Skip a large amount of bytes + final long skipTo = randomLongBetween(mark, Long.MAX_VALUE - buffer.length); + input.skipNBytes(skipTo - mark); + + // Read a few KiB, asserting the bytes are what they are expected + readAndAssert.accept(skipTo); + + // Reset + input.reset(); + + // Read a few KiB, asserting the bytes are what they are expected + readAndAssert.accept(mark); + } + + public void testMarkBeyondEOF() throws IOException { + final int slices = randomIntBetween(1, 20); + SlicedInputStream input = new SlicedInputStream(slices) { + @Override + protected InputStream openSlice(int slice) throws IOException { + return new ByteArrayInputStream(new byte[] { 0 }, 0, 1); + } + }; + + input.readAllBytes(); + assertThat(input.read(), equalTo(-1)); + input.mark(randomNonNegativeInt()); + assertThat(input.read(), equalTo(-1)); + input.reset(); + assertThat(input.read(), equalTo(-1)); + } + public void testMarkResetClosedStream() throws IOException { final int slices = randomIntBetween(1, 20); SlicedInputStream input = new SlicedInputStream(slices) { @@ -147,7 +262,7 @@ protected InputStream openSlice(int slice) throws IOException { } }; - input.skip(randomIntBetween(1, slices)); + input.skipNBytes(randomIntBetween(1, slices)); input.mark(randomNonNegativeInt()); input.close(); // SlicedInputStream supports reading -1 after close without throwing @@ -232,4 +347,19 @@ public void close() throws IOException { super.close(); } } + + private static final class IncreasingBytesUnlimitedInputStream extends InputStream { + long currentByte = 0; + + @Override + public int read() throws IOException { + return (int) (currentByte++ % 255); + } + + @Override + public long skip(long n) throws IOException { + currentByte += n; + return n; + } + } } diff --git a/server/src/test/java/org/elasticsearch/rest/RestCompatibleVersionHelperTests.java b/server/src/test/java/org/elasticsearch/rest/RestCompatibleVersionHelperTests.java index 104d578ef969b..040ab9fd5c2e9 100644 --- a/server/src/test/java/org/elasticsearch/rest/RestCompatibleVersionHelperTests.java +++ b/server/src/test/java/org/elasticsearch/rest/RestCompatibleVersionHelperTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.test.ESTestCase; import 
org.elasticsearch.xcontent.ParsedMediaType; import org.hamcrest.CustomTypeSafeMatcher; @@ -165,8 +164,6 @@ public void testAcceptAndContentTypeCombinations() { assertThat(requestWith(acceptHeader(null), contentTypeHeader("application/json"), bodyPresent()), not(isCompatible())); } - @UpdateForV9 - @AwaitsFix(bugUrl = "this can be re-enabled once our rest api version is bumped to V_9") public void testObsoleteVersion() { ElasticsearchStatusException e = expectThrows( ElasticsearchStatusException.class, @@ -213,14 +210,11 @@ public void testObsoleteVersion() { assertThat( e.getMessage(), equalTo( - "Content-Type version must be either version " - + CURRENT_VERSION - + " or " - + PREVIOUS_VERSION - + ", but found " - + OBSOLETE_VERSION - + ". " - + "Content-Type=" + "A compatible version is required on both Content-Type and Accept headers if either one has requested a " + + "compatible version and the compatible versions must match. " + + "Accept=" + + acceptHeader(PREVIOUS_VERSION) + + ", Content-Type=" + contentTypeHeader(OBSOLETE_VERSION) ) ); @@ -242,8 +236,8 @@ public void testMediaTypeCombinations() { assertThat( requestWith( - acceptHeader("application/vnd.elasticsearch+json;compatible-with=7"), - contentTypeHeader("application/vnd.elasticsearch+cbor;compatible-with=7"), + acceptHeader("application/vnd.elasticsearch+json;compatible-with=8"), + contentTypeHeader("application/vnd.elasticsearch+cbor;compatible-with=8"), bodyPresent() ), isCompatible() @@ -253,8 +247,8 @@ public void testMediaTypeCombinations() { expectThrows( ElasticsearchStatusException.class, () -> requestWith( - acceptHeader("application/vnd.elasticsearch+json;compatible-with=7"), - contentTypeHeader("application/vnd.elasticsearch+cbor;compatible-with=8"), + acceptHeader("application/vnd.elasticsearch+json;compatible-with=8"), + contentTypeHeader("application/vnd.elasticsearch+cbor;compatible-with=9"), bodyPresent() ) ); @@ -273,20 +267,20 @@ public void testTextMediaTypes() { // versioned assertThat( requestWith( - acceptHeader("text/vnd.elasticsearch+tab-separated-values;compatible-with=7"), - contentTypeHeader(7), + acceptHeader("text/vnd.elasticsearch+tab-separated-values;compatible-with=8"), + contentTypeHeader(8), bodyNotPresent() ), isCompatible() ); assertThat( - requestWith(acceptHeader("text/vnd.elasticsearch+plain;compatible-with=7"), contentTypeHeader(7), bodyNotPresent()), + requestWith(acceptHeader("text/vnd.elasticsearch+plain;compatible-with=8"), contentTypeHeader(8), bodyNotPresent()), isCompatible() ); assertThat( - requestWith(acceptHeader("text/vnd.elasticsearch+csv;compatible-with=7"), contentTypeHeader(7), bodyNotPresent()), + requestWith(acceptHeader("text/vnd.elasticsearch+csv;compatible-with=8"), contentTypeHeader(8), bodyNotPresent()), isCompatible() ); } diff --git a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestCreateIndexActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestCreateIndexActionTests.java index 601905635ff5e..2682a8c778168 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestCreateIndexActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestCreateIndexActionTests.java @@ -9,29 +9,15 @@ package org.elasticsearch.rest.action.admin.indices; -import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; -import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.common.bytes.BytesArray; import 
org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.rest.FakeRestRequest; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; import java.util.Map; -import static org.elasticsearch.rest.BaseRestHandler.INCLUDE_TYPE_NAME_PARAMETER; -import static org.hamcrest.Matchers.equalTo; -import static org.mockito.Mockito.mock; - public class RestCreateIndexActionTests extends ESTestCase { public void testPrepareTypelessRequest() throws IOException { @@ -99,59 +85,4 @@ public void testMalformedMappings() throws IOException { Map source = RestCreateIndexAction.prepareMappings(contentAsMap); assertEquals(contentAsMap, source); } - - public void testIncludeTypeName() throws IOException { - RestCreateIndexAction action = new RestCreateIndexAction(); - List compatibleMediaType = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - Map params = new HashMap<>(); - params.put(INCLUDE_TYPE_NAME_PARAMETER, randomFrom("true", "false")); - RestRequest deprecatedRequest = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", compatibleMediaType)) - .withMethod(RestRequest.Method.PUT) - .withPath("/some_index") - .withParams(params) - .build(); - - action.prepareRequest(deprecatedRequest, mock(NodeClient.class)); - assertCriticalWarnings(RestCreateIndexAction.TYPES_DEPRECATION_MESSAGE); - - RestRequest validRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.PUT) - .withPath("/some_index") - .build(); - action.prepareRequest(validRequest, mock(NodeClient.class)); - } - - public void testTypeInMapping() throws IOException { - RestCreateIndexAction action = new RestCreateIndexAction(); - - List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - String content = """ - { - "mappings": { - "some_type": { - "properties": { - "field1": { - "type": "text" - } - } - } - } - }"""; - - Map params = new HashMap<>(); - params.put(RestCreateIndexAction.INCLUDE_TYPE_NAME_PARAMETER, "true"); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.PUT) - .withHeaders(Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader)) - .withPath("/some_index") - .withParams(params) - .withContent(new BytesArray(content), null) - .build(); - - CreateIndexRequest createIndexRequest = RestCreateIndexAction.prepareRequestV7(request); - // some_type is replaced with _doc - assertThat(createIndexRequest.mappings(), equalTo(""" - {"_doc":{"properties":{"field1":{"type":"text"}}}}""")); - assertCriticalWarnings(RestCreateIndexAction.TYPES_DEPRECATION_MESSAGE); - } } diff --git a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestGetIndicesActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestGetIndicesActionTests.java deleted file mode 100644 index 33e7b7fa21382..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestGetIndicesActionTests.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.rest.action.admin.indices; - -import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.rest.FakeRestRequest; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.elasticsearch.rest.BaseRestHandler.INCLUDE_TYPE_NAME_PARAMETER; -import static org.mockito.Mockito.mock; - -public final class RestGetIndicesActionTests extends ESTestCase { - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - /** - * Test that setting the "include_type_name" parameter raises a warning for the GET request - */ - public void testIncludeTypeNamesWarning() throws IOException { - Map params = new HashMap<>(); - params.put(INCLUDE_TYPE_NAME_PARAMETER, randomFrom("true", "false")); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index").withParams(params).build(); - - RestGetIndicesAction handler = new RestGetIndicesAction(); - handler.prepareRequest(request, mock(NodeClient.class)); - assertCriticalWarnings(RestGetIndicesAction.TYPES_DEPRECATION_MESSAGE); - - // the same request without the parameter should pass without warning - request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index").build(); - handler.prepareRequest(request, mock(NodeClient.class)); - } - - /** - * Test that setting the "include_type_name" parameter doesn't raises a warning if the HEAD method is used (indices.exists) - */ - public void testIncludeTypeNamesWarningExists() throws IOException { - Map params = new HashMap<>(); - params.put(INCLUDE_TYPE_NAME_PARAMETER, randomFrom("true", "false")); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.HEAD).withPath("/some_index").withParams(params).build(); - - RestGetIndicesAction handler = new RestGetIndicesAction(); - handler.prepareRequest(request, mock(NodeClient.class)); - } -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestPutIndexTemplateActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestPutIndexTemplateActionTests.java deleted file mode 100644 index 5728e902aff6b..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestPutIndexTemplateActionTests.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.rest.action.admin.indices; - -import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.elasticsearch.rest.BaseRestHandler.INCLUDE_TYPE_NAME_PARAMETER; -import static org.mockito.Mockito.mock; - -public final class RestPutIndexTemplateActionTests extends ESTestCase { - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - private RestPutIndexTemplateAction action; - - @Before - public void setUpAction() { - action = new RestPutIndexTemplateAction(); - } - - public void testIncludeTypeName() throws IOException { - XContentBuilder typedContent = XContentFactory.jsonBuilder() - .startObject() - .startObject("mappings") - .startObject("my_doc") - .startObject("properties") - .startObject("field1") - .field("type", "keyword") - .endObject() - .startObject("field2") - .field("type", "text") - .endObject() - .endObject() - .endObject() - .endObject() - .startObject("aliases") - .startObject("read_alias") - .endObject() - .endObject() - .endObject(); - - Map params = new HashMap<>(); - params.put(INCLUDE_TYPE_NAME_PARAMETER, "true"); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ) - .withMethod(RestRequest.Method.PUT) - .withParams(params) - .withPath("/_template/_some_template") - .withContent(BytesReference.bytes(typedContent), null) - .build(); - action.prepareRequest(request, mock(NodeClient.class)); - assertCriticalWarnings(RestPutIndexTemplateAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestValidateQueryActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestValidateQueryActionTests.java index 2d719c1ed537d..16e651a12c4d6 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestValidateQueryActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/admin/indices/RestValidateQueryActionTests.java @@ -19,10 +19,7 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.EsExecutors; -import org.elasticsearch.common.util.concurrent.ThreadContext; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; -import org.elasticsearch.rest.RestChannel; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestRequest; import 
org.elasticsearch.search.AbstractSearchTestCase; @@ -41,7 +38,6 @@ import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; import static java.util.Collections.emptyMap; @@ -159,37 +155,4 @@ private RestRequest createRestRequest(String content) { .withContent(new BytesArray(content), XContentType.JSON) .build(); } - - public void testTypeInPath() { - List compatibleMediaType = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", compatibleMediaType)) - .withMethod(RestRequest.Method.GET) - .withPath("/some_index/some_type/_validate/query") - .build(); - - performRequest(request); - assertCriticalWarnings(RestValidateQueryAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeParameter() { - List compatibleMediaType = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - Map params = new HashMap<>(); - params.put("type", "some_type"); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", compatibleMediaType)) - .withMethod(RestRequest.Method.GET) - .withPath("_validate/query") - .withParams(params) - .build(); - - performRequest(request); - assertCriticalWarnings(RestValidateQueryAction.TYPES_DEPRECATION_MESSAGE); - } - - private void performRequest(RestRequest request) { - RestChannel channel = new FakeRestChannel(request, false, 1); - ThreadContext threadContext = new ThreadContext(Settings.EMPTY); - controller.dispatchRequest(request, channel, threadContext); - } } diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestDeleteActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestDeleteActionTests.java deleted file mode 100644 index d9141002eb32c..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestDeleteActionTests.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.rest.action.document; - -import org.elasticsearch.action.delete.DeleteResponse; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.junit.Before; -import org.mockito.Mockito; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -public final class RestDeleteActionTests extends RestActionTestCase { - - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestDeleteAction()); - verifyingClient.setExecuteVerifier((actionType, request) -> Mockito.mock(DeleteResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> Mockito.mock(DeleteResponse.class)); - } - - public void testTypeInPath() { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", contentTypeHeader)) - .withMethod(RestRequest.Method.DELETE) - .withPath("/some_index/some_type/some_id") - .build(); - dispatchRequest(request); - assertCriticalWarnings(RestDeleteAction.TYPES_DEPRECATION_MESSAGE); - - RestRequest validRequest = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", contentTypeHeader)) - .withMethod(RestRequest.Method.DELETE) - .withPath("/some_index/_doc/some_id") - .build(); - dispatchRequest(validRequest); - } -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestGetActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestGetActionTests.java deleted file mode 100644 index 0a9abacd82635..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestGetActionTests.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.rest.action.document; - -import org.elasticsearch.action.get.GetRequest; -import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.junit.Before; -import org.mockito.Mockito; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.hamcrest.Matchers.instanceOf; - -public final class RestGetActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestGetAction()); - verifyingClient.setExecuteVerifier((actionType, request) -> { - assertThat(request, instanceOf(GetRequest.class)); - return Mockito.mock(GetResponse.class); - }); - } - - public void testTypeInPath() { - testTypeInPath(RestRequest.Method.GET); - testTypeInPath(RestRequest.Method.HEAD); - } - - private void testTypeInPath(RestRequest.Method method) { - FakeRestRequest.Builder deprecatedRequest = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withPath("/some_index/some_type/some_id"); - dispatchRequest(deprecatedRequest.withMethod(method).build()); - assertCriticalWarnings(RestGetAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestGetSourceActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestGetSourceActionTests.java index 17840990d5b10..7cec10299280e 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestGetSourceActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/document/RestGetSourceActionTests.java @@ -14,7 +14,6 @@ import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.get.GetResult; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.RestResponse; @@ -26,12 +25,6 @@ import org.junit.Before; import org.mockito.Mockito; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import static java.util.Collections.emptyMap; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.elasticsearch.rest.RestStatus.OK; @@ -43,7 +36,6 @@ public final class RestGetSourceActionTests extends RestActionTestCase { private static RestRequest request = new FakeRestRequest(); private static FakeRestChannel channel = new FakeRestChannel(request, true, 0); private static RestGetSourceResponseListener listener = new RestGetSourceResponseListener(channel, request); - private final List compatibleMediaType = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); @Before public void setUpAction() { @@ -89,36 +81,4 @@ public void testRestGetSourceActionWithMissingDocumentSource() { assertThat(exception.getMessage(), equalTo("Source not found [index1]/[1]")); } - - /** - * test deprecation is logged if type is used in path - */ - public void testTypeInPath() { - for (RestRequest.Method method : Arrays.asList(RestRequest.Method.GET, RestRequest.Method.HEAD)) 
{ - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", compatibleMediaType)) - .withMethod(method) - .withPath("/some_index/some_type/id/_source") - .build(); - dispatchRequest(request); - assertCriticalWarnings(RestGetSourceAction.TYPES_DEPRECATION_MESSAGE); - } - } - - /** - * test deprecation is logged if type is used as parameter - */ - public void testTypeParameter() { - Map params = new HashMap<>(); - params.put("type", "some_type"); - for (RestRequest.Method method : Arrays.asList(RestRequest.Method.GET, RestRequest.Method.HEAD)) { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", compatibleMediaType)) - .withMethod(method) - .withPath("/some_index/_source/id") - .withParams(params) - .build(); - dispatchRequest(request); - assertCriticalWarnings(RestGetSourceAction.TYPES_DEPRECATION_MESSAGE); - } - } - } diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java index b7f0fa3c1c707..1aa53382666ef 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java @@ -19,7 +19,6 @@ import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.document.RestIndexAction.AutoIdHandler; @@ -29,18 +28,12 @@ import org.elasticsearch.xcontent.XContentType; import org.junit.Before; -import java.util.Collections; -import java.util.List; -import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; public final class RestIndexActionTests extends RestActionTestCase { - - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - private final AtomicReference clusterStateSupplier = new AtomicReference<>(); @Before @@ -85,34 +78,4 @@ private void checkAutoIdOpType(Version minClusterVersion, DocWriteRequest.OpType dispatchRequest(autoIdRequest); assertThat(executeCalled.get(), equalTo(true)); } - - public void testTypeInPath() { - // using CompatibleRestIndexAction - RestRequest deprecatedRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.PUT) - .withHeaders(Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader)) - .withPath("/some_index/some_type/some_id") - .build(); - dispatchRequest(deprecatedRequest); - assertCriticalWarnings(RestIndexAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testCreateWithTypeInPath() { - // using CompatibleCreateHandler - RestRequest deprecatedRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.PUT) - .withHeaders(Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader)) - .withPath("/some_index/some_type/some_id/_create") - .build(); - dispatchRequest(deprecatedRequest); - assertCriticalWarnings(RestIndexAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testAutoIdWithType() { - // using CompatibleAutoIdHandler - RestRequest deprecatedRequest = new 
FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST) - .withHeaders(Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader)) - .withPath("/some_index/some_type/") - .build(); - dispatchRequest(deprecatedRequest); - assertCriticalWarnings(RestIndexAction.TYPES_DEPRECATION_MESSAGE); - } } diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestMultiGetActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestMultiGetActionTests.java deleted file mode 100644 index ed793f3127a93..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestMultiGetActionTests.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.rest.action.document; - -import org.elasticsearch.action.get.MultiGetRequest; -import org.elasticsearch.action.get.MultiGetResponse; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; -import org.mockito.Mockito; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.hamcrest.Matchers.instanceOf; - -public final class RestMultiGetActionTests extends RestActionTestCase { - XContentType VND_TYPE = randomVendorType(); - List contentTypeHeader = Collections.singletonList(compatibleMediaType(VND_TYPE, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestMultiGetAction(Settings.EMPTY)); - verifyingClient.setExecuteVerifier((actionType, request) -> { - assertThat(request, instanceOf(MultiGetRequest.class)); - return Mockito.mock(MultiGetResponse.class); - }); - } - - public void testTypeInPath() { - RestRequest deprecatedRequest = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("some_index/some_type/_mget").build(); - dispatchRequest(deprecatedRequest); - assertCriticalWarnings(RestMultiGetAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeInBody() throws Exception { - XContentBuilder content = XContentFactory.contentBuilder(VND_TYPE) - .startObject() - .startArray("docs") - .startObject() - .field("_index", "some_index") - .field("_type", "_doc") - .field("_id", "2") - .endObject() - .startObject() - .field("_index", "test") - .field("_id", "2") - .endObject() - .endArray() - .endObject(); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withPath("_mget") - .withHeaders(Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader)) - .withContent(BytesReference.bytes(content), null) - .build(); - 
dispatchRequest(request); - assertCriticalWarnings(RestMultiGetAction.TYPES_DEPRECATION_MESSAGE); - } - -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestMultiTermVectorsActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestMultiTermVectorsActionTests.java deleted file mode 100644 index 0e247d70b2ba3..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestMultiTermVectorsActionTests.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.rest.action.document; - -import org.elasticsearch.action.termvectors.MultiTermVectorsResponse; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; -import org.mockito.Mockito; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public final class RestMultiTermVectorsActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestMultiTermVectorsAction()); - verifyingClient.setExecuteVerifier((actionType, request) -> Mockito.mock(MultiTermVectorsResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> Mockito.mock(MultiTermVectorsResponse.class)); - } - - public void testTypeInPath() { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.POST).withPath("/some_index/some_type/_mtermvectors").build(); - - dispatchRequest(request); - assertCriticalWarnings(RestMultiTermVectorsAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeParameter() { - Map params = new HashMap<>(); - params.put("type", "some_type"); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withPath("/some_index/_mtermvectors").withParams(params).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestMultiTermVectorsAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeInBody() throws IOException { - XContentBuilder content = XContentFactory.jsonBuilder() - .startObject() - .startArray("docs") - .startObject() - .field("_type", "some_type") - .field("_id", 1) - .endObject() - .endArray() - .endObject(); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ) - 
.withMethod(RestRequest.Method.POST) - .withPath("/some_index/_mtermvectors") - .withContent(BytesReference.bytes(content), null) - .build(); - - dispatchRequest(request); - assertCriticalWarnings(RestTermVectorsAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestTermVectorsActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestTermVectorsActionTests.java deleted file mode 100644 index a69c167c12729..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestTermVectorsActionTests.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.rest.action.document; - -import org.elasticsearch.action.termvectors.TermVectorsResponse; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; -import org.mockito.Mockito; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -public final class RestTermVectorsActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestTermVectorsAction()); - // todo how to workaround this? 
we get AssertionError without this - verifyingClient.setExecuteVerifier((actionType, request) -> Mockito.mock(TermVectorsResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> Mockito.mock(TermVectorsResponse.class)); - } - - public void testTypeInPath() { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.POST).withPath("/some_index/some_type/some_id/_termvectors").build(); - - dispatchRequest(request); - assertCriticalWarnings(RestTermVectorsAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeInBody() throws IOException { - XContentBuilder content = XContentFactory.jsonBuilder().startObject().field("_type", "some_type").field("_id", 1).endObject(); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ) - .withMethod(RestRequest.Method.GET) - .withPath("/some_index/_termvectors/some_id") - .withContent(BytesReference.bytes(content), null) - .build(); - - dispatchRequest(request); - assertCriticalWarnings(RestTermVectorsAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestUpdateActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestUpdateActionTests.java index def6e8eb0375d..c68867649e25a 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestUpdateActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/document/RestUpdateActionTests.java @@ -13,7 +13,6 @@ import org.elasticsearch.action.update.UpdateResponse; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.VersionType; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.test.rest.FakeRestRequest; @@ -22,17 +21,13 @@ import org.junit.Before; import org.mockito.Mockito; -import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; import static org.hamcrest.CoreMatchers.containsString; import static org.mockito.Mockito.mock; public final class RestUpdateActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - private RestUpdateAction action; @Before @@ -76,17 +71,4 @@ public void testUpdateDocVersion() { ) ); } - - public void testTypeInPath() { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.POST).withPath("/some_index/some_type/some_id/_update").build(); - dispatchRequest(request); - assertCriticalWarnings(RestUpdateAction.TYPES_DEPRECATION_MESSAGE); - - RestRequest validRequest = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.DELETE).withPath("/some_index/_update/some_id").build(); - dispatchRequest(validRequest); - } } diff --git a/server/src/test/java/org/elasticsearch/rest/action/search/RestCountActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/search/RestCountActionTests.java deleted file mode 100644 index e72511989f083..0000000000000 --- 
a/server/src/test/java/org/elasticsearch/rest/action/search/RestCountActionTests.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.rest.action.search; - -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.RestRequest.Method; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.junit.Before; -import org.mockito.Mockito; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.hamcrest.Matchers.instanceOf; - -public final class RestCountActionTests extends RestActionTestCase { - - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestCountAction()); - verifyingClient.setExecuteVerifier((actionType, request) -> { - assertThat(request, instanceOf(SearchRequest.class)); - return Mockito.mock(SearchResponse.class); - }); - } - - public void testTypeInPath() { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", contentTypeHeader)) - .withMethod(Method.POST) - .withPath("/some_index/some_type/_count") - .build(); - - dispatchRequest(request); - assertCriticalWarnings(RestCountAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeParameter() { - Map params = new HashMap<>(); - params.put("type", "some_type"); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", contentTypeHeader)) - .withMethod(Method.GET) - .withPath("/some_index/_count") - .withParams(params) - .build(); - - dispatchRequest(request); - assertCriticalWarnings(RestCountAction.TYPES_DEPRECATION_MESSAGE); - } -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/search/RestExplainActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/search/RestExplainActionTests.java deleted file mode 100644 index ddbe7243d304b..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/search/RestExplainActionTests.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.rest.action.search; - -import org.elasticsearch.action.explain.ExplainResponse; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; -import org.mockito.Mockito; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -public final class RestExplainActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - RestExplainAction action = new RestExplainAction(); - controller().registerHandler(action); - verifyingClient.setExecuteVerifier((actionType, request) -> Mockito.mock(ExplainResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> Mockito.mock(ExplainResponse.class)); - } - - public void testTypeInPath() { - RestRequest deprecatedRequest = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", contentTypeHeader)) - .withMethod(RestRequest.Method.GET) - .withPath("/some_index/some_type/some_id/_explain") - .build(); - dispatchRequest(deprecatedRequest); - assertCriticalWarnings(RestExplainAction.TYPES_DEPRECATION_MESSAGE); - - RestRequest validRequest = new FakeRestRequest.Builder(xContentRegistry()).withHeaders(Map.of("Accept", contentTypeHeader)) - .withMethod(RestRequest.Method.GET) - .withPath("/some_index/_explain/some_id") - .build(); - dispatchRequest(validRequest); - } - -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/search/RestMultiSearchActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/search/RestMultiSearchActionTests.java deleted file mode 100644 index b77817e4c0258..0000000000000 --- a/server/src/test/java/org/elasticsearch/rest/action/search/RestMultiSearchActionTests.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.rest.action.search; - -import org.elasticsearch.action.search.MultiSearchResponse; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.usage.UsageService; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; - -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.mockito.Mockito.mock; - -public final class RestMultiSearchActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)); - - @Before - public void setUpAction() { - RestMultiSearchAction action = new RestMultiSearchAction(Settings.EMPTY, new UsageService().getSearchUsageHolder(), nf -> false); - controller().registerHandler(action); - verifyingClient.setExecuteVerifier((actionType, request) -> mock(MultiSearchResponse.class)); - verifyingClient.setExecuteLocallyVerifier((actionType, request) -> mock(MultiSearchResponse.class)); - } - - public void testTypeInPath() { - String content = "{ \"index\": \"some_index\" } \n {} \n"; - BytesArray bytesContent = new BytesArray(content.getBytes(StandardCharsets.UTF_8)); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/some_type/_msearch").withContent(bytesContent, null).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestMultiSearchAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeInBody() { - String content = "{ \"index\": \"some_index\", \"type\": \"some_type\" } \n {} \n"; - BytesArray bytesContent = new BytesArray(content.getBytes(StandardCharsets.UTF_8)); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.POST).withPath("/some_index/_msearch").withContent(bytesContent, null).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestMultiSearchAction.TYPES_DEPRECATION_MESSAGE); - } - -} diff --git a/server/src/test/java/org/elasticsearch/rest/action/search/RestSearchActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/search/RestSearchActionTests.java index e207d150ac6cd..24f59a8c3abe7 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/search/RestSearchActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/search/RestSearchActionTests.java @@ -11,7 +11,6 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.suggest.SuggestBuilder; @@ -23,7 +22,6 @@ import org.elasticsearch.usage.UsageService; import org.junit.Before; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,8 +29,6 @@ import static org.mockito.Mockito.mock; public final 
class RestSearchActionTests extends RestActionTestCase { - final List contentTypeHeader = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - private RestSearchAction action; @Before @@ -43,27 +39,6 @@ public void setUpAction() { verifyingClient.setExecuteLocallyVerifier((actionType, request) -> mock(SearchResponse.class)); } - public void testTypeInPath() { - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/some_type/_search").build(); - - dispatchRequest(request); - assertCriticalWarnings(RestSearchAction.TYPES_DEPRECATION_MESSAGE); - } - - public void testTypeParameter() { - Map params = new HashMap<>(); - params.put("type", "some_type"); - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/_search").withParams(params).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestSearchAction.TYPES_DEPRECATION_MESSAGE); - } - /** * The "enable_fields_emulation" flag on search requests is a no-op but should not raise an error */ @@ -71,9 +46,10 @@ public void testEnableFieldsEmulationNoErrors() throws Exception { Map params = new HashMap<>(); params.put("enable_fields_emulation", "true"); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/_search").withParams(params).build(); + RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.GET) + .withPath("/some_index/_search") + .withParams(params) + .build(); action.handleRequest(request, new FakeRestChannel(request, false, 1), verifyingClient); } @@ -83,9 +59,10 @@ public void testValidateSearchRequest() { Map params = new HashMap<>(); params.put("rest_total_hits_as_int", "true"); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/_search").withParams(params).build(); + RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.GET) + .withPath("/some_index/_search") + .withParams(params) + .build(); SearchRequest searchRequest = new SearchRequest(); searchRequest.source(new SearchSourceBuilder().trackTotalHitsUpTo(100)); @@ -100,9 +77,10 @@ public void testValidateSearchRequest() { Map params = new HashMap<>(); params.put("search_type", randomFrom(SearchType.values()).name()); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/_search").withParams(params).build(); + RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.GET) + .withPath("/some_index/_search") + .withParams(params) + .build(); SearchRequest searchRequest = new SearchRequest(); KnnSearchBuilder knnSearch = new KnnSearchBuilder("vector", new float[] { 1, 1, 1 }, 10, 100, null); @@ -126,9 +104,10 @@ public void testIllegalSearchType() { Map params = new HashMap<>(); params.put("search_type", 
"some_search_type"); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withMethod(RestRequest.Method.GET).withPath("/some_index/_search").withParams(params).build(); + RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.GET) + .withPath("/some_index/_search") + .withParams(params) + .build(); Exception ex = expectThrows(IllegalArgumentException.class, () -> action.prepareRequest(request, verifyingClient)); assertEquals("No search type for [some_search_type]", ex.getMessage()); diff --git a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java index 0a3c2c939b456..9109cd6b89bed 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -18,12 +18,10 @@ import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.query.AbstractQueryBuilder; -import org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.query.TermQueryBuilder; -import org.elasticsearch.index.query.TypeQueryV7Builder; import org.elasticsearch.index.query.functionscore.GaussDecayFunctionBuilder; import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.search.aggregations.AggregationBuilder; @@ -33,7 +31,6 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.heuristic.ChiSquare; import org.elasticsearch.search.aggregations.pipeline.AbstractPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; @@ -60,7 +57,6 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.NamedXContentRegistry; -import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; @@ -301,7 +297,6 @@ public void testRegisteredQueries() { List allSupportedQueries = new ArrayList<>(); Collections.addAll(allSupportedQueries, NON_DEPRECATED_QUERIES); Collections.addAll(allSupportedQueries, DEPRECATED_QUERIES); - Collections.addAll(allSupportedQueries, REST_COMPATIBLE_QUERIES); SearchModule module = new SearchModule(Settings.EMPTY, emptyList()); @@ -471,11 +466,6 @@ public CheckedBiConsumer getReque // add here deprecated queries to make sure we log a deprecation warnings when they are used private static final String[] DEPRECATED_QUERIES = new String[] { "field_masking_span", "geo_polygon" }; - private static final String[] REST_COMPATIBLE_QUERIES = new String[] { - TypeQueryV7Builder.NAME_V7.getPreferredName(), - CommonTermsQueryBuilder.NAME_V7.getPreferredName() }; - private static final String[] REST_COMPATIBLE_AGGREGATIONS = new String[] { - 
MovAvgPipelineAggregationBuilder.NAME_V7.getPreferredName() }; /** * Dummy test {@link AggregationBuilder} used to test registering aggregation builders. @@ -692,58 +682,6 @@ public String getWriteableName() { } } - static class CompatQueryBuilder extends DummyQueryBuilder { - public static final String NAME = "compat_name"; - public static final ParseField NAME_OLD = new ParseField(NAME).forRestApiVersion( - RestApiVersion.equalTo(RestApiVersion.minimumSupported()) - ); - - @Override - public String getWriteableName() { - return NAME; - } - } - - public void testRegisterRestApiCompatibleQuery() { - SearchPlugin registerCompatQuery = new SearchPlugin() { - @Override - public List> getQueries() { - return singletonList( - new QuerySpec<>( - CompatQueryBuilder.NAME_OLD, - (streamInput) -> new CompatQueryBuilder(), - CompatQueryBuilder::fromXContent - ) - ); - } - }; - - final SearchModule searchModule = new SearchModule(Settings.EMPTY, singletonList(registerCompatQuery)); - - // all entries can be used for current and previous versions except for compatible entry - assertThat(searchModule.getNamedXContents().stream().filter(e -> - // filter out compatible entry - e.name.match(CompatQueryBuilder.NAME_OLD.getPreferredName(), LoggingDeprecationHandler.INSTANCE) == false) - .filter(e -> RestApiVersion.minimumSupported().matches(e.restApiCompatibility)) - .filter(e -> RestApiVersion.current().matches(e.restApiCompatibility)) - .collect(toSet()), - // -1 because of the registered in the test - hasSize(searchModule.getNamedXContents().size() - REST_COMPATIBLE_QUERIES.length - REST_COMPATIBLE_AGGREGATIONS.length - 1) - ); - - final List compatEntry = searchModule.getNamedXContents() - .stream() - .filter( - e -> e.categoryClass.equals(QueryBuilder.class) - && RestApiVersion.minimumSupported().matches(e.name.getForRestApiVersion()) // v7 compatbile - && RestApiVersion.current().matches(e.name.getForRestApiVersion()) == false - ) // but not v8 compatible - .collect(toList()); - assertThat(compatEntry, hasSize(REST_COMPATIBLE_QUERIES.length + 1));// +1 because of registered in the test - assertTrue(RestApiVersion.minimumSupported().matches(compatEntry.get(0).restApiCompatibility)); - assertFalse(RestApiVersion.current().matches(compatEntry.get(0).restApiCompatibility)); - } - public void testDefaultMaxNestedDepth() { new SearchModule(Settings.EMPTY, emptyList()); assertEquals( diff --git a/server/src/test/java/org/elasticsearch/search/SearchSortValuesTests.java b/server/src/test/java/org/elasticsearch/search/SearchSortValuesTests.java index 6a1874183a30f..79ba65c76ee48 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchSortValuesTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchSortValuesTests.java @@ -72,7 +72,7 @@ protected SearchSortValues doParseInstance(XContentParser parser) throws IOExcep parser.nextToken(); // skip to the elements start array token, fromXContent advances from there if called parser.nextToken(); parser.nextToken(); - SearchSortValues searchSortValues = SearchSortValues.fromXContent(parser); + SearchSortValues searchSortValues = SearchResponseUtils.parseSearchSortValues(parser); parser.nextToken(); assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken()); assertNull(parser.nextToken()); diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightFieldTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightFieldTests.java index 6b2259d8fbedc..a609a13a87833 100644 --- 
a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightFieldTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightFieldTests.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.text.Text; +import org.elasticsearch.search.SearchResponseUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -56,7 +57,7 @@ public void testFromXContent() throws IOException { try (XContentParser parser = createParser(builder)) { parser.nextToken(); // skip to the opening object token, fromXContent advances from here and starts with the field name parser.nextToken(); - HighlightField parsedField = HighlightField.fromXContent(parser); + HighlightField parsedField = SearchResponseUtils.parseHighlightField(parser); assertEquals(highlightField, parsedField); if (highlightField.fragments() != null) { assertEquals(XContentParser.Token.END_ARRAY, parser.currentToken()); diff --git a/server/src/test/java/org/elasticsearch/search/profile/ProfileResultTests.java b/server/src/test/java/org/elasticsearch/search/profile/ProfileResultTests.java index 5f052e1c40dab..c4a0b3b588310 100644 --- a/server/src/test/java/org/elasticsearch/search/profile/ProfileResultTests.java +++ b/server/src/test/java/org/elasticsearch/search/profile/ProfileResultTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable.Reader; import org.elasticsearch.common.util.Maps; +import org.elasticsearch.search.SearchResponseUtils; import org.elasticsearch.test.AbstractXContentSerializingTestCase; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -72,7 +73,7 @@ protected Reader instanceReader() { @Override protected ProfileResult doParseInstance(XContentParser parser) throws IOException { - return ProfileResult.fromXContent(parser); + return SearchResponseUtils.parseProfileResult(parser); } @Override diff --git a/server/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResultTests.java b/server/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResultTests.java index eba3a89a953e4..d2a4cdf62a16b 100644 --- a/server/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResultTests.java +++ b/server/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfileShardResultTests.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.io.stream.Writeable.Reader; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParserUtils; +import org.elasticsearch.search.SearchResponseUtils; import org.elasticsearch.search.profile.ProfileResult; import org.elasticsearch.search.profile.ProfileResultTests; import org.elasticsearch.test.AbstractXContentSerializingTestCase; @@ -55,7 +56,7 @@ protected AggregationProfileShardResult doParseInstance(XContentParser parser) t XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); XContentParserUtils.ensureFieldName(parser, parser.nextToken(), AggregationProfileShardResult.AGGREGATIONS); XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.nextToken(), parser); - AggregationProfileShardResult result = 
AggregationProfileShardResult.fromXContent(parser); + AggregationProfileShardResult result = SearchResponseUtils.readAggregationProfileShardResult(parser); XContentParserUtils.ensureExpectedToken(XContentParser.Token.END_ARRAY, parser.currentToken(), parser); XContentParserUtils.ensureExpectedToken(XContentParser.Token.END_OBJECT, parser.nextToken(), parser); return result; diff --git a/server/src/test/java/org/elasticsearch/search/profile/query/CollectorResultTests.java b/server/src/test/java/org/elasticsearch/search/profile/query/CollectorResultTests.java index 2ca4744bfd7bd..21c32388e8707 100644 --- a/server/src/test/java/org/elasticsearch/search/profile/query/CollectorResultTests.java +++ b/server/src/test/java/org/elasticsearch/search/profile/query/CollectorResultTests.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable.Reader; +import org.elasticsearch.search.SearchResponseUtils; import org.elasticsearch.test.AbstractXContentSerializingTestCase; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -56,7 +57,7 @@ protected CollectorResult mutateInstance(CollectorResult instance) { @Override protected CollectorResult doParseInstance(XContentParser parser) throws IOException { ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); - CollectorResult result = CollectorResult.fromXContent(parser); + CollectorResult result = SearchResponseUtils.parseCollectorResult(parser); ensureExpectedToken(null, parser.nextToken(), parser); return result; } diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java index 169f6d8060020..2394e0b07cc57 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java @@ -130,7 +130,7 @@ public void testGroupClusterIndicesFail() { RemoteClusterAwareTest remoteClusterAware = new RemoteClusterAwareTest(); Set remoteClusterNames = Set.of("cluster1", "cluster2", "some-cluster3"); - mustThrowException(new String[] { ":foo" }, NoSuchRemoteClusterException.class, "no such remote cluster"); + mustThrowException(new String[] { ":foo" }, IllegalArgumentException.class, "is invalid because the remote part is empty"); mustThrowException(new String[] { "notacluster:foo" }, NoSuchRemoteClusterException.class, "no such remote cluster"); // Cluster wildcard exclusion requires :* mustThrowException( diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java index 985fd6e10445d..ff0742c89bba9 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java @@ -8,7 +8,6 @@ */ package org.elasticsearch.transport; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.TransportVersion; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.state.ClusterStateAction; @@ -127,14 +126,10 @@ public void testConnectAndExecuteRequest() throws Exception { assertNotNull(clusterStateResponse); assertEquals("foo_bar_cluster", clusterStateResponse.getState().getClusterName().value()); // also test a failure, there is no handler for scroll registered - 
ActionNotFoundTransportException ex = asInstanceOf( + ActionNotFoundTransportException ex = safeAwaitAndUnwrapFailure( ActionNotFoundTransportException.class, - ExceptionsHelper.unwrapCause( - safeAwaitFailure( - SearchResponse.class, - listener -> client.execute(TransportSearchScrollAction.REMOTE_TYPE, new SearchScrollRequest(""), listener) - ) - ) + SearchResponse.class, + listener -> client.execute(TransportSearchScrollAction.REMOTE_TYPE, new SearchScrollRequest(""), listener) ); assertEquals("No handler for action [indices:data/read/scroll]", ex.getMessage()); } diff --git a/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java b/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java index 28be70533597c..c686329c4154c 100644 --- a/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java +++ b/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java @@ -186,9 +186,10 @@ public void testMismatchedClusterName() { ) ) { assertThat( - asInstanceOf( + safeAwaitFailure( IllegalStateException.class, - safeAwaitFailure(DiscoveryNode.class, listener -> transportServiceA.handshake(connection, timeout, listener)) + DiscoveryNode.class, + listener -> transportServiceA.handshake(connection, timeout, listener) ).getMessage(), containsString( "handshake with [" + discoveryNode + "] failed: remote cluster name [b] does not match local cluster name [a]" @@ -231,9 +232,10 @@ public void testIncompatibleNodeVersions() { ) ) { assertThat( - asInstanceOf( + safeAwaitFailure( IllegalStateException.class, - safeAwaitFailure(DiscoveryNode.class, listener -> transportServiceA.handshake(connection, timeout, listener)) + DiscoveryNode.class, + listener -> transportServiceA.handshake(connection, timeout, listener) ).getMessage(), containsString( "handshake with [" @@ -303,12 +305,10 @@ public void testNodeConnectWithDifferentNodeId() { .version(transportServiceB.getLocalNode().getVersionInformation()) .build(); assertThat( - asInstanceOf( + safeAwaitFailure( ConnectTransportException.class, - safeAwaitFailure( - Releasable.class, - listener -> transportServiceA.connectToNode(discoveryNode, TestProfiles.LIGHT_PROFILE, listener) - ) + Releasable.class, + listener -> transportServiceA.connectToNode(discoveryNode, TestProfiles.LIGHT_PROFILE, listener) ).getMessage(), allOf( containsString("Connecting to [" + discoveryNode.getAddress() + "] failed"), @@ -360,9 +360,10 @@ public void testRejectsMismatchedBuildHash() { ) { assertThat( ExceptionsHelper.unwrap( - asInstanceOf( + safeAwaitFailure( TransportSerializationException.class, - safeAwaitFailure(DiscoveryNode.class, listener -> transportServiceA.handshake(connection, timeout, listener)) + DiscoveryNode.class, + listener -> transportServiceA.handshake(connection, timeout, listener) ), IllegalArgumentException.class ).getMessage(), diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java b/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java index dd9b4ec21a4d1..5ca52024e82f6 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/metadata/DataStreamTestHelper.java @@ -152,7 +152,7 @@ public static DataStream newInstance( .setMetadata(metadata) .setReplicated(replicated) .setLifecycle(lifecycle) - .setFailureStoreEnabled(failureStores.isEmpty() == false) + 
.setDataStreamOptions(failureStores.isEmpty() ? DataStreamOptions.EMPTY : DataStreamOptions.FAILURE_STORE_ENABLED) .setFailureIndices(DataStream.DataStreamIndices.failureIndicesBuilder(failureStores).build()) .build(); } @@ -348,7 +348,7 @@ public static DataStream randomInstance(String dataStreamName, LongSupplier time randomBoolean(), randomBoolean() ? IndexMode.STANDARD : null, // IndexMode.TIME_SERIES triggers validation that many unit tests doesn't pass randomBoolean() ? DataStreamLifecycle.newBuilder().dataRetention(randomMillisUpToYear9999()).build() : null, - failureStore, + failureStore ? DataStreamOptions.FAILURE_STORE_ENABLED : DataStreamOptions.EMPTY, DataStream.DataStreamIndices.backingIndicesBuilder(indices) .setRolloverOnWrite(replicated == false && randomBoolean()) .setAutoShardingEvent( diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java index 7a9e9ed5c2a4e..90c621c62c305 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java @@ -44,6 +44,8 @@ import java.util.regex.Pattern; import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; +import static org.elasticsearch.test.NeverMatcher.never; +import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; @@ -280,9 +282,12 @@ public void testReadBlobWithReadTimeouts() { assertThat(exception, readTimeoutExceptionMatcher()); assertThat( exception.getMessage().toLowerCase(Locale.ROOT), - either(containsString("read timed out")).or(containsString("premature end of chunk coded message body: closing chunk expected")) - .or(containsString("Read timed out")) - .or(containsString("unexpected end of file from server")) + anyOf( + containsString("read timed out"), + containsString("premature end of chunk coded message body: closing chunk expected"), + containsString("Read timed out"), + containsString("unexpected end of file from server") + ) ); assertThat(exception.getSuppressed().length, getMaxRetriesMatcher(maxRetries)); } @@ -323,10 +328,15 @@ public void testReadBlobWithPrematureConnectionClose() { final int maxRetries = randomInt(20); final BlobContainer blobContainer = createBlobContainer(maxRetries, null, null, null); + final boolean alwaysFlushBody = randomBoolean(); + // HTTP server sends a partial response final byte[] bytes = randomBlobContent(1); httpServer.createContext(downloadStorageEndpoint(blobContainer, "read_blob_incomplete"), exchange -> { sendIncompleteContent(exchange, bytes); + if (alwaysFlushBody) { + exchange.getResponseBody().flush(); + } exchange.close(); }); @@ -341,9 +351,14 @@ public void testReadBlobWithPrematureConnectionClose() { }); assertThat( exception.getMessage().toLowerCase(Locale.ROOT), - either(containsString("premature end of chunk coded message body: closing chunk expected")).or( - containsString("premature end of content-length delimited message body") - ).or(containsString("connection closed prematurely")) + anyOf( + // closing the connection after sending the headers and some incomplete body might yield one of these: + containsString("premature end of chunk coded message 
body: closing chunk expected"), + containsString("premature end of content-length delimited message body"), + containsString("connection closed prematurely"), + // if we didn't call exchange.getResponseBody().flush() then we might not even have sent the response headers: + alwaysFlushBody ? never() : containsString("the target server failed to respond") + ) ); assertThat(exception.getSuppressed().length, getMaxRetriesMatcher(Math.min(10, maxRetries))); } diff --git a/test/framework/src/main/java/org/elasticsearch/search/SearchResponseUtils.java b/test/framework/src/main/java/org/elasticsearch/search/SearchResponseUtils.java index 86bd0899e862a..cc4aac686a02d 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/SearchResponseUtils.java +++ b/test/framework/src/main/java/org/elasticsearch/search/SearchResponseUtils.java @@ -17,6 +17,7 @@ import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.client.Response; +import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; @@ -26,12 +27,14 @@ import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.core.RefCounted; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; import org.elasticsearch.index.mapper.IgnoredFieldMapper; import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.rest.action.RestActions; import org.elasticsearch.search.aggregations.Aggregation; +import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.profile.ProfileResult; @@ -62,8 +65,10 @@ import java.util.Locale; import java.util.Map; +import static java.util.stream.Collectors.toMap; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureFieldName; +import static org.elasticsearch.common.xcontent.XContentParserUtils.parseTypedKeysObject; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -230,7 +235,7 @@ public static SearchResponse parseInnerSearchResponse(XContentParser parser) thr if (SearchHits.Fields.HITS.equals(currentFieldName)) { hits = parseSearchHits(parser); } else if (InternalAggregations.AGGREGATIONS_FIELD.equals(currentFieldName)) { - aggs = InternalAggregations.fromXContent(parser); + aggs = parseInternalAggregations(parser); } else if (Suggest.NAME.equals(currentFieldName)) { suggest = parseSuggest(parser); } else if (SearchProfileResults.PROFILE_FIELD.equals(currentFieldName)) { @@ -254,7 +259,7 @@ public static SearchResponse parseInnerSearchResponse(XContentParser parser) thr } else if (token == XContentParser.Token.START_ARRAY) { if (RestActions.FAILURES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { while (parser.nextToken() != XContentParser.Token.END_ARRAY) { - failures.add(ShardSearchFailure.fromXContent(parser)); + failures.add(parseShardSearchFailure(parser)); } } else { parser.skipChildren(); @@ -407,7 
+412,7 @@ private static SearchResponse.Cluster parseCluster(String clusterAlias, XContent } else if (token == XContentParser.Token.START_ARRAY) { if (RestActions.FAILURES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { while (parser.nextToken() != XContentParser.Token.END_ARRAY) { - failures.add(ShardSearchFailure.fromXContent(parser)); + failures.add(parseShardSearchFailure(parser)); } } else { parser.skipChildren(); @@ -483,7 +488,7 @@ private static void parseProfileResultsEntry(XContentParser parser, Map ProfileResult.fromXContent(p), SearchProfileDfsPhaseResult.STATISTICS); + parser.declareObject(optionalConstructorArg(), (p, c) -> parseProfileResult(p), SearchProfileDfsPhaseResult.STATISTICS); parser.declareObjectArray(optionalConstructorArg(), (p, c) -> parseQueryProfileShardResult(p), SearchProfileDfsPhaseResult.KNN); PROFILE_DFS_PHASE_RESULT_PARSER = parser.build(); } @@ -546,11 +551,11 @@ public static QueryProfileShardResult parseQueryProfileShardResult(XContentParse } else if (token == XContentParser.Token.START_ARRAY) { if (QueryProfileShardResult.QUERY_ARRAY.equals(currentFieldName)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - queryProfileResults.add(ProfileResult.fromXContent(parser)); + queryProfileResults.add(parseProfileResult(parser)); } } else if (QueryProfileShardResult.COLLECTOR.equals(currentFieldName)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - collector = CollectorResult.fromXContent(parser); + collector = parseCollectorResult(parser); } } else { parser.skipChildren(); @@ -704,7 +709,7 @@ public static void declareInnerHitsParseFields(ObjectParser, parser.declareField( (map, list) -> map.put(SearchHit.Fields.SORT, list), - SearchSortValues::fromXContent, + SearchResponseUtils::parseSearchSortValues, new ParseField(SearchHit.Fields.SORT), ObjectParser.ValueType.OBJECT_ARRAY ); @@ -753,7 +758,7 @@ private static Map parseInnerHits(XContentParser parser) thr private static Map parseHighlightFields(XContentParser parser) throws IOException { Map highlightFields = new HashMap<>(); while ((parser.nextToken()) != XContentParser.Token.END_OBJECT) { - HighlightField highlightField = HighlightField.fromXContent(parser); + HighlightField highlightField = parseHighlightField(parser); highlightFields.put(highlightField.name(), highlightField); } return highlightFields; @@ -851,11 +856,9 @@ public static SearchHit searchHitFromMap(Map values) { String index = get(SearchHit.Fields._INDEX, values, null); String clusterAlias = null; if (index != null) { - int indexOf = index.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR); - if (indexOf > 0) { - clusterAlias = index.substring(0, indexOf); - index = index.substring(indexOf + 1); - } + String[] split = RemoteClusterAware.splitIndexName(index); + clusterAlias = split[0]; + index = split[1]; } ShardId shardId = get(SearchHit.Fields._SHARD, values, null); String nodeId = get(SearchHit.Fields._NODE, values, null); @@ -898,4 +901,168 @@ private static T get(String key, Map map, T defaultValue) { return (T) map.getOrDefault(key, defaultValue); } + public static AggregationProfileShardResult readAggregationProfileShardResult(XContentParser parser) throws IOException { + XContentParser.Token token = parser.currentToken(); + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); + List aggProfileResults = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + 
aggProfileResults.add(parseProfileResult(parser)); + } + return new AggregationProfileShardResult(aggProfileResults); + } + + public static CollectorResult parseCollectorResult(XContentParser parser) throws IOException { + XContentParser.Token token = parser.currentToken(); + ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser); + String currentFieldName = null; + String name = null, reason = null; + long time = -1; + List children = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + if (CollectorResult.NAME.match(currentFieldName, parser.getDeprecationHandler())) { + name = parser.text(); + } else if (CollectorResult.REASON.match(currentFieldName, parser.getDeprecationHandler())) { + reason = parser.text(); + } else if (CollectorResult.TIME.match(currentFieldName, parser.getDeprecationHandler())) { + // we need to consume this value, but we use the raw nanosecond value + parser.text(); + } else if (CollectorResult.TIME_NANOS.match(currentFieldName, parser.getDeprecationHandler())) { + time = parser.longValue(); + } else { + parser.skipChildren(); + } + } else if (token == XContentParser.Token.START_ARRAY) { + if (CollectorResult.CHILDREN.match(currentFieldName, parser.getDeprecationHandler())) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + children.add(parseCollectorResult(parser)); + } + } else { + parser.skipChildren(); + } + } else { + parser.skipChildren(); + } + } + return new CollectorResult(name, reason, time, children); + } + + public static HighlightField parseHighlightField(XContentParser parser) throws IOException { + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + String fieldName = parser.currentName(); + Text[] fragments; + XContentParser.Token token = parser.nextToken(); + if (token == XContentParser.Token.START_ARRAY) { + List values = new ArrayList<>(); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + values.add(new Text(parser.text())); + } + fragments = values.toArray(Text.EMPTY_ARRAY); + } else if (token == XContentParser.Token.VALUE_NULL) { + fragments = null; + } else { + throw new ParsingException(parser.getTokenLocation(), "unexpected token type [" + token + "]"); + } + return new HighlightField(fieldName, fragments); + } + + private static InternalAggregations parseInternalAggregations(XContentParser parser) throws IOException { + final List aggregations = new ArrayList<>(); + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.START_OBJECT) { + SetOnce typedAgg = new SetOnce<>(); + String currentField = parser.currentName(); + parseTypedKeysObject(parser, Aggregation.TYPED_KEYS_DELIMITER, InternalAggregation.class, typedAgg::set); + if (typedAgg.get() != null) { + aggregations.add(typedAgg.get()); + } else { + throw new ParsingException( + parser.getTokenLocation(), + String.format(Locale.ROOT, "Could not parse aggregation keyed as [%s]", currentField) + ); + } + } + } + return new InternalAggregations(aggregations); + } + + private static final InstantiatingObjectParser PROFILE_RESULT_PARSER; + static { + InstantiatingObjectParser.Builder parser = InstantiatingObjectParser.builder( + "profile_result", + true, + ProfileResult.class + ); + parser.declareString(constructorArg(), ProfileResult.TYPE); + 
parser.declareString(constructorArg(), ProfileResult.DESCRIPTION); + parser.declareObject( + constructorArg(), + (p, c) -> p.map().entrySet().stream().collect(toMap(Map.Entry::getKey, e -> ((Number) e.getValue()).longValue())), + ProfileResult.BREAKDOWN + ); + parser.declareObject(optionalConstructorArg(), (p, c) -> p.map(), ProfileResult.DEBUG); + parser.declareLong(constructorArg(), ProfileResult.NODE_TIME_RAW); + parser.declareObjectArray(optionalConstructorArg(), (p, c) -> parseProfileResult(p), ProfileResult.CHILDREN); + PROFILE_RESULT_PARSER = parser.build(); + } + + public static ProfileResult parseProfileResult(XContentParser p) throws IOException { + return PROFILE_RESULT_PARSER.parse(p, null); + } + + public static SearchSortValues parseSearchSortValues(XContentParser parser) throws IOException { + ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); + return new SearchSortValues(parser.list().toArray()); + } + + public static ShardSearchFailure parseShardSearchFailure(XContentParser parser) throws IOException { + XContentParser.Token token; + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); + String currentFieldName = null; + int shardId = -1; + String indexName = null; + String clusterAlias = null; + String nodeId = null; + ElasticsearchException exception = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + if (ShardSearchFailure.SHARD_FIELD.equals(currentFieldName)) { + shardId = parser.intValue(); + } else if (ShardSearchFailure.INDEX_FIELD.equals(currentFieldName)) { + indexName = parser.text(); + int indexOf = indexName.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR); + if (indexOf > 0) { + clusterAlias = indexName.substring(0, indexOf); + indexName = indexName.substring(indexOf + 1); + } + } else if (ShardSearchFailure.NODE_FIELD.equals(currentFieldName)) { + nodeId = parser.text(); + } else { + parser.skipChildren(); + } + } else if (token == XContentParser.Token.START_OBJECT) { + if (ShardSearchFailure.REASON_FIELD.equals(currentFieldName)) { + exception = ElasticsearchException.fromXContent(parser); + } else { + parser.skipChildren(); + } + } else { + parser.skipChildren(); + } + } + SearchShardTarget searchShardTarget = null; + if (nodeId != null) { + searchShardTarget = new SearchShardTarget( + nodeId, + new ShardId(new Index(indexName, IndexMetadata.INDEX_UUID_NA_VALUE), shardId), + clusterAlias + ); + } + return new ShardSearchFailure(exception, searchShardTarget); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/ClusterServiceUtils.java b/test/framework/src/main/java/org/elasticsearch/test/ClusterServiceUtils.java index 7fb480952356e..8c6058b47cf0c 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ClusterServiceUtils.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ClusterServiceUtils.java @@ -243,7 +243,7 @@ public static void awaitNoPendingTasks(ClusterService clusterService) { ESTestCase.safeAwait( listener -> clusterService.submitUnbatchedStateUpdateTask( "await-queue-empty", - new ClusterStateUpdateTask(Priority.LANGUID, TimeValue.timeValueSeconds(10)) { + new ClusterStateUpdateTask(Priority.LANGUID, ESTestCase.SAFE_AWAIT_TIMEOUT) { @Override public ClusterState execute(ClusterState currentState) { return currentState; @@ -287,7 +287,7 @@ public String toString() { if 
(predicate.test(clusterService.state())) { listener.onResponse(null); } else { - listener.addTimeout(TimeValue.timeValueSeconds(10), clusterService.threadPool(), EsExecutors.DIRECT_EXECUTOR_SERVICE); + listener.addTimeout(ESTestCase.SAFE_AWAIT_TIMEOUT, clusterService.threadPool(), EsExecutors.DIRECT_EXECUTOR_SERVICE); } return listener; } diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index e6fc32a8ebe1b..7021ea47aa8dd 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -39,6 +39,7 @@ import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TimeUnits; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.ElasticsearchWrapperException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.TransportVersion; import org.elasticsearch.action.ActionFuture; @@ -215,7 +216,6 @@ import static org.hamcrest.Matchers.emptyCollectionOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasItem; -import static org.hamcrest.Matchers.in; import static org.hamcrest.Matchers.startsWith; /** @@ -1658,6 +1658,15 @@ public String randomCompatibleMediaType(RestApiVersion version) { } public String compatibleMediaType(XContentType type, RestApiVersion version) { + if (type.canonical().equals(type)) { + throw new IllegalArgumentException( + "Compatible header is only supported for vendor content types." + + " You requested " + + type.name() + + " but likely want VND_" + + type.name() + ); + } return type.toParsedMediaType() .responseContentTypeHeader(Map.of(MediaType.COMPATIBLE_WITH_PARAMETER_NAME, String.valueOf(version.major))); } @@ -2421,6 +2430,44 @@ public static Exception safeAwaitFailure(@SuppressWarnings("unused") Class ExpectedException safeAwaitFailure( + Class exceptionType, + Class responseType, + Consumer> consumer + ) { + return asInstanceOf(exceptionType, safeAwaitFailure(responseType, consumer)); + } + + /** + * Wait for the exceptional completion of the given async action, with a timeout of {@link #SAFE_AWAIT_TIMEOUT}, + * preserving the thread's interrupt status flag and converting a successful completion, interrupt or timeout into an {@link + * AssertionError} to trigger a test failure. Any layers of {@link ElasticsearchWrapperException} are removed from the thrown exception + * using {@link ExceptionsHelper#unwrapCause}. + * + * @param responseType Class of listener response type, to aid type inference but otherwise ignored. + * @param exceptionType Expected unwrapped exception type. This method throws an {@link AssertionError} if a different type of exception + * is seen. + * + * @return The unwrapped exception with which the {@code listener} was completed exceptionally. + */ + public static ExpectedException safeAwaitAndUnwrapFailure( + Class exceptionType, + Class responseType, + Consumer> consumer + ) { + return asInstanceOf(exceptionType, ExceptionsHelper.unwrapCause(safeAwaitFailure(responseType, consumer))); + } + /** + * Send the current thread to sleep for the given duration, asserting that the sleep is not interrupted but preserving the thread's * interrupt status flag in any case.
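A minimal usage sketch of the safeAwaitAndUnwrapFailure helper added to ESTestCase above, assuming its usual generic signature (type parameters are stripped in this patch text); the test class, the async action, and the exception message below are illustrative only and not part of the change:

// Illustrative sketch, not part of the patch. The wrapper type and message are hypothetical.
import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.transport.RemoteTransportException;

public class SafeAwaitUnwrapExampleTests extends ESTestCase {
    // Hypothetical async action: completes the listener exceptionally with a wrapped cause.
    private static void hypotheticalAsyncAction(ActionListener<Void> listener) {
        listener.onFailure(new RemoteTransportException("remote", new ResourceNotFoundException("no such thing")));
    }

    public void testFailureIsUnwrapped() {
        // Awaits the exceptional completion, strips the RemoteTransportException wrapper
        // (an ElasticsearchWrapperException) and asserts the unwrapped type in one call.
        ResourceNotFoundException e = safeAwaitAndUnwrapFailure(
            ResourceNotFoundException.class,   // expected unwrapped exception type
            Void.class,                        // listener response type, used for inference only
            SafeAwaitUnwrapExampleTests::hypotheticalAsyncAction
        );
        assertEquals("no such thing", e.getMessage());
    }
}

The three-argument safeAwaitFailure overload added in the same hunk is used the same way when the wrapper layers should be kept, as in the connectToNodeExpectFailure and openConnectionExpectFailure changes later in this diff.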
diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java index a1ce19f820433..7b6ea4e2cd256 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java @@ -12,10 +12,8 @@ import com.carrotsearch.randomizedtesting.annotations.Listeners; import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; -import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TimeUnits; -import org.elasticsearch.bootstrap.BootstrapForTesting; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; @@ -29,19 +27,8 @@ * Basic test case for token streams. the assertion methods in this class will * run basic checks to enforce correct behavior of the token streams. */ -public abstract class ESTokenStreamTestCase extends BaseTokenStreamTestCase { - - static { - try { - Class.forName("org.elasticsearch.test.ESTestCase"); - } catch (ClassNotFoundException e) { - throw new AssertionError(e); - } - BootstrapForTesting.ensureInitialized(); - } - +public abstract class ESTokenStreamTestCase extends ESTestCase { public Settings.Builder newAnalysisSettingsBuilder() { return Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()); } - } diff --git a/test/framework/src/main/java/org/elasticsearch/test/NeverMatcher.java b/test/framework/src/main/java/org/elasticsearch/test/NeverMatcher.java new file mode 100644 index 0000000000000..aad771a30d82e --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/test/NeverMatcher.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + + package org.elasticsearch.test; + + import org.hamcrest.BaseMatcher; + import org.hamcrest.Description; + import org.hamcrest.Matcher; + + public class NeverMatcher extends BaseMatcher { + @SuppressWarnings("unchecked") + public static Matcher never() { + return (Matcher) INSTANCE; + } + + private static final Matcher INSTANCE = new NeverMatcher<>(); + + private NeverMatcher() {/* singleton */} + + @Override + public boolean matches(Object actual) { + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("never matches"); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java index 6ed0a1dfe0229..b15e4bed573a5 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java @@ -1179,7 +1179,14 @@ protected static void wipeDataStreams() throws IOException { // We hit a version of ES that doesn't serialize DeleteDataStreamAction.Request#wildcardExpressionsOriginallySpecified field // or that doesn't support data streams so it's safe to ignore int statusCode = ee.getResponse().getStatusLine().getStatusCode(); - if (statusCode < 404 || statusCode > 405) { + if (statusCode == 400) { + // the test cluster likely does not include the data streams module, so we can ignore this error code + // additionally there is an implementation gotcha that causes the response code to be 400 or 405 depending on whether + // "_data_stream/*" matches a registered index pattern such as {a}/{b} but not for the HTTP verb. + // Prior to v9, POST {index}/{type} was registered as a compatible index pattern so the request would partially match + // and return a 405, but without that pattern registered at all the return value is a 400. + return; + } else if (statusCode < 404 || statusCode > 405) { throw ee; } } @@ -1848,7 +1855,7 @@ public static CreateIndexResponse createIndex(RestClient client, String name, Se final Response response = client.performRequest(request); try (var parser = responseAsParser(response)) { - return CreateIndexResponse.fromXContent(parser); + return TestResponseParsers.parseCreateIndexResponse(parser); } } @@ -1860,7 +1867,7 @@ protected static AcknowledgedResponse deleteIndex(RestClient restClient, String Request request = new Request("DELETE", "/" + name); Response response = restClient.performRequest(request); try (var parser = responseAsParser(response)) { - return AcknowledgedResponse.fromXContent(parser); + return TestResponseParsers.parseAcknowledgedResponse(parser); } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/TestResponseParsers.java b/test/framework/src/main/java/org/elasticsearch/test/rest/TestResponseParsers.java new file mode 100644 index 0000000000000..5ab017d79b882 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/TestResponseParsers.java @@ -0,0 +1,119 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1".
+ */ + +package org.elasticsearch.test.rest; + +import org.elasticsearch.action.admin.cluster.settings.RestClusterGetSettingsResponse; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.ShardsAcknowledgedResponse; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentParser; + +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; + +public enum TestResponseParsers { + ; + + private static final ConstructingObjectParser REST_SETTINGS_RESPONSE_PARSER = + new ConstructingObjectParser<>("cluster_get_settings_response", true, a -> { + Settings defaultSettings = a[2] == null ? Settings.EMPTY : (Settings) a[2]; + return new RestClusterGetSettingsResponse((Settings) a[0], (Settings) a[1], defaultSettings); + }); + static { + REST_SETTINGS_RESPONSE_PARSER.declareObject( + constructorArg(), + (p, c) -> Settings.fromXContent(p), + new ParseField(RestClusterGetSettingsResponse.PERSISTENT_FIELD) + ); + REST_SETTINGS_RESPONSE_PARSER.declareObject( + constructorArg(), + (p, c) -> Settings.fromXContent(p), + new ParseField(RestClusterGetSettingsResponse.TRANSIENT_FIELD) + ); + REST_SETTINGS_RESPONSE_PARSER.declareObject( + optionalConstructorArg(), + (p, c) -> Settings.fromXContent(p), + new ParseField(RestClusterGetSettingsResponse.DEFAULTS_FIELD) + ); + } + + public static RestClusterGetSettingsResponse parseClusterSettingsResponse(XContentParser parser) { + return REST_SETTINGS_RESPONSE_PARSER.apply(parser, null); + } + + private static final ParseField ACKNOWLEDGED_FIELD = new ParseField(AcknowledgedResponse.ACKNOWLEDGED_KEY); + + public static void declareAcknowledgedField(ConstructingObjectParser objectParser) { + objectParser.declareField( + constructorArg(), + (parser, context) -> parser.booleanValue(), + ACKNOWLEDGED_FIELD, + ObjectParser.ValueType.BOOLEAN + ); + } + + public static void declareAcknowledgedAndShardsAcknowledgedFields( + ConstructingObjectParser objectParser + ) { + declareAcknowledgedField(objectParser); + objectParser.declareField( + constructorArg(), + (parser, context) -> parser.booleanValue(), + ShardsAcknowledgedResponse.SHARDS_ACKNOWLEDGED, + ObjectParser.ValueType.BOOLEAN + ); + } + + private static final ConstructingObjectParser CREATE_INDEX_RESPONSE_PARSER = new ConstructingObjectParser<>( + "create_index", + true, + args -> new CreateIndexResponse((boolean) args[0], (boolean) args[1], (String) args[2]) + ); + + static { + declareAcknowledgedAndShardsAcknowledgedFields(CREATE_INDEX_RESPONSE_PARSER); + CREATE_INDEX_RESPONSE_PARSER.declareField( + constructorArg(), + (parser, context) -> parser.textOrNull(), + CreateIndexResponse.INDEX, + ObjectParser.ValueType.STRING + ); + } + + public static CreateIndexResponse parseCreateIndexResponse(XContentParser parser) { + return CREATE_INDEX_RESPONSE_PARSER.apply(parser, null); + } + + /** + * A generic parser that simply parses the acknowledged flag + */ + private static final ConstructingObjectParser ACKNOWLEDGED_FLAG_PARSER = new ConstructingObjectParser<>( + "acknowledged_flag", + true, + args -> (Boolean) args[0] + ); + + static { + ACKNOWLEDGED_FLAG_PARSER.declareField( + 
constructorArg(), + (parser, context) -> parser.booleanValue(), + ACKNOWLEDGED_FIELD, + ObjectParser.ValueType.BOOLEAN + ); + } + + public static AcknowledgedResponse parseAcknowledgedResponse(XContentParser parser) { + return AcknowledgedResponse.of(ACKNOWLEDGED_FLAG_PARSER.apply(parser, null)); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java b/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java index fe3de2218a493..840ccd611c52f 100644 --- a/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java @@ -3507,9 +3507,10 @@ public static ConnectTransportException connectToNodeExpectFailure( DiscoveryNode node, ConnectionProfile connectionProfile ) { - return asInstanceOf( + return safeAwaitFailure( ConnectTransportException.class, - safeAwaitFailure(Releasable.class, listener -> service.connectToNode(node, connectionProfile, listener)) + Releasable.class, + listener -> service.connectToNode(node, connectionProfile, listener) ); } @@ -3532,9 +3533,10 @@ public static ConnectTransportException openConnectionExpectFailure( DiscoveryNode node, ConnectionProfile connectionProfile ) { - return asInstanceOf( + return safeAwaitFailure( ConnectTransportException.class, - safeAwaitFailure(Transport.Connection.class, listener -> service.openConnection(node, connectionProfile, listener)) + Transport.Connection.class, + listener -> service.openConnection(node, connectionProfile, listener) ); } diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterSpecBuilder.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterSpecBuilder.java index 9bc58dd64404e..1d7cc76be165b 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterSpecBuilder.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterSpecBuilder.java @@ -19,7 +19,9 @@ public final class DefaultLocalClusterSpecBuilder extends AbstractLocalClusterSp public DefaultLocalClusterSpecBuilder() { super(); - this.apply(c -> c.systemProperty("ingest.geoip.downloader.enabled.default", "false")); + this.apply( + c -> c.systemProperty("ingest.geoip.downloader.enabled.default", "false").systemProperty("tests.testfeatures.enabled", "true") + ); this.apply(new FipsEnabledClusterConfigProvider()); this.settings(new DefaultSettingsProvider()); this.environment(new DefaultEnvironmentProvider()); diff --git a/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ReactiveStorageDeciderService.java b/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ReactiveStorageDeciderService.java index 2f8cccdc303e6..4c612d5e04886 100644 --- a/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ReactiveStorageDeciderService.java +++ b/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ReactiveStorageDeciderService.java @@ -823,7 +823,7 @@ private SingleForecast forecast(Metadata metadata, DataStream stream, long forec stream = stream.unsafeRollover( new Index(rolledDataStreamInfo.v1(), uuid), rolledDataStreamInfo.v2(), - false, + null, stream.getAutoShardingEvent() ); diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 
ebc79ca6ce44a..eb0796672a174 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -82,5 +82,6 @@ tasks.named("precommit").configure { tasks.named("yamlRestCompatTestTransform").configure({ task -> task.skipTest("security/10_forbidden/Test bulk response with invalid credentials", "warning does not exist for compatibility") + task.skipTest("wildcard/30_ignore_above_synthetic_source/wildcard field type ignore_above", "Temporary until backported") }) diff --git a/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/FollowerFailOverIT.java b/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/FollowerFailOverIT.java index 501a664d64698..274d723a37574 100644 --- a/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/FollowerFailOverIT.java +++ b/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/FollowerFailOverIT.java @@ -9,6 +9,7 @@ import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.delete.DeleteResponse; +import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; @@ -157,6 +158,7 @@ public void testFollowIndexAndCloseNode() throws Exception { followRequest.getParameters().setMaxWriteRequestOperationCount(randomIntBetween(32, 2048)); followRequest.getParameters().setMaxWriteRequestSize(new ByteSizeValue(randomIntBetween(1, 4096), ByteSizeUnit.KB)); followRequest.getParameters().setMaxOutstandingWriteRequests(randomIntBetween(1, 10)); + followRequest.waitForActiveShards(ActiveShardCount.ALL); followerClient().execute(PutFollowAction.INSTANCE, followRequest).get(); disableDelayedAllocation("index2"); logger.info("--> follow request {}", Strings.toString(followRequest)); diff --git a/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/xpack/core/action/DataStreamLifecycleUsageTransportActionIT.java b/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/xpack/core/action/DataStreamLifecycleUsageTransportActionIT.java index a08eb935178cf..499e660d2e542 100644 --- a/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/xpack/core/action/DataStreamLifecycleUsageTransportActionIT.java +++ b/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/xpack/core/action/DataStreamLifecycleUsageTransportActionIT.java @@ -16,6 +16,7 @@ import org.elasticsearch.cluster.metadata.DataStreamAlias; import org.elasticsearch.cluster.metadata.DataStreamGlobalRetentionSettings; import org.elasticsearch.cluster.metadata.DataStreamLifecycle; +import org.elasticsearch.cluster.metadata.DataStreamOptions; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.bytes.BytesReference; @@ -178,7 +179,7 @@ public void testAction() throws Exception { randomBoolean(), IndexMode.STANDARD, lifecycle, - false, + DataStreamOptions.EMPTY, List.of(), replicated == false && randomBoolean(), null diff --git a/x-pack/plugin/core/src/main/java/module-info.java b/x-pack/plugin/core/src/main/java/module-info.java index 47848310fe781..72436bb9d5171 100644 --- a/x-pack/plugin/core/src/main/java/module-info.java +++ b/x-pack/plugin/core/src/main/java/module-info.java @@ -228,7 +228,6 @@ exports org.elasticsearch.xpack.core.watcher.trigger; exports org.elasticsearch.xpack.core.watcher.watch; exports 
org.elasticsearch.xpack.core.watcher; - exports org.elasticsearch.xpack.cluster.settings; provides org.elasticsearch.action.admin.cluster.node.info.ComponentVersionNumber with diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java index 8db05703a3f0d..01280b1d95f80 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java @@ -239,7 +239,7 @@ private void remoteClusterLicense(final String clusterAlias, final ActionListene * @return true if the collection of indices contains a remote index, otherwise false */ public static boolean isRemoteIndex(final String index) { - return index.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR) != -1; + return RemoteClusterAware.isRemoteIndexName(index); } /** @@ -275,7 +275,7 @@ public static List remoteIndices(final Collection indices) { public static List remoteClusterAliases(final Set remoteClusters, final List indices) { return indices.stream() .filter(RemoteClusterLicenseChecker::isRemoteIndex) - .map(index -> index.substring(0, index.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR))) + .map(index -> RemoteClusterAware.splitIndexName(index)[0]) .distinct() .flatMap(clusterExpression -> ClusterNameExpressionResolver.resolveClusterNames(remoteClusters, clusterExpression).stream()) .distinct() diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/cluster/settings/ClusterSettings.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/cluster/settings/ClusterSettings.java deleted file mode 100644 index 1127889783f16..0000000000000 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/cluster/settings/ClusterSettings.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.cluster.settings; - -import org.elasticsearch.common.settings.Setting; - -public class ClusterSettings { - public static final Setting CLUSTER_LOGSDB_ENABLED = Setting.boolSetting( - "cluster.logsdb.enabled", - false, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); -} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java index c3c9fa88a1a96..9c679cd04c94d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java @@ -48,6 +48,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl private static final ParseField STEP_TIME_MILLIS_FIELD = new ParseField("step_time_millis"); private static final ParseField STEP_TIME_FIELD = new ParseField("step_time"); private static final ParseField STEP_INFO_FIELD = new ParseField("step_info"); + private static final ParseField PREVIOUS_STEP_INFO_FIELD = new ParseField("previous_step_info"); private static final ParseField PHASE_EXECUTION_INFO = new ParseField("phase_execution"); private static final ParseField AGE_FIELD = new ParseField("age"); private static final ParseField TIME_SINCE_INDEX_CREATION_FIELD = new ParseField("time_since_index_creation"); @@ -76,6 +77,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl (String) a[17], (String) a[18], (BytesReference) a[11], + (BytesReference) a[21], (PhaseExecutionInfo) a[12] // a[13] == "age" // a[20] == "time_since_index_creation" @@ -111,6 +113,11 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), SHRINK_INDEX_NAME); PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), INDEX_CREATION_DATE_MILLIS_FIELD); PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TIME_SINCE_INDEX_CREATION_FIELD); + PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> { + XContentBuilder builder = JsonXContent.contentBuilder(); + builder.copyCurrentStructure(p); + return BytesReference.bytes(builder); + }, PREVIOUS_STEP_INFO_FIELD); } private final String index; @@ -126,6 +133,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl private final Long stepTime; private final boolean managedByILM; private final BytesReference stepInfo; + private final BytesReference previousStepInfo; private final PhaseExecutionInfo phaseExecutionInfo; private final Boolean isAutoRetryableError; private final Integer failedStepRetryCount; @@ -153,6 +161,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse( String snapshotName, String shrinkIndexName, BytesReference stepInfo, + BytesReference previousStepInfo, PhaseExecutionInfo phaseExecutionInfo ) { return new IndexLifecycleExplainResponse( @@ -174,6 +183,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse( snapshotName, shrinkIndexName, stepInfo, + previousStepInfo, phaseExecutionInfo ); } @@ -198,6 +208,7 @@ public static IndexLifecycleExplainResponse newUnmanagedIndexResponse(String ind null, null, null, + null, null ); } @@ -221,6 +232,7 @@ private IndexLifecycleExplainResponse( String snapshotName, String shrinkIndexName, 
BytesReference stepInfo, + BytesReference previousStepInfo, PhaseExecutionInfo phaseExecutionInfo ) { if (managedByILM) { @@ -262,6 +274,7 @@ private IndexLifecycleExplainResponse( || actionTime != null || stepTime != null || stepInfo != null + || previousStepInfo != null || phaseExecutionInfo != null) { throw new IllegalArgumentException( "Unmanaged index response must only contain fields: [" + MANAGED_BY_ILM_FIELD + ", " + INDEX_FIELD + "]" @@ -283,6 +296,7 @@ private IndexLifecycleExplainResponse( this.isAutoRetryableError = isAutoRetryableError; this.failedStepRetryCount = failedStepRetryCount; this.stepInfo = stepInfo; + this.previousStepInfo = previousStepInfo; this.phaseExecutionInfo = phaseExecutionInfo; this.repositoryName = repositoryName; this.snapshotName = snapshotName; @@ -314,6 +328,11 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException { } else { indexCreationDate = null; } + if (in.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) { + previousStepInfo = in.readOptionalBytesReference(); + } else { + previousStepInfo = null; + } } else { policyName = null; lifecycleDate = null; @@ -327,6 +346,7 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException { actionTime = null; stepTime = null; stepInfo = null; + previousStepInfo = null; phaseExecutionInfo = null; repositoryName = null; snapshotName = null; @@ -359,6 +379,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_1_0)) { out.writeOptionalLong(indexCreationDate); } + if (out.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) { + out.writeOptionalBytesReference(previousStepInfo); + } } } @@ -422,6 +445,10 @@ public BytesReference getStepInfo() { return stepInfo; } + public BytesReference getPreviousStepInfo() { + return previousStepInfo; + } + public PhaseExecutionInfo getPhaseExecutionInfo() { return phaseExecutionInfo; } @@ -515,6 +542,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (stepInfo != null && stepInfo.length() > 0) { builder.rawField(STEP_INFO_FIELD.getPreferredName(), stepInfo.streamInput(), XContentType.JSON); } + if (previousStepInfo != null && previousStepInfo.length() > 0) { + builder.rawField(PREVIOUS_STEP_INFO_FIELD.getPreferredName(), previousStepInfo.streamInput(), XContentType.JSON); + } if (phaseExecutionInfo != null) { builder.field(PHASE_EXECUTION_INFO.getPreferredName(), phaseExecutionInfo); } @@ -544,6 +574,7 @@ public int hashCode() { snapshotName, shrinkIndexName, stepInfo, + previousStepInfo, phaseExecutionInfo ); } @@ -575,6 +606,7 @@ public boolean equals(Object obj) { && Objects.equals(snapshotName, other.snapshotName) && Objects.equals(shrinkIndexName, other.shrinkIndexName) && Objects.equals(stepInfo, other.stepInfo) + && Objects.equals(previousStepInfo, other.previousStepInfo) && Objects.equals(phaseExecutionInfo, other.phaseExecutionInfo); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java index a12b4ff75ee39..ea3c9cc5926ab 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponseTests.java @@ -73,6 +73,7 @@ private static IndexLifecycleExplainResponse 
randomManagedIndexExplainResponse() stepNull ? null : randomAlphaOfLength(10), stepNull ? null : randomAlphaOfLength(10), randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), + randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), randomBoolean() ? null : PhaseExecutionInfoTests.randomPhaseExecutionInfo("") ); } @@ -99,6 +100,7 @@ public void testInvalidStepDetails() { randomBoolean() ? null : randomAlphaOfLength(10), randomBoolean() ? null : randomAlphaOfLength(10), randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), + randomBoolean() ? null : new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()), randomBoolean() ? null : PhaseExecutionInfoTests.randomPhaseExecutionInfo("") ) ); @@ -132,6 +134,7 @@ public void testIndexAges() { null, null, null, + null, null ); assertThat(managedExplainResponse.getLifecycleDate(), is(notNullValue())); @@ -191,42 +194,32 @@ protected IndexLifecycleExplainResponse mutateInstance(IndexLifecycleExplainResp String shrinkIndexName = instance.getShrinkIndexName(); boolean managed = instance.managedByILM(); BytesReference stepInfo = instance.getStepInfo(); + BytesReference previousStepInfo = instance.getPreviousStepInfo(); PhaseExecutionInfo phaseExecutionInfo = instance.getPhaseExecutionInfo(); + if (managed) { - switch (between(0, 14)) { - case 0: - index = index + randomAlphaOfLengthBetween(1, 5); - break; - case 1: - policy = policy + randomAlphaOfLengthBetween(1, 5); - break; - case 2: + switch (between(0, 15)) { + case 0 -> index += randomAlphaOfLengthBetween(1, 5); + case 1 -> policy += randomAlphaOfLengthBetween(1, 5); + case 2 -> { phase = randomAlphaOfLengthBetween(1, 5); action = randomAlphaOfLengthBetween(1, 5); step = randomAlphaOfLengthBetween(1, 5); - break; - case 3: - phaseTime = randomValueOtherThan(phaseTime, () -> randomLongBetween(0, 100000)); - break; - case 4: - actionTime = randomValueOtherThan(actionTime, () -> randomLongBetween(0, 100000)); - break; - case 5: - stepTime = randomValueOtherThan(stepTime, () -> randomLongBetween(0, 100000)); - break; - case 6: + } + case 3 -> phaseTime = randomValueOtherThan(phaseTime, () -> randomLongBetween(0, 100000)); + case 4 -> actionTime = randomValueOtherThan(actionTime, () -> randomLongBetween(0, 100000)); + case 5 -> stepTime = randomValueOtherThan(stepTime, () -> randomLongBetween(0, 100000)); + case 6 -> { if (Strings.hasLength(failedStep) == false) { failedStep = randomAlphaOfLength(10); } else if (randomBoolean()) { - failedStep = failedStep + randomAlphaOfLengthBetween(1, 5); + failedStep += randomAlphaOfLengthBetween(1, 5); } else { failedStep = null; } - break; - case 7: - policyTime = randomValueOtherThan(policyTime, () -> randomLongBetween(0, 100000)); - break; - case 8: + } + case 7 -> policyTime = randomValueOtherThan(policyTime, () -> randomLongBetween(0, 100000)); + case 8 -> { if (Strings.hasLength(stepInfo) == false) { stepInfo = new BytesArray(randomByteArrayOfLength(100)); } else if (randomBoolean()) { @@ -237,31 +230,36 @@ protected IndexLifecycleExplainResponse mutateInstance(IndexLifecycleExplainResp } else { stepInfo = null; } - break; - case 9: - phaseExecutionInfo = randomValueOtherThan( - phaseExecutionInfo, - () -> PhaseExecutionInfoTests.randomPhaseExecutionInfo("") - ); - break; - case 10: + } + case 9 -> { + if (Strings.hasLength(previousStepInfo) == false) { + previousStepInfo = new 
BytesArray(randomByteArrayOfLength(100)); + } else if (randomBoolean()) { + previousStepInfo = randomValueOtherThan( + previousStepInfo, + () -> new BytesArray(new RandomStepInfo(() -> randomAlphaOfLength(10)).toString()) + ); + } else { + previousStepInfo = null; + } + } + case 10 -> phaseExecutionInfo = randomValueOtherThan( + phaseExecutionInfo, + () -> PhaseExecutionInfoTests.randomPhaseExecutionInfo("") + ); + case 11 -> { return IndexLifecycleExplainResponse.newUnmanagedIndexResponse(index); - case 11: + } + case 12 -> { isAutoRetryableError = true; failedStepRetryCount = randomValueOtherThan(failedStepRetryCount, () -> randomInt(10)); - break; - case 12: - repositoryName = randomValueOtherThan(repositoryName, () -> randomAlphaOfLengthBetween(5, 10)); - break; - case 13: - snapshotName = randomValueOtherThan(snapshotName, () -> randomAlphaOfLengthBetween(5, 10)); - break; - case 14: - shrinkIndexName = randomValueOtherThan(shrinkIndexName, () -> randomAlphaOfLengthBetween(5, 10)); - break; - default: - throw new AssertionError("Illegal randomisation branch"); + } + case 13 -> repositoryName = randomValueOtherThan(repositoryName, () -> randomAlphaOfLengthBetween(5, 10)); + case 14 -> snapshotName = randomValueOtherThan(snapshotName, () -> randomAlphaOfLengthBetween(5, 10)); + case 15 -> shrinkIndexName = randomValueOtherThan(shrinkIndexName, () -> randomAlphaOfLengthBetween(5, 10)); + default -> throw new AssertionError("Illegal randomisation branch"); } + return IndexLifecycleExplainResponse.newManagedIndexResponse( index, indexCreationDate, @@ -280,6 +278,7 @@ protected IndexLifecycleExplainResponse mutateInstance(IndexLifecycleExplainResp snapshotName, shrinkIndexName, stepInfo, + previousStepInfo, phaseExecutionInfo ); } else { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java index 1758c3729e373..dd7e88b14ef5e 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecycleExecutionStateTests.java @@ -67,11 +67,7 @@ public void testEmptyValuesAreNotSerialized() { public void testEqualsAndHashcode() { LifecycleExecutionState original = LifecycleExecutionState.fromCustomMetadata(createCustomMetadata()); - EqualsHashCodeTestUtils.checkEqualsAndHashCode( - original, - toCopy -> LifecycleExecutionState.builder(toCopy).build(), - LifecycleExecutionStateTests::mutate - ); + EqualsHashCodeTestUtils.checkEqualsAndHashCode(original, toCopy -> LifecycleExecutionState.builder(toCopy).build(), this::mutate); } public void testGetCurrentStepKey() { @@ -133,78 +129,46 @@ public void testGetCurrentStepKey() { assertNull(error6.getMessage()); } - private static LifecycleExecutionState mutate(LifecycleExecutionState toMutate) { + private LifecycleExecutionState mutate(LifecycleExecutionState toMutate) { LifecycleExecutionState.Builder newState = LifecycleExecutionState.builder(toMutate); - switch (randomIntBetween(0, 17)) { - case 0: - newState.setPhase(randomValueOtherThan(toMutate.phase(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 1: - newState.setAction(randomValueOtherThan(toMutate.action(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 2: - newState.setStep(randomValueOtherThan(toMutate.step(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 3: - 
newState.setPhaseDefinition(randomValueOtherThan(toMutate.phaseDefinition(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 4: - newState.setFailedStep(randomValueOtherThan(toMutate.failedStep(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 5: - newState.setStepInfo(randomValueOtherThan(toMutate.stepInfo(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 6: - newState.setPhaseTime(randomValueOtherThan(toMutate.phaseTime(), ESTestCase::randomLong)); - break; - case 7: - newState.setActionTime(randomValueOtherThan(toMutate.actionTime(), ESTestCase::randomLong)); - break; - case 8: - newState.setStepTime(randomValueOtherThan(toMutate.stepTime(), ESTestCase::randomLong)); - break; - case 9: - newState.setIndexCreationDate(randomValueOtherThan(toMutate.lifecycleDate(), ESTestCase::randomLong)); - break; - case 10: - newState.setShrinkIndexName(randomValueOtherThan(toMutate.shrinkIndexName(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 11: - newState.setSnapshotRepository( - randomValueOtherThan(toMutate.snapshotRepository(), () -> randomAlphaOfLengthBetween(5, 20)) - ); - break; - case 12: - newState.setSnapshotIndexName(randomValueOtherThan(toMutate.snapshotIndexName(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 13: - newState.setSnapshotName(randomValueOtherThan(toMutate.snapshotName(), () -> randomAlphaOfLengthBetween(5, 20))); - break; - case 14: - newState.setDownsampleIndexName( - randomValueOtherThan(toMutate.downsampleIndexName(), () -> randomAlphaOfLengthBetween(5, 20)) - ); - break; - case 15: - newState.setIsAutoRetryableError(randomValueOtherThan(toMutate.isAutoRetryableError(), ESTestCase::randomBoolean)); - break; - case 16: - newState.setFailedStepRetryCount(randomValueOtherThan(toMutate.failedStepRetryCount(), ESTestCase::randomInt)); - break; - case 17: - return LifecycleExecutionState.builder().build(); - default: - throw new IllegalStateException("unknown randomization branch"); + switch (randomIntBetween(0, 18)) { + case 0 -> newState.setPhase(randomValueOtherThan(toMutate.phase(), this::randomString)); + case 1 -> newState.setAction(randomValueOtherThan(toMutate.action(), this::randomString)); + case 2 -> newState.setStep(randomValueOtherThan(toMutate.step(), this::randomString)); + case 3 -> newState.setPhaseDefinition(randomValueOtherThan(toMutate.phaseDefinition(), this::randomString)); + case 4 -> newState.setFailedStep(randomValueOtherThan(toMutate.failedStep(), this::randomString)); + case 5 -> newState.setStepInfo(randomValueOtherThan(toMutate.stepInfo(), this::randomString)); + case 6 -> newState.setPreviousStepInfo(randomValueOtherThan(toMutate.previousStepInfo(), this::randomString)); + case 7 -> newState.setPhaseTime(randomValueOtherThan(toMutate.phaseTime(), ESTestCase::randomLong)); + case 8 -> newState.setActionTime(randomValueOtherThan(toMutate.actionTime(), ESTestCase::randomLong)); + case 9 -> newState.setStepTime(randomValueOtherThan(toMutate.stepTime(), ESTestCase::randomLong)); + case 10 -> newState.setIndexCreationDate(randomValueOtherThan(toMutate.lifecycleDate(), ESTestCase::randomLong)); + case 11 -> newState.setShrinkIndexName(randomValueOtherThan(toMutate.shrinkIndexName(), this::randomString)); + case 12 -> newState.setSnapshotRepository(randomValueOtherThan(toMutate.snapshotRepository(), this::randomString)); + case 13 -> newState.setSnapshotIndexName(randomValueOtherThan(toMutate.snapshotIndexName(), this::randomString)); + case 14 -> 
newState.setSnapshotName(randomValueOtherThan(toMutate.snapshotName(), this::randomString)); + case 15 -> newState.setDownsampleIndexName(randomValueOtherThan(toMutate.downsampleIndexName(), this::randomString)); + case 16 -> newState.setIsAutoRetryableError(randomValueOtherThan(toMutate.isAutoRetryableError(), ESTestCase::randomBoolean)); + case 17 -> newState.setFailedStepRetryCount(randomValueOtherThan(toMutate.failedStepRetryCount(), ESTestCase::randomInt)); + case 18 -> { + return LifecycleExecutionState.EMPTY_STATE; + } + default -> throw new IllegalStateException("unknown randomization branch"); } return newState.build(); } + private String randomString() { + return randomAlphaOfLengthBetween(5, 20); + } + static Map createCustomMetadata() { String phase = randomAlphaOfLengthBetween(5, 20); String action = randomAlphaOfLengthBetween(5, 20); String step = randomAlphaOfLengthBetween(5, 20); String failedStep = randomAlphaOfLengthBetween(5, 20); String stepInfo = randomAlphaOfLengthBetween(15, 50); + String previousStepInfo = randomAlphaOfLengthBetween(15, 50); String phaseDefinition = randomAlphaOfLengthBetween(15, 50); String repositoryName = randomAlphaOfLengthBetween(10, 20); String snapshotName = randomAlphaOfLengthBetween(10, 20); @@ -220,6 +184,7 @@ static Map createCustomMetadata() { customMetadata.put("step", step); customMetadata.put("failed_step", failedStep); customMetadata.put("step_info", stepInfo); + customMetadata.put("previous_step_info", previousStepInfo); customMetadata.put("phase_definition", phaseDefinition); customMetadata.put("creation_date", String.valueOf(indexCreationDate)); customMetadata.put("phase_time", String.valueOf(phaseTime)); diff --git a/x-pack/plugin/core/template-resources/src/main/resources/logs@settings-logsdb.json b/x-pack/plugin/core/template-resources/src/main/resources/logs@settings-logsdb.json deleted file mode 100644 index eabdd6fb9fad2..0000000000000 --- a/x-pack/plugin/core/template-resources/src/main/resources/logs@settings-logsdb.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "template": { - "settings": { - "index": { - "lifecycle": { - "name": "logs" - }, - "mode": "logsdb", - "codec": "best_compression", - "mapping": { - "ignore_malformed": true, - "total_fields": { - "ignore_dynamic_beyond_limit": true - } - }, - "default_pipeline": "logs@default-pipeline" - } - } - }, - "_meta": { - "description": "default settings for the logs index template installed by x-pack", - "managed": true - }, - "version": ${xpack.stack.template.version}, - "deprecated": ${xpack.stack.template.deprecated} -} diff --git a/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/analytics/action/PutAnalyticsCollectionResponseBWCSerializingTests.java b/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/analytics/action/PutAnalyticsCollectionResponseBWCSerializingTests.java index f6a13477acae7..f4b85af251c57 100644 --- a/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/analytics/action/PutAnalyticsCollectionResponseBWCSerializingTests.java +++ b/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/analytics/action/PutAnalyticsCollectionResponseBWCSerializingTests.java @@ -8,8 +8,8 @@ package org.elasticsearch.xpack.application.analytics.action; import org.elasticsearch.TransportVersion; -import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.rest.TestResponseParsers; import 
org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.ml.AbstractBWCSerializationTestCase; @@ -41,7 +41,7 @@ protected PutAnalyticsCollectionAction.Response mutateInstance(PutAnalyticsColle @Override protected PutAnalyticsCollectionAction.Response doParseInstance(XContentParser parser) throws IOException { - return new PutAnalyticsCollectionAction.Response(AcknowledgedResponse.fromXContent(parser).isAcknowledged(), this.name); + return new PutAnalyticsCollectionAction.Response(TestResponseParsers.parseAcknowledgedResponse(parser).isAcknowledged(), this.name); } @Override diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java index cd0ade2054ce6..1bfd94730c4fc 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.core.Tuple; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.transport.RemoteClusterAware; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; @@ -27,7 +28,6 @@ import java.util.StringJoiner; import static java.util.stream.Collectors.toList; -import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR; import static org.elasticsearch.transport.RemoteClusterAware.buildRemoteIndexName; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.isUnsignedLong; @@ -378,10 +378,8 @@ public static String ordinal(int i) { } public static Tuple splitQualifiedIndex(String indexName) { - int separatorOffset = indexName.indexOf(REMOTE_CLUSTER_INDEX_SEPARATOR); - return separatorOffset > 0 - ? 
Tuple.tuple(indexName.substring(0, separatorOffset), indexName.substring(separatorOffset + 1)) - : Tuple.tuple(null, indexName); + String[] split = RemoteClusterAware.splitIndexName(indexName); + return Tuple.tuple(split[0], split[1]); } public static String qualifyAndJoinIndices(String cluster, String[] indices) { @@ -393,7 +391,7 @@ public static String qualifyAndJoinIndices(String cluster, String[] indices) { } public static boolean isQualified(String indexWildcard) { - return indexWildcard.indexOf(REMOTE_CLUSTER_INDEX_SEPARATOR) > 0; + return RemoteClusterAware.isRemoteIndexName(indexWildcard); } public static boolean isInteger(String value) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxFloatAggregator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxFloatAggregator.java index eea436541069e..f5d4537c3c370 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxFloatAggregator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxFloatAggregator.java @@ -16,7 +16,7 @@ class MaxFloatAggregator { public static float init() { - return Float.MIN_VALUE; + return -Float.MAX_VALUE; } public static float combine(float current, float v) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxFloatGroupingAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxFloatGroupingAggregatorFunctionTests.java index 0abcb05a91af6..e4da581a59136 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxFloatGroupingAggregatorFunctionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxFloatGroupingAggregatorFunctionTests.java @@ -27,7 +27,8 @@ public class MaxFloatGroupingAggregatorFunctionTests extends GroupingAggregatorF protected SourceOperator simpleInput(BlockFactory blockFactory, int end) { return new LongFloatTupleBlockSourceOperator( blockFactory, - LongStream.range(0, end).mapToObj(l -> Tuple.tuple(randomLongBetween(0, 4), randomFloat())) + LongStream.range(0, end) + .mapToObj(l -> Tuple.tuple(randomLongBetween(0, 4), randomFloatBetween(-Float.MAX_VALUE, Float.MAX_VALUE, true))) ); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec index b0a47bd34592c..f52829741ed6e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec @@ -554,7 +554,7 @@ emp_no:integer | year:long | month:long | day:long dateFormatLocale from employees | where emp_no == 10049 or emp_no == 10050 | sort emp_no | eval birth_month = date_format("MMMM", birth_date) | keep emp_no, birth_date, birth_month; -warningRegex:Date format \[MMMM\] contains textual field specifiers that could change in JDK 23 +warningRegex:Date format \[MMMM\] contains textual field specifiers that could change in JDK 23.* ignoreOrder:true emp_no:integer | birth_date:datetime | birth_month:keyword diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec index 883010eb484db..ad7149b0f742f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec @@ -204,7 +204,7 @@ null date nanos to long, index version required_capability: to_date_nanos -FROM date_nanos | WHERE millis > "2020-02-02" | EVAL l = TO_LONG(nanos) | KEEP l; +FROM date_nanos | WHERE millis > "2020-02-02" | EVAL l = TO_LONG(nanos) | SORT nanos DESC | KEEP l; l:long 1698069301543123456 @@ -219,7 +219,7 @@ l:long long to date nanos, index version required_capability: to_date_nanos -FROM date_nanos | WHERE millis > "2020-02-02" | EVAL d = TO_DATE_NANOS(num) | KEEP d; +FROM date_nanos | WHERE millis > "2020-02-02" | EVAL d = TO_DATE_NANOS(num) | SORT nanos DESC | KEEP d; d:date_nanos 2023-10-23T13:55:01.543123456Z @@ -234,7 +234,7 @@ d:date_nanos date_nanos to date nanos, index version required_capability: to_date_nanos -FROM date_nanos | WHERE millis > "2020-02-02" | EVAL d = TO_DATE_NANOS(nanos) | KEEP d; +FROM date_nanos | WHERE millis > "2020-02-02" | EVAL d = TO_DATE_NANOS(nanos) | SORT nanos DESC | KEEP d; d:date_nanos 2023-10-23T13:55:01.543123456Z diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 63462d1721f71..90957f55141b9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -34,10 +34,6 @@ import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; -import org.elasticsearch.xpack.esql.core.rule.Rule; -import org.elasticsearch.xpack.esql.core.rule.RuleExecutor; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; @@ -81,6 +77,10 @@ import org.elasticsearch.xpack.esql.plan.logical.local.EsqlProject; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; +import org.elasticsearch.xpack.esql.rule.ParameterizedRule; +import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.rule.Rule; +import org.elasticsearch.xpack.esql.rule.RuleExecutor; import org.elasticsearch.xpack.esql.session.Configuration; import org.elasticsearch.xpack.esql.stats.FeatureMetric; import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java index 89f9d694e3a16..4889583d1b8a9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java @@ -9,9 +9,9 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; -import org.elasticsearch.xpack.esql.core.rule.Rule; import 
org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.rule.ParameterizedRule; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; import java.util.Collection; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 8c54b61dc803d..44334ff112bad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.optimizer; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEmptyRelation; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint; @@ -15,6 +14,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceMissingFieldWithNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import java.util.ArrayList; import java.util.List; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 20f3e5c9150e5..48bafd8eef00e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -9,8 +9,6 @@ import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.common.Failure; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.EnableSpatialDistancePushdown; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.InsertFieldExtraction; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushFiltersToSource; @@ -20,6 +18,8 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.SpatialDocValuesExtraction; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; import java.util.Collection; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 459e3f4d0284c..bfbf5a8f0c66f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -9,7 +9,6 @@ import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.common.Failures; -import 
org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.optimizer.rules.logical.AddDefaultTopN; import org.elasticsearch.xpack.esql.optimizer.rules.logical.BooleanFunctionEqualsElimination; @@ -58,6 +57,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogates; import org.elasticsearch.xpack.esql.optimizer.rules.logical.TranslateMetricsAggregate; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; import java.util.List; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java index 644bfa7b807ef..482a89b50c865 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java @@ -8,11 +8,11 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.util.ReflectionUtils; import org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.rule.ParameterizedRule; +import org.elasticsearch.xpack.esql.rule.Rule; public class PhysicalOptimizerRules { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizer.java index 03b9705fefc79..19f6bc810d01f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizer.java @@ -9,11 +9,11 @@ import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.common.Failure; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; -import org.elasticsearch.xpack.esql.core.rule.RuleExecutor; import org.elasticsearch.xpack.esql.optimizer.rules.physical.ProjectAwayColumns; import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.rule.RuleExecutor; import java.util.Collection; import java.util.List; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java index f087fab06828e..2a0b2a6af36aa 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java @@ -7,10 +7,10 @@ package org.elasticsearch.xpack.esql.optimizer.rules.logical; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; -import 
org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.util.ReflectionUtils; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.rule.ParameterizedRule; +import org.elasticsearch.xpack.esql.rule.Rule; public final class OptimizerRules { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java index 139f192d3c14e..73eaa9220fd84 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java @@ -12,10 +12,10 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Filter; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.rule.Rule; /** * Replace any reference attribute with its source, if it does not affect the result. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java index 62f4e391f13ec..e01608c546090 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java @@ -13,7 +13,6 @@ import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; @@ -23,6 +22,7 @@ import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; import org.elasticsearch.xpack.esql.planner.PlannerUtils; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; import java.util.List; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAliasingEvalWithProject.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAliasingEvalWithProject.java index e57a95f0f7dad..dc9421a22a69c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAliasingEvalWithProject.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAliasingEvalWithProject.java @@ -12,12 +12,12 @@ import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.util.Holder; import 
org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; import java.util.List; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/SetAsOptimized.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/SetAsOptimized.java index c9a2b44e40ebf..f31b1ef0ceaf4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/SetAsOptimized.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/SetAsOptimized.java @@ -7,8 +7,8 @@ package org.elasticsearch.xpack.esql.optimizer.rules.logical; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.rule.Rule; public final class SetAsOptimized extends Rule { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/InferIsNotNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/InferIsNotNull.java index 0e5bb74d1cdf9..d161071fe2839 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/InferIsNotNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/InferIsNotNull.java @@ -13,11 +13,11 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates; import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNotNull; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.util.CollectionUtils; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.LinkedHashSet; import java.util.Set; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index b13667465ce63..0fa6d61a0ca9b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -12,7 +12,6 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; -import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; @@ -25,6 +24,7 @@ import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.TopN; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; 
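The import churn in this stretch is one mechanical refactor repeated per file: the rule framework classes (Rule, ParameterizedRule, RuleExecutor, ParameterizedRuleExecutor, RuleExecutionException) move out of the esql-core package org.elasticsearch.xpack.esql.core.rule and into org.elasticsearch.xpack.esql.rule in the esql plugin, and the rename hunks further down show only the package declarations changing. For rule code outside this change set the migration is just the import swap, sketched below for illustration only (the importing class is assumed, not part of this change):

// before the move the same types lived under org.elasticsearch.xpack.esql.core.rule
import org.elasticsearch.xpack.esql.rule.Rule;
import org.elasticsearch.xpack.esql.rule.RuleExecutor;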
+import org.elasticsearch.xpack.esql.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.stats.SearchStats; import java.util.ArrayList; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java index bee27acd06ec0..290ae2d3ff1be 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java @@ -12,7 +12,6 @@ import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; @@ -22,6 +21,7 @@ import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; import java.util.List; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java index 7186a5194a262..c215e86b0045a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java @@ -12,12 +12,12 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.TypedAttribute; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.optimizer.rules.physical.ProjectAwayColumns; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.LinkedHashSet; import java.util.LinkedList; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/TableIdentifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/TableIdentifier.java index ceefe4e254557..532d93eec48af 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/TableIdentifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/TableIdentifier.java @@ -10,6 +10,8 @@ import java.util.Objects; +import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR; + public class TableIdentifier { private final Source source; @@ -55,7 +57,7 @@ public Source source() { } public String qualifiedIndex() { - return cluster != null ? cluster + ":" + index : index; + return cluster != null ? 
cluster + REMOTE_CLUSTER_INDEX_SEPARATOR + index : index; } @Override @@ -63,7 +65,7 @@ public String toString() { StringBuilder builder = new StringBuilder(); if (cluster != null) { builder.append(cluster); - builder.append(":"); + builder.append(REMOTE_CLUSTER_INDEX_SEPARATOR); } builder.append(index); return builder.toString(); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/ParameterizedRule.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/ParameterizedRule.java similarity index 92% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/ParameterizedRule.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/ParameterizedRule.java index 5aa7318cb74b1..ba771d503da08 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/ParameterizedRule.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/ParameterizedRule.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.esql.core.rule; +package org.elasticsearch.xpack.esql.rule; import org.elasticsearch.xpack.esql.core.tree.Node; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/ParameterizedRuleExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/ParameterizedRuleExecutor.java similarity index 95% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/ParameterizedRuleExecutor.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/ParameterizedRuleExecutor.java index bfce2b42c0328..fedef03799093 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/ParameterizedRuleExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/ParameterizedRuleExecutor.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.esql.core.rule; +package org.elasticsearch.xpack.esql.rule; import org.elasticsearch.xpack.esql.core.tree.Node; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/Rule.java similarity index 96% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/Rule.java index 163b1f89f2abb..d8b16d350e3ea 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/Rule.java @@ -4,7 +4,7 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ -package org.elasticsearch.xpack.esql.core.rule; +package org.elasticsearch.xpack.esql.rule; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/RuleExecutionException.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutionException.java similarity index 91% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/RuleExecutionException.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutionException.java index 393fd3765a01a..218d14297e956 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/RuleExecutionException.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutionException.java @@ -4,7 +4,7 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ -package org.elasticsearch.xpack.esql.core.rule; +package org.elasticsearch.xpack.esql.rule; import org.elasticsearch.xpack.esql.core.QlServerException; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/RuleExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java similarity index 99% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/RuleExecutor.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java index ba873e690be7e..3d73c0d45e9a0 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/RuleExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java @@ -4,7 +4,7 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/
-package org.elasticsearch.xpack.esql.core.rule;
+package org.elasticsearch.xpack.esql.rule;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPhysicalPlanOptimizer.java
index 9c8886dbf0b6e..e26779e075b68 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPhysicalPlanOptimizer.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPhysicalPlanOptimizer.java
@@ -7,8 +7,8 @@
package org.elasticsearch.xpack.esql.optimizer;
-import org.elasticsearch.xpack.esql.core.rule.RuleExecutor;
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
+import org.elasticsearch.xpack.esql.rule.RuleExecutor;
public class TestPhysicalPlanOptimizer extends PhysicalPlanOptimizer {
diff --git a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/rest/action/RestFreezeIndexAction.java b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/rest/action/RestFreezeIndexAction.java
index 0daf2d8a1ebf5..f3e57d61d1b36 100644
--- a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/rest/action/RestFreezeIndexAction.java
+++ b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/rest/action/RestFreezeIndexAction.java
@@ -13,6 +13,7 @@
import org.elasticsearch.client.internal.node.NodeClient;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.RestApiVersion;
+import org.elasticsearch.core.UpdateForV9;
import org.elasticsearch.protocol.xpack.frozen.FreezeRequest;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestRequest;
@@ -37,11 +38,17 @@ public final class RestFreezeIndexAction extends BaseRestHandler {
private static final String UNFREEZE_DEPRECATED = "Frozen indices are deprecated because they provide no benefit given improvements " + "in heap memory utilization. They will be removed in a future release.";
+ @UpdateForV9
+ // these routes were ".deprecated" in RestApiVersion.V_8 which will require use of REST API compatibility headers to access
+ // this API in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will
+ // continue to emit deprecation warnings but will not require any special headers to access the API in v9.
+ // Please review and update the code and tests as needed. The original code remains commented out below for reference.
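Per the comment above, the switch in the routes() change just below from ".deprecated(..., RestApiVersion.V_8)" to ".deprecateAndKeep(...)" means the _unfreeze endpoint stays callable on a v9 node without REST API compatibility headers; callers only receive a deprecation warning. A minimal sketch against the low-level REST client (client setup and index name are illustrative, not part of this change):

import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;

// assumes an already-built org.elasticsearch.client.RestClient named "client"
Request unfreeze = new Request("POST", "/my-index/_unfreeze");   // "my-index" is a placeholder
Response response = client.performRequest(unfreeze);            // succeeds without compatible-with headers
String deprecation = response.getHeader("Warning");             // still carries the UNFREEZE_DEPRECATED message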
@Override public List routes() { return List.of( Route.builder(POST, "/{index}/_freeze").deprecated(FREEZE_REMOVED, RestApiVersion.V_7).build(), - Route.builder(POST, "/{index}/_unfreeze").deprecated(UNFREEZE_DEPRECATED, RestApiVersion.V_8).build() + // Route.builder(POST, "/{index}/_unfreeze").deprecated(UNFREEZE_DEPRECATED, RestApiVersion.V_8).build() + Route.builder(POST, "/{index}/_unfreeze").deprecateAndKeep(UNFREEZE_DEPRECATED).build() ); } diff --git a/x-pack/plugin/graph/src/test/java/org/elasticsearch/xpack/graph/rest/action/RestGraphActionTests.java b/x-pack/plugin/graph/src/test/java/org/elasticsearch/xpack/graph/rest/action/RestGraphActionTests.java deleted file mode 100644 index 4961efd7253ec..0000000000000 --- a/x-pack/plugin/graph/src/test/java/org/elasticsearch/xpack/graph/rest/action/RestGraphActionTests.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.graph.rest.action; - -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.Tuple; -import org.elasticsearch.protocol.xpack.graph.GraphExploreRequest; -import org.elasticsearch.protocol.xpack.graph.GraphExploreResponse; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.test.rest.FakeRestRequest; -import org.elasticsearch.test.rest.RestActionTestCase; -import org.elasticsearch.xcontent.XContentType; -import org.junit.Before; -import org.mockito.Mockito; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.hamcrest.Matchers.instanceOf; - -public final class RestGraphActionTests extends RestActionTestCase { - private final List compatibleMediaType = Collections.singletonList(randomCompatibleMediaType(RestApiVersion.V_7)); - - @Before - public void setUpAction() { - controller().registerHandler(new RestGraphAction()); - verifyingClient.setExecuteVerifier((actionType, request) -> { - assertThat(request, instanceOf(GraphExploreRequest.class)); - return Mockito.mock(GraphExploreResponse.class); - }); - } - - public void testTypeInPath() { - for (Tuple methodAndPath : List.of( - Tuple.tuple(RestRequest.Method.GET, "/some_index/some_type/_graph/explore"), - Tuple.tuple(RestRequest.Method.POST, "/some_index/some_type/_graph/explore"), - Tuple.tuple(RestRequest.Method.GET, "/some_index/some_type/_xpack/graph/_explore"), - Tuple.tuple(RestRequest.Method.POST, "/some_index/some_type/_xpack/graph/_explore") - )) { - - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of( - "Accept", - compatibleMediaType, - "Content-Type", - Collections.singletonList(compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)) - ) - ).withMethod(methodAndPath.v1()).withPath(methodAndPath.v2()).withContent(new BytesArray("{}"), null).build(); - - dispatchRequest(request); - assertCriticalWarnings(RestGraphAction.TYPES_DEPRECATION_MESSAGE); - } - } -} diff --git a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java index dc8c248bbbad6..ec8f7c230b1d3 100644 --- 
a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/ExplainLifecycleIT.java @@ -30,6 +30,7 @@ import org.elasticsearch.xpack.core.ilm.ShrinkAction; import org.junit.Before; +import java.util.Formatter; import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -42,6 +43,7 @@ import static org.elasticsearch.xpack.TimeSeriesRestDriver.explainIndex; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.is; @@ -257,6 +259,64 @@ public void testExplainOrder() throws Exception { ); } + public void testStepInfoPreservedOnAutoRetry() throws Exception { + String policyName = "policy-" + randomAlphaOfLength(5).toLowerCase(Locale.ROOT); + + Request createPolice = new Request("PUT", "_ilm/policy/" + policyName); + createPolice.setJsonEntity(""" + { + "policy": { + "phases": { + "hot": { + "actions": { + "rollover": { + "max_docs": 1 + } + } + } + } + } + } + """); + assertOK(client().performRequest(createPolice)); + + String aliasName = "step-info-test"; + String indexName = aliasName + "-" + randomAlphaOfLength(5).toLowerCase(Locale.ROOT); + + Request templateRequest = new Request("PUT", "_index_template/template_" + policyName); + + String templateBodyTemplate = """ + { + "index_patterns": ["%s-*"], + "template": { + "settings": { + "index.lifecycle.name": "%s", + "index.lifecycle.rollover_alias": "%s" + } + } + } + """; + Formatter formatter = new Formatter(Locale.ROOT); + templateRequest.setJsonEntity(formatter.format(templateBodyTemplate, aliasName, policyName, aliasName).toString()); + + assertOK(client().performRequest(templateRequest)); + + Request indexRequest = new Request("POST", "/" + indexName + "/_doc/1"); + indexRequest.setJsonEntity("{\"test\":\"value\"}"); + assertOK(client().performRequest(indexRequest)); + + assertBusy(() -> { + Map explainIndex = explainIndex(client(), indexName); + assertThat(explainIndex.get("failed_step_retry_count"), notNullValue()); + assertThat(explainIndex.get("previous_step_info"), notNullValue()); + assertThat((int) explainIndex.get("failed_step_retry_count"), greaterThan(0)); + assertThat( + explainIndex.get("previous_step_info").toString(), + containsString("rollover_alias [" + aliasName + "] does not point to index [" + indexName + "]") + ); + }); + } + private void assertUnmanagedIndex(Map explainIndexMap) { assertThat(explainIndexMap.get("managed"), is(false)); assertThat(explainIndexMap.get("time_since_index_creation"), is(nullValue())); diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java index a87f2d4d2151e..b3f29535020bf 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java @@ -289,6 +289,7 @@ private static LifecycleExecutionState updateExecutionStateToStep( // clear any step info or error-related settings from the current step updatedState.setFailedStep(null); + updatedState.setPreviousStepInfo(existingState.stepInfo()); updatedState.setStepInfo(null); 
updatedState.setIsAutoRetryableError(null); updatedState.setFailedStepRetryCount(null); @@ -389,6 +390,7 @@ public static LifecycleExecutionState moveStateToNextActionAndUpdateCachedPhase( updatedState.setStep(nextStep.name()); updatedState.setStepTime(nowAsMillis); updatedState.setFailedStep(null); + updatedState.setPreviousStepInfo(existingState.stepInfo()); updatedState.setStepInfo(null); updatedState.setIsAutoRetryableError(null); updatedState.setFailedStepRetryCount(null); diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java index 383dc6622f280..c50ea682ca9a2 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/action/TransportExplainLifecycleAction.java @@ -127,10 +127,15 @@ static IndexLifecycleExplainResponse getIndexLifecycleExplainResponse( String policyName = indexMetadata.getLifecyclePolicyName(); String currentPhase = lifecycleState.phase(); String stepInfo = lifecycleState.stepInfo(); + String previousStepInfo = lifecycleState.previousStepInfo(); BytesArray stepInfoBytes = null; if (stepInfo != null) { stepInfoBytes = new BytesArray(stepInfo); } + BytesArray previousStepInfoBytes = null; + if (previousStepInfo != null) { + previousStepInfoBytes = new BytesArray(previousStepInfo); + } Long indexCreationDate = indexMetadata.getCreationDate(); // parse existing phase steps from the phase definition in the index settings @@ -182,6 +187,7 @@ static IndexLifecycleExplainResponse getIndexLifecycleExplainResponse( lifecycleState.snapshotName(), lifecycleState.shrinkIndexName(), stepInfoBytes, + previousStepInfoBytes, phaseExecutionInfo ); } else { diff --git a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java index 9449e0c0574dc..37d586240eb7a 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java @@ -896,7 +896,7 @@ public void testMoveClusterStateToFailedNotOnError() { ); } - public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetry() { + public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetryAndSetsPreviousStepInfo() { String indexName = "my_index"; String policyName = "my_policy"; long now = randomNonNegativeLong(); @@ -921,6 +921,8 @@ public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetry() { lifecycleState.setStep(errorStepKey.name()); lifecycleState.setStepTime(now); lifecycleState.setFailedStep(failedStepKey.name()); + String initialStepInfo = randomAlphaOfLengthBetween(10, 50); + lifecycleState.setStepInfo(initialStepInfo); ClusterState clusterState = buildClusterState( indexName, indexSettingsBuilder, @@ -938,6 +940,7 @@ public void testMoveClusterStateToPreviouslyFailedStepAsAutomaticRetry() { IndexLifecycleRunnerTests.assertClusterStateOnNextStep(clusterState, index, errorStepKey, failedStepKey, nextClusterState, now); LifecycleExecutionState executionState = nextClusterState.metadata().index(indexName).getLifecycleExecutionState(); assertThat(executionState.failedStepRetryCount(), is(1)); + assertThat(executionState.previousStepInfo(), 
is(initialStepInfo)); } public void testMoveToFailedStepDoesntRefreshCachedPhaseWhenUnsafe() { diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java index 6c15b42dc65d5..01e8c30e3bf27 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java @@ -24,7 +24,7 @@ public class TextEmbeddingCrudIT extends InferenceBaseRestTest { public void testPutE5Small_withNoModelVariant() { { - String inferenceEntityId = randomAlphaOfLength(10).toLowerCase(); + String inferenceEntityId = "testPutE5Small_withNoModelVariant"; expectThrows( org.elasticsearch.client.ResponseException.class, () -> putTextEmbeddingModel(inferenceEntityId, noModelIdVariantJsonEntity()) @@ -33,7 +33,7 @@ public void testPutE5Small_withNoModelVariant() { } public void testPutE5Small_withPlatformAgnosticVariant() throws IOException { - String inferenceEntityId = randomAlphaOfLength(10).toLowerCase(); + String inferenceEntityId = "teste5mall_withplatformagnosticvariant"; putTextEmbeddingModel(inferenceEntityId, platformAgnosticModelVariantJsonEntity()); var models = getTrainedModel("_all"); assertThat(models.toString(), containsString("deployment_id=" + inferenceEntityId)); @@ -50,9 +50,8 @@ public void testPutE5Small_withPlatformAgnosticVariant() throws IOException { deleteTextEmbeddingModel(inferenceEntityId); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105198") public void testPutE5Small_withPlatformSpecificVariant() throws IOException { - String inferenceEntityId = randomAlphaOfLength(10).toLowerCase(); + String inferenceEntityId = "teste5mall_withplatformspecificvariant"; if ("linux-x86_64".equals(Platforms.PLATFORM_NAME)) { putTextEmbeddingModel(inferenceEntityId, platformSpecificModelVariantJsonEntity()); var models = getTrainedModel("_all"); @@ -77,7 +76,7 @@ public void testPutE5Small_withPlatformSpecificVariant() throws IOException { } public void testPutE5Small_withFakeModelVariant() { - String inferenceEntityId = randomAlphaOfLength(10).toLowerCase(); + String inferenceEntityId = "teste5mall_withfakevariant"; expectThrows( org.elasticsearch.client.ResponseException.class, () -> putTextEmbeddingModel(inferenceEntityId, fakeModelVariantJsonEntity()) @@ -112,7 +111,7 @@ private Map putTextEmbeddingModel(String inferenceEntityId, Stri private String noModelIdVariantJsonEntity() { return """ { - "service": "text_embedding", + "service": "elasticsearch", "service_settings": { "num_allocations": 1, "num_threads": 1 diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index fd330a8cf6cc6..30ccb48d5c709 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -10,6 +10,7 @@ import org.elasticsearch.features.FeatureSpecification; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; 
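On the TextEmbeddingCrudIT changes above: the body fix in noModelIdVariantJsonEntity (service "text_embedding" changed to "elasticsearch") reflects that "text_embedding" is the task type carried in the endpoint path, while the "service" field names the provider. A hedged sketch of the create-endpoint call these tests presumably wrap (the path layout, endpoint id, and E5 model id are stated from memory, not taken from this change set):

import org.elasticsearch.client.Request;

// inside a REST test with client() available; putTextEmbeddingModel(...) above likely issues something like this
Request put = new Request("PUT", "_inference/text_embedding/my-e5-endpoint");   // endpoint id is illustrative
put.setJsonEntity("""
    {
      "service": "elasticsearch",
      "service_settings": {
        "num_allocations": 1,
        "num_threads": 1,
        "model_id": ".multilingual-e5-small"
      }
    }
    """);
client().performRequest(put);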
+import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; import org.elasticsearch.xpack.inference.rank.random.RandomRankRetrieverBuilder; import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder; @@ -25,7 +26,8 @@ public Set getFeatures() { return Set.of( TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RETRIEVER_SUPPORTED, RandomRankRetrieverBuilder.RANDOM_RERANKER_RETRIEVER_SUPPORTED, - SemanticTextFieldMapper.SEMANTIC_TEXT_SEARCH_INFERENCE_ID + SemanticTextFieldMapper.SEMANTIC_TEXT_SEARCH_INFERENCE_ID, + SemanticQueryBuilder.SEMANTIC_TEXT_INNER_HITS ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreator.java index ea2295979c480..c8ada6e535b63 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreator.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreator.java @@ -13,6 +13,7 @@ import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.elasticsearch.xpack.inference.services.ServiceComponents; import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModel; +import org.elasticsearch.xpack.inference.telemetry.TraceContext; import java.util.Objects; @@ -24,14 +25,17 @@ public class ElasticInferenceServiceActionCreator implements ElasticInferenceSer private final ServiceComponents serviceComponents; - public ElasticInferenceServiceActionCreator(Sender sender, ServiceComponents serviceComponents) { + private final TraceContext traceContext; + + public ElasticInferenceServiceActionCreator(Sender sender, ServiceComponents serviceComponents, TraceContext traceContext) { this.sender = Objects.requireNonNull(sender); this.serviceComponents = Objects.requireNonNull(serviceComponents); + this.traceContext = traceContext; } @Override public ExecutableAction create(ElasticInferenceServiceSparseEmbeddingsModel model) { - var requestManager = new ElasticInferenceServiceSparseEmbeddingsRequestManager(model, serviceComponents); + var requestManager = new ElasticInferenceServiceSparseEmbeddingsRequestManager(model, serviceComponents, traceContext); var errorMessage = constructFailedToSendRequestMessage(model.uri(), "Elastic Inference Service sparse embeddings"); return new SenderExecutableAction(sender, requestManager, errorMessage); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ElasticInferenceServiceSparseEmbeddingsRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ElasticInferenceServiceSparseEmbeddingsRequestManager.java index b59ac54d5cbb6..e7ee41525f07d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ElasticInferenceServiceSparseEmbeddingsRequestManager.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ElasticInferenceServiceSparseEmbeddingsRequestManager.java @@ -19,6 +19,7 @@ import org.elasticsearch.xpack.inference.external.response.elastic.ElasticInferenceServiceSparseEmbeddingsResponseEntity; import 
org.elasticsearch.xpack.inference.services.ServiceComponents; import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModel; +import org.elasticsearch.xpack.inference.telemetry.TraceContext; import java.util.List; import java.util.function.Supplier; @@ -35,6 +36,8 @@ public class ElasticInferenceServiceSparseEmbeddingsRequestManager extends Elast private final Truncator truncator; + private final TraceContext traceContext; + private static ResponseHandler createSparseEmbeddingsHandler() { return new ElasticInferenceServiceResponseHandler( "Elastic Inference Service sparse embeddings", @@ -44,11 +47,13 @@ private static ResponseHandler createSparseEmbeddingsHandler() { public ElasticInferenceServiceSparseEmbeddingsRequestManager( ElasticInferenceServiceSparseEmbeddingsModel model, - ServiceComponents serviceComponents + ServiceComponents serviceComponents, + TraceContext traceContext ) { super(serviceComponents.threadPool(), model); this.model = model; this.truncator = serviceComponents.truncator(); + this.traceContext = traceContext; } @Override @@ -64,7 +69,8 @@ public void execute( ElasticInferenceServiceSparseEmbeddingsRequest request = new ElasticInferenceServiceSparseEmbeddingsRequest( truncator, truncatedInput, - model + model, + traceContext ); execute(new ExecutableInferenceRequest(requestSender, logger, request, HANDLER, hasRequestCompletedFunction, listener)); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequest.java index 41a2ef1c3ccda..d445a779f8230 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequest.java @@ -12,11 +12,13 @@ import org.apache.http.entity.ByteArrayEntity; import org.apache.http.message.BasicHeader; import org.elasticsearch.common.Strings; +import org.elasticsearch.tasks.Task; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.inference.common.Truncator; import org.elasticsearch.xpack.inference.external.request.HttpRequest; import org.elasticsearch.xpack.inference.external.request.Request; import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModel; +import org.elasticsearch.xpack.inference.telemetry.TraceContext; import java.net.URI; import java.nio.charset.StandardCharsets; @@ -31,15 +33,19 @@ public class ElasticInferenceServiceSparseEmbeddingsRequest implements ElasticIn private final Truncator.TruncationResult truncationResult; private final Truncator truncator; + private final TraceContext traceContext; + public ElasticInferenceServiceSparseEmbeddingsRequest( Truncator truncator, Truncator.TruncationResult truncationResult, - ElasticInferenceServiceSparseEmbeddingsModel model + ElasticInferenceServiceSparseEmbeddingsModel model, + TraceContext traceContext ) { this.truncator = truncator; this.truncationResult = truncationResult; this.model = Objects.requireNonNull(model); this.uri = model.uri(); + this.traceContext = traceContext; } @Override @@ -50,6 +56,10 @@ public HttpRequest createHttpRequest() { ByteArrayEntity byteEntity = new 
ByteArrayEntity(requestEntity.getBytes(StandardCharsets.UTF_8)); httpPost.setEntity(byteEntity); + if (traceContext != null) { + propagateTraceContext(httpPost); + } + httpPost.setHeader(new BasicHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType())); return new HttpRequest(httpPost, getInferenceEntityId()); @@ -65,11 +75,15 @@ public URI getURI() { return this.uri; } + public TraceContext getTraceContext() { + return traceContext; + } + @Override public Request truncate() { var truncatedInput = truncator.truncate(truncationResult.input()); - return new ElasticInferenceServiceSparseEmbeddingsRequest(truncator, truncatedInput, model); + return new ElasticInferenceServiceSparseEmbeddingsRequest(truncator, truncatedInput, model, traceContext); } @Override @@ -77,4 +91,16 @@ public boolean[] getTruncationInfo() { return truncationResult.truncated().clone(); } + private void propagateTraceContext(HttpPost httpPost) { + var traceParent = traceContext.traceParent(); + var traceState = traceContext.traceState(); + + if (traceParent != null) { + httpPost.setHeader(Task.TRACE_PARENT_HTTP_HEADER, traceParent); + } + + if (traceState != null) { + httpPost.setHeader(Task.TRACE_STATE, traceState); + } + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 0483296cd2c6a..e0ad044f597ab 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -40,6 +40,7 @@ import org.elasticsearch.index.mapper.ValueFetcher; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; +import org.elasticsearch.index.query.InnerHitBuilder; import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.NestedQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; @@ -54,6 +55,7 @@ import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; +import org.elasticsearch.xpack.inference.queries.SemanticQueryInnerHitBuilder; import java.io.IOException; import java.util.ArrayList; @@ -468,7 +470,12 @@ public boolean fieldHasValue(FieldInfos fieldInfos) { return fieldInfos.fieldInfo(getEmbeddingsFieldName(name())) != null; } - public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost, String queryName) { + public QueryBuilder semanticQuery( + InferenceResults inferenceResults, + float boost, + String queryName, + SemanticQueryInnerHitBuilder semanticInnerHitBuilder + ) { String nestedFieldPath = getChunksFieldName(name()); String inferenceResultsFieldName = getEmbeddingsFieldName(name()); QueryBuilder childQueryBuilder; @@ -524,7 +531,10 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost }; } - return new NestedQueryBuilder(nestedFieldPath, childQueryBuilder, ScoreMode.Max).boost(boost).queryName(queryName); + InnerHitBuilder innerHitBuilder = semanticInnerHitBuilder != null ? 
semanticInnerHitBuilder.toInnerHitBuilder() : null; + return new NestedQueryBuilder(nestedFieldPath, childQueryBuilder, ScoreMode.Max).boost(boost) + .queryName(queryName) + .innerHit(innerHitBuilder); } private String generateQueryInferenceResultsTypeMismatchMessage(InferenceResults inferenceResults, String expectedResultsType) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 9f7fcb1ef407c..901de30145f7d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -16,6 +16,8 @@ import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.MatchNoneQueryBuilder; @@ -44,35 +46,46 @@ import java.util.Map; import java.util.Objects; +import static org.elasticsearch.TransportVersions.SEMANTIC_QUERY_INNER_HITS; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN; import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; public class SemanticQueryBuilder extends AbstractQueryBuilder { + public static final NodeFeature SEMANTIC_TEXT_INNER_HITS = new NodeFeature("semantic_text.inner_hits"); + public static final String NAME = "semantic"; private static final ParseField FIELD_FIELD = new ParseField("field"); private static final ParseField QUERY_FIELD = new ParseField("query"); + private static final ParseField INNER_HITS_FIELD = new ParseField("inner_hits"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, false, - args -> new SemanticQueryBuilder((String) args[0], (String) args[1]) + args -> new SemanticQueryBuilder((String) args[0], (String) args[1], (SemanticQueryInnerHitBuilder) args[2]) ); static { PARSER.declareString(constructorArg(), FIELD_FIELD); PARSER.declareString(constructorArg(), QUERY_FIELD); + PARSER.declareObject(optionalConstructorArg(), (p, c) -> SemanticQueryInnerHitBuilder.fromXContent(p), INNER_HITS_FIELD); declareStandardFields(PARSER); } private final String fieldName; private final String query; + private final SemanticQueryInnerHitBuilder innerHitBuilder; private final SetOnce inferenceResultsSupplier; private final InferenceResults inferenceResults; private final boolean noInferenceResults; public SemanticQueryBuilder(String fieldName, String query) { + this(fieldName, query, null); + } + + public SemanticQueryBuilder(String fieldName, String query, @Nullable SemanticQueryInnerHitBuilder innerHitBuilder) { if (fieldName == null) { throw new IllegalArgumentException("[" + NAME + "] requires a " + FIELD_FIELD.getPreferredName() + " value"); } @@ -81,15 +94,25 @@ public SemanticQueryBuilder(String fieldName, String query) { } this.fieldName = fieldName; this.query = query; + this.innerHitBuilder = innerHitBuilder; 
this.inferenceResults = null; this.inferenceResultsSupplier = null; this.noInferenceResults = false; + + if (this.innerHitBuilder != null) { + this.innerHitBuilder.setFieldName(fieldName); + } } public SemanticQueryBuilder(StreamInput in) throws IOException { super(in); this.fieldName = in.readString(); this.query = in.readString(); + if (in.getTransportVersion().onOrAfter(SEMANTIC_QUERY_INNER_HITS)) { + this.innerHitBuilder = in.readOptionalWriteable(SemanticQueryInnerHitBuilder::new); + } else { + this.innerHitBuilder = null; + } this.inferenceResults = in.readOptionalNamedWriteable(InferenceResults.class); this.noInferenceResults = in.readBoolean(); this.inferenceResultsSupplier = null; @@ -102,6 +125,21 @@ protected void doWriteTo(StreamOutput out) throws IOException { } out.writeString(fieldName); out.writeString(query); + if (out.getTransportVersion().onOrAfter(SEMANTIC_QUERY_INNER_HITS)) { + out.writeOptionalWriteable(innerHitBuilder); + } else if (innerHitBuilder != null) { + throw new IllegalStateException( + "Transport version must be at least [" + + SEMANTIC_QUERY_INNER_HITS.toReleaseVersion() + + "] to use [ " + + INNER_HITS_FIELD.getPreferredName() + + "] in [" + + NAME + + "], current transport version is [" + + out.getTransportVersion().toReleaseVersion() + + "]. Are you running a mixed-version cluster?" + ); + } out.writeOptionalNamedWriteable(inferenceResults); out.writeBoolean(noInferenceResults); } @@ -114,6 +152,7 @@ private SemanticQueryBuilder( ) { this.fieldName = other.fieldName; this.query = other.query; + this.innerHitBuilder = other.innerHitBuilder; this.boost = other.boost; this.queryName = other.queryName; this.inferenceResultsSupplier = inferenceResultsSupplier; @@ -121,6 +160,10 @@ private SemanticQueryBuilder( this.noInferenceResults = noInferenceResults; } + public SemanticQueryInnerHitBuilder innerHit() { + return innerHitBuilder; + } + @Override public String getWriteableName() { return NAME; @@ -140,6 +183,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep builder.startObject(NAME); builder.field(FIELD_FIELD.getPreferredName(), fieldName); builder.field(QUERY_FIELD.getPreferredName(), query); + if (innerHitBuilder != null) { + builder.field(INNER_HITS_FIELD.getPreferredName(), innerHitBuilder); + } boostAndQueryNameToXContent(builder); builder.endObject(); } @@ -166,7 +212,7 @@ private QueryBuilder doRewriteBuildSemanticQuery(SearchExecutionContext searchEx ); } - return semanticTextFieldType.semanticQuery(inferenceResults, boost(), queryName()); + return semanticTextFieldType.semanticQuery(inferenceResults, boost(), queryName(), innerHitBuilder); } else { throw new IllegalArgumentException( "Field [" + fieldName + "] of type [" + fieldType.typeName() + "] does not support " + NAME + " queries" @@ -301,11 +347,12 @@ private static String getInferenceIdForForField(Collection indexM protected boolean doEquals(SemanticQueryBuilder other) { return Objects.equals(fieldName, other.fieldName) && Objects.equals(query, other.query) + && Objects.equals(innerHitBuilder, other.innerHitBuilder) && Objects.equals(inferenceResults, other.inferenceResults); } @Override protected int doHashCode() { - return Objects.hash(fieldName, query, inferenceResults); + return Objects.hash(fieldName, query, innerHitBuilder, inferenceResults); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryInnerHitBuilder.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryInnerHitBuilder.java new file mode 100644 index 0000000000000..776ce990665ac --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryInnerHitBuilder.java @@ -0,0 +1,132 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.queries; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.index.query.InnerHitBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import org.elasticsearch.xcontent.ObjectParser; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.inference.mapper.SemanticTextField; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.index.query.InnerHitBuilder.DEFAULT_FROM; +import static org.elasticsearch.index.query.InnerHitBuilder.DEFAULT_SIZE; + +public class SemanticQueryInnerHitBuilder implements Writeable, ToXContentObject { + private static final ObjectParser PARSER = new ObjectParser<>( + "semantic_query_inner_hits", + SemanticQueryInnerHitBuilder::new + ); + + static { + PARSER.declareInt(SemanticQueryInnerHitBuilder::setFrom, SearchSourceBuilder.FROM_FIELD); + PARSER.declareInt(SemanticQueryInnerHitBuilder::setSize, SearchSourceBuilder.SIZE_FIELD); + } + + private String fieldName; + private int from = DEFAULT_FROM; + private int size = DEFAULT_SIZE; + + public SemanticQueryInnerHitBuilder() { + this.fieldName = null; + } + + public SemanticQueryInnerHitBuilder(StreamInput in) throws IOException { + fieldName = in.readOptionalString(); + from = in.readVInt(); + size = in.readVInt(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalString(fieldName); + out.writeVInt(from); + out.writeVInt(size); + } + + public String getFieldName() { + return fieldName; + } + + public void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + + public int getFrom() { + return from; + } + + public SemanticQueryInnerHitBuilder setFrom(int from) { + this.from = from; + return this; + } + + public int getSize() { + return size; + } + + public SemanticQueryInnerHitBuilder setSize(int size) { + this.size = size; + return this; + } + + public InnerHitBuilder toInnerHitBuilder() { + if (fieldName == null) { + throw new IllegalStateException("fieldName must have a value"); + } + + return new InnerHitBuilder(fieldName).setFrom(from) + .setSize(size) + .setFetchSourceContext(FetchSourceContext.of(true, null, new String[] { SemanticTextField.getEmbeddingsFieldName(fieldName) })); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // Don't include name in XContent because it is hard-coded + builder.startObject(); + if (from != DEFAULT_FROM) { + builder.field(SearchSourceBuilder.FROM_FIELD.getPreferredName(), from); + } + if (size != 
DEFAULT_SIZE) { + builder.field(SearchSourceBuilder.SIZE_FIELD.getPreferredName(), size); + } + builder.endObject(); + return builder; + } + + public static SemanticQueryInnerHitBuilder fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, new SemanticQueryInnerHitBuilder(), null); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SemanticQueryInnerHitBuilder that = (SemanticQueryInnerHitBuilder) o; + return from == that.from && size == that.size && Objects.equals(fieldName, that.fieldName); + } + + @Override + public int hashCode() { + return Objects.hash(fieldName, from, size); + } + + @Override + public String toString() { + return Strings.toString(this, true, true); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java index 103ddd4c5c5ea..abbe893823b96 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java @@ -23,6 +23,7 @@ import org.elasticsearch.inference.ModelSecrets; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.tasks.Task; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; @@ -34,6 +35,7 @@ import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; +import org.elasticsearch.xpack.inference.telemetry.TraceContext; import java.util.List; import java.util.Map; @@ -75,8 +77,13 @@ protected void doInfer( return; } + // We extract the trace context here as it's sufficient to propagate the trace information of the REST request, + // which handles the request to the inference API overall (including the outgoing request, which is started in a new thread + // generating a different "traceparent" as every task and every REST request creates a new span). 
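Stepping back to the semantic query changes above: SemanticQueryBuilder now takes an optional SemanticQueryInnerHitBuilder, the query constructor pushes the semantic_text field name into the inner-hit builder via setFieldName, and toInnerHitBuilder excludes the embeddings field from the returned _source. A usage sketch built only from the constructors shown above (field name, query text, and size are illustrative):

import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder;
import org.elasticsearch.xpack.inference.queries.SemanticQueryInnerHitBuilder;

// request nested inner hits for the best-matching chunks of the semantic_text field
SemanticQueryInnerHitBuilder innerHits = new SemanticQueryInnerHitBuilder().setFrom(0).setSize(3);
SemanticQueryBuilder query = new SemanticQueryBuilder("content", "how do I restore a snapshot?", innerHits);
// roughly equivalent JSON: { "semantic": { "field": "content", "query": "...", "inner_hits": { "size": 3 } } }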
+ var currentTraceInfo = getCurrentTraceInfo(); + ElasticInferenceServiceModel elasticInferenceServiceModel = (ElasticInferenceServiceModel) model; - var actionCreator = new ElasticInferenceServiceActionCreator(getSender(), getServiceComponents()); + var actionCreator = new ElasticInferenceServiceActionCreator(getSender(), getServiceComponents(), currentTraceInfo); var action = elasticInferenceServiceModel.accept(actionCreator, taskSettings); action.execute(inputs, timeout, listener); @@ -258,4 +265,13 @@ private ElasticInferenceServiceSparseEmbeddingsModel updateModelWithEmbeddingDet return new ElasticInferenceServiceSparseEmbeddingsModel(model, serviceSettings); } + + private TraceContext getCurrentTraceInfo() { + var threadPool = getServiceComponents().threadPool(); + + var traceParent = threadPool.getThreadContext().getHeader(Task.TRACE_PARENT); + var traceState = threadPool.getThreadContext().getHeader(Task.TRACE_STATE); + + return new TraceContext(traceParent, traceState); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index 93408c067098b..675bc275c8bd1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -201,9 +201,7 @@ private void e5Case( MULTILINGUAL_E5_SMALL_MODEL_ID ) ); - } - - if (modelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic(platformArchitectures, esServiceSettingsBuilder.getModelId())) { + } else if (modelVariantValidForArchitecture(platformArchitectures, esServiceSettingsBuilder.getModelId()) == false) { throw new IllegalArgumentException( "Error parsing request config, model id does not match any models available on this platform. Was [" + esServiceSettingsBuilder.getModelId() @@ -224,17 +222,19 @@ private void e5Case( ); } - private static boolean modelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic( - Set platformArchitectures, - String modelId - ) { + static boolean modelVariantValidForArchitecture(Set platformArchitectures, String modelId) { + if (modelId.equals(MULTILINGUAL_E5_SMALL_MODEL_ID)) { + // platform agnostic model is always compatible + return true; + } + return modelId.equals( selectDefaultModelVariantBasedOnClusterArchitecture( platformArchitectures, MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86, MULTILINGUAL_E5_SMALL_MODEL_ID ) - ) && modelId.equals(MULTILINGUAL_E5_SMALL_MODEL_ID) == false; + ); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/TraceContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/TraceContext.java new file mode 100644 index 0000000000000..05654ed146f16 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/TraceContext.java @@ -0,0 +1,10 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.telemetry; + +public record TraceContext(String traceParent, String traceState) {} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreatorTests.java index 1081a60ba6866..02b09917d0065 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreatorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/action/elastic/ElasticInferenceServiceActionCreatorTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.xpack.inference.logging.ThrottlerManager; import org.elasticsearch.xpack.inference.results.SparseEmbeddingResultsTests; import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModelTests; +import org.elasticsearch.xpack.inference.telemetry.TraceContext; import org.junit.After; import org.junit.Before; @@ -89,7 +90,7 @@ public void testExecute_ReturnsSuccessfulResponse_ForElserAction() throws IOExce webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); var model = ElasticInferenceServiceSparseEmbeddingsModelTests.createModel(getUrl(webServer)); - var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool)); + var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool), createTraceContext()); var action = actionCreator.create(model); PlainActionFuture listener = new PlainActionFuture<>(); @@ -145,7 +146,7 @@ public void testSend_FailsFromInvalidResponseFormat_ForElserAction() throws IOEx webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); var model = ElasticInferenceServiceSparseEmbeddingsModelTests.createModel(getUrl(webServer)); - var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool)); + var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool), createTraceContext()); var action = actionCreator.create(model); PlainActionFuture listener = new PlainActionFuture<>(); @@ -197,7 +198,7 @@ public void testExecute_ReturnsSuccessfulResponse_AfterTruncating() throws IOExc webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); var model = ElasticInferenceServiceSparseEmbeddingsModelTests.createModel(getUrl(webServer)); - var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool)); + var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool), createTraceContext()); var action = actionCreator.create(model); PlainActionFuture listener = new PlainActionFuture<>(); @@ -257,7 +258,7 @@ public void testExecute_TruncatesInputBeforeSending() throws IOException { // truncated to 1 token = 3 characters var model = ElasticInferenceServiceSparseEmbeddingsModelTests.createModel(getUrl(webServer), 1); - var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool)); + var actionCreator = new ElasticInferenceServiceActionCreator(sender, createWithEmptySettings(threadPool), createTraceContext()); var action = actionCreator.create(model); PlainActionFuture listener = new 
PlainActionFuture<>(); @@ -286,4 +287,8 @@ public void testExecute_TruncatesInputBeforeSending() throws IOException { } } + private TraceContext createTraceContext() { + return new TraceContext(randomAlphaOfLength(10), randomAlphaOfLength(10)); + } + } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequestTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequestTests.java index 0f2c859fb62d5..9d3bbe2ed12ae 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequestTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/elastic/ElasticInferenceServiceSparseEmbeddingsRequestTests.java @@ -9,11 +9,13 @@ import org.apache.http.HttpHeaders; import org.apache.http.client.methods.HttpPost; +import org.elasticsearch.tasks.Task; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.inference.common.Truncator; import org.elasticsearch.xpack.inference.common.TruncatorTests; import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSparseEmbeddingsModelTests; +import org.elasticsearch.xpack.inference.telemetry.TraceContext; import java.io.IOException; import java.util.List; @@ -42,6 +44,23 @@ public void testCreateHttpRequest() throws IOException { assertThat(requestMap.get("input"), is(List.of(input))); } + public void testTraceContextPropagatedThroughHTTPHeaders() { + var url = "http://eis-gateway.com"; + var input = "input"; + + var request = createRequest(url, input); + var httpRequest = request.createHttpRequest(); + + assertThat(httpRequest.httpRequestBase(), instanceOf(HttpPost.class)); + var httpPost = (HttpPost) httpRequest.httpRequestBase(); + + var traceParent = request.getTraceContext().traceParent(); + var traceState = request.getTraceContext().traceState(); + + assertThat(httpPost.getLastHeader(Task.TRACE_PARENT_HTTP_HEADER).getValue(), is(traceParent)); + assertThat(httpPost.getLastHeader(Task.TRACE_STATE).getValue(), is(traceState)); + } + public void testTruncate_ReducesInputTextSizeByHalf() throws IOException { var url = "http://eis-gateway.com"; var input = "abcd"; @@ -75,7 +94,8 @@ public ElasticInferenceServiceSparseEmbeddingsRequest createRequest(String url, return new ElasticInferenceServiceSparseEmbeddingsRequest( TruncatorTests.createTruncator(), new Truncator.TruncationResult(List.of(input), new boolean[] { false }), - embeddingsModel + embeddingsModel, + new TraceContext(randomAlphaOfLength(10), randomAlphaOfLength(10)) ); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index f54ce89183079..47ac33a5cf9ab 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -31,7 +31,9 @@ import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import 
org.elasticsearch.index.query.InnerHitContextBuilder; import org.elasticsearch.index.query.MatchNoneQueryBuilder; +import org.elasticsearch.index.query.NestedQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.SearchExecutionContext; @@ -62,7 +64,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.apache.lucene.search.BooleanClause.Occur.FILTER; import static org.apache.lucene.search.BooleanClause.Occur.MUST; @@ -165,7 +169,14 @@ protected SemanticQueryBuilder doCreateTestQueryBuilder() { queryTokens.add(randomAlphaOfLength(QUERY_TOKEN_LENGTH)); } - SemanticQueryBuilder builder = new SemanticQueryBuilder(SEMANTIC_TEXT_FIELD, String.join(" ", queryTokens)); + SemanticQueryInnerHitBuilder innerHitBuilder = null; + if (randomBoolean()) { + innerHitBuilder = new SemanticQueryInnerHitBuilder(); + innerHitBuilder.setFrom(randomIntBetween(0, 100)); + innerHitBuilder.setSize(randomIntBetween(0, 100)); + } + + SemanticQueryBuilder builder = new SemanticQueryBuilder(SEMANTIC_TEXT_FIELD, String.join(" ", queryTokens), innerHitBuilder); if (randomBoolean()) { builder.boost((float) randomDoubleBetween(0.1, 10.0, true)); } @@ -190,6 +201,21 @@ protected void doAssertLuceneQuery(SemanticQueryBuilder queryBuilder, Query quer case SPARSE_EMBEDDING -> assertSparseEmbeddingLuceneQuery(nestedQuery.getChildQuery()); case TEXT_EMBEDDING -> assertTextEmbeddingLuceneQuery(nestedQuery.getChildQuery()); } + + if (queryBuilder.innerHit() != null) { + // Rewrite to a nested query + QueryBuilder rewrittenQueryBuilder = rewriteQuery(queryBuilder, createQueryRewriteContext(), createSearchExecutionContext()); + assertThat(rewrittenQueryBuilder, instanceOf(NestedQueryBuilder.class)); + + NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) rewrittenQueryBuilder; + Map innerHitInternals = new HashMap<>(); + InnerHitContextBuilder.extractInnerHits(nestedQueryBuilder, innerHitInternals); + assertThat(innerHitInternals.size(), equalTo(1)); + + InnerHitContextBuilder innerHits = innerHitInternals.get(queryBuilder.innerHit().getFieldName()); + assertNotNull(innerHits); + assertThat(innerHits.innerHitBuilder(), equalTo(queryBuilder.innerHit().toInnerHitBuilder())); + } } private void assertSparseEmbeddingLuceneQuery(Query query) { @@ -312,6 +338,20 @@ public void testToXContent() throws IOException { "query": "bar" } }""", queryBuilder); + + SemanticQueryInnerHitBuilder innerHitBuilder = new SemanticQueryInnerHitBuilder().setFrom(1).setSize(2); + queryBuilder = new SemanticQueryBuilder("foo", "bar", innerHitBuilder); + checkGeneratedJson(""" + { + "semantic": { + "field": "foo", + "query": "bar", + "inner_hits": { + "from": 1, + "size": 2 + } + } + }""", queryBuilder); } public void testSerializingQueryWhenNoInferenceId() throws IOException { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index 257616033f080..8569117c348b1 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -65,6 +65,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -167,17 +169,12 @@ public void testParseRequestConfig_E5() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID + MULTILINGUAL_E5_SMALL_MODEL_ID ) ) ); - var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings( - 1, - 4, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, - null - ); + var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings(1, 4, MULTILINGUAL_E5_SMALL_MODEL_ID, null); service.parseRequestConfig( randomInferenceEntityId, @@ -201,7 +198,7 @@ public void testParseRequestConfig_E5() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + MULTILINGUAL_E5_SMALL_MODEL_ID, "not_a_valid_service_setting", randomAlphaOfLength(10) ) @@ -435,19 +432,14 @@ public void testParsePersistedConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + MULTILINGUAL_E5_SMALL_MODEL_ID, ServiceFields.DIMENSIONS, 1 ) ) ); - var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings( - 1, - 4, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, - null - ); + var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings(1, 4, MULTILINGUAL_E5_SMALL_MODEL_ID, null); MultilingualE5SmallModel parsedModel = (MultilingualE5SmallModel) service.parsePersistedConfig( randomInferenceEntityId, @@ -950,6 +942,31 @@ public void testParseRequestConfigEland_SetsDimensionsToOne() { assertThat(model, is(expectedModel)); } + public void testModelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic() { + { + var architectures = Set.of("Aarch64"); + assertFalse( + ElasticsearchInternalService.modelVariantValidForArchitecture(architectures, MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86) + ); + + assertTrue(ElasticsearchInternalService.modelVariantValidForArchitecture(architectures, MULTILINGUAL_E5_SMALL_MODEL_ID)); + } + { + var architectures = Set.of("linux-x86_64"); + assertTrue( + ElasticsearchInternalService.modelVariantValidForArchitecture(architectures, MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86) + ); + assertTrue(ElasticsearchInternalService.modelVariantValidForArchitecture(architectures, MULTILINGUAL_E5_SMALL_MODEL_ID)); + } + { + var architectures = Set.of("linux-x86_64", "Aarch64"); + assertFalse( + ElasticsearchInternalService.modelVariantValidForArchitecture(architectures, MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86) + ); + assertTrue(ElasticsearchInternalService.modelVariantValidForArchitecture(architectures, MULTILINGUAL_E5_SMALL_MODEL_ID)); + } + } + private ElasticsearchInternalService createService(Client client) { var context = new 
InferenceServiceExtension.InferenceServiceFactoryContext(client); return new ElasticsearchInternalService(context); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml index 2070b3752791a..4d90d8faeb3f3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml @@ -122,6 +122,147 @@ setup: - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } +--- +"Query using a sparse embedding model and inner hits": + - requires: + cluster_features: "semantic_text.inner_hits" + reason: semantic_text inner hits support added in 8.16.0 + + - skip: + features: [ "headers", "close_to" ] + + - do: + index: + index: test-sparse-index + id: doc_1 + body: + inference_field: ["inference test", "another inference test", "yet another inference test"] + non_inference_field: "non inference test" + refresh: true + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: {} + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.1._source.text: "yet another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.1._source.embeddings + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.2._source.text: "inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.2._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "size": 1 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 1 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index + body: + query: + 
semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "from": 1 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 2 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "yet another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.1._source.text: "inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.1._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "from": 1, + "size": 1 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 1 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "yet another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "from": 3 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 0 } # Hits total drops to zero when you page off the end + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 0 } + --- "Numeric query using a sparse embedding model": - skip: @@ -250,6 +391,147 @@ setup: - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } +--- +"Query using a dense embedding model and inner hits": + - requires: + cluster_features: "semantic_text.inner_hits" + reason: semantic_text inner hits support added in 8.16.0 + + - skip: + features: [ "headers", "close_to" ] + + - do: + index: + index: test-dense-index + id: doc_1 + body: + inference_field: ["inference test", "another inference test", "yet another inference test"] + non_inference_field: "non inference test" + refresh: true + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: {} + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 1.0, 
error: 0.0001 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.1._source.text: "yet another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.1._source.embeddings + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.2._source.text: "another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.2._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "size": 1 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 1 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "from": 1 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 2 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "yet another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.1._source.text: "another inference test" } + - not_exists: hits.hits.0.inner_hits.inference_field.hits.hits.1._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "from": 1, + "size": 1 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 1 } + - match: { hits.hits.0.inner_hits.inference_field.hits.hits.0._source.text: "yet another inference test" } + - not_exists: 
hits.hits.0.inner_hits.inference_field.hits.hits.0._source.embeddings + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "from": 3 + } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field.hits.total.value: 0 } # Hits total drops to zero when you page off the end + - length: { hits.hits.0.inner_hits.inference_field.hits.hits: 0 } + --- "Numeric query using a dense embedding model": - skip: @@ -478,6 +760,101 @@ setup: - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } +--- +"Query multiple semantic text fields with inner hits": + - requires: + cluster_features: "semantic_text.inner_hits" + reason: semantic_text inner hits support added in 8.16.0 + + - do: + indices.create: + index: test-multi-semantic-text-field-index + body: + mappings: + properties: + inference_field_1: + type: semantic_text + inference_id: sparse-inference-id + inference_field_2: + type: semantic_text + inference_id: sparse-inference-id + + - do: + index: + index: test-multi-semantic-text-field-index + id: doc_1 + body: + inference_field_1: [ "inference test 1", "another inference test 1" ] + inference_field_2: [ "inference test 2", "another inference test 2", "yet another inference test 2" ] + refresh: true + + - do: + search: + index: test-multi-semantic-text-field-index + body: + query: + bool: + must: + - semantic: + field: "inference_field_1" + query: "inference test" + inner_hits: { } + - semantic: + field: "inference_field_2" + query: "inference test" + inner_hits: { } + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0._source.inference_field_1.inference.chunks: 2 } + - length: { hits.hits.0._source.inference_field_2.inference.chunks: 3 } + - match: { hits.hits.0.inner_hits.inference_field_1.hits.total.value: 2 } + - length: { hits.hits.0.inner_hits.inference_field_1.hits.hits: 2 } + - match: { hits.hits.0.inner_hits.inference_field_2.hits.total.value: 3 } + - length: { hits.hits.0.inner_hits.inference_field_2.hits.hits: 3 } + +--- +"Query semantic text field in object with inner hits": + - requires: + cluster_features: "semantic_text.inner_hits" + reason: semantic_text inner hits support added in 8.16.0 + + - do: + indices.create: + index: test-semantic-text-in-object-index + body: + mappings: + properties: + container: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + + - do: + index: + index: test-semantic-text-in-object-index + id: doc_1 + body: + container.inference_field: ["inference test", "another inference test", "yet another inference test"] + refresh: true + + - do: + search: + index: test-semantic-text-in-object-index + body: + query: + semantic: + field: "container.inference_field" + query: "inference test" + inner_hits: {} + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - exists: hits.hits.0.inner_hits.container\.inference_field + - match: { hits.hits.0.inner_hits.container\.inference_field.hits.total.value: 3 } + - length: { 
hits.hits.0.inner_hits.container\.inference_field.hits.hits: 3 } + --- "Query the wrong field type": - do: @@ -839,3 +1216,41 @@ setup: - match: { error.type: "resource_not_found_exception" } - match: { error.reason: "Inference endpoint not found [invalid-inference-id]" } + +--- +"Query using inner hits with invalid args": + - requires: + cluster_features: "semantic_text.inner_hits" + reason: semantic_text inner hits support added in 8.16.0 + + - do: + catch: bad_request + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "from": -1 + } + + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "illegal from value, at least 0 or higher" } + + - do: + catch: bad_request + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + inner_hits: { + "size": -1 + } + + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "illegal size value, at least 0 or higher" } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java index 5cb7bf9e75252..5a70c2f4c5ab9 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java @@ -17,7 +17,7 @@ import java.util.Collection; import java.util.List; -import static org.elasticsearch.xpack.cluster.settings.ClusterSettings.CLUSTER_LOGSDB_ENABLED; +import static org.elasticsearch.xpack.logsdb.LogsdbIndexModeSettingsProvider.CLUSTER_LOGSDB_ENABLED; import static org.elasticsearch.xpack.logsdb.SyntheticSourceLicenseService.FALLBACK_SETTING; public class LogsDBPlugin extends Plugin { diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsdbIndexModeSettingsProvider.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsdbIndexModeSettingsProvider.java index 3f6bb66dfa438..4bef45f2103be 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsdbIndexModeSettingsProvider.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsdbIndexModeSettingsProvider.java @@ -12,6 +12,7 @@ import org.elasticsearch.cluster.metadata.MetadataIndexTemplateService; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettingProvider; @@ -21,9 +22,13 @@ import java.util.List; import java.util.Locale; -import static org.elasticsearch.xpack.cluster.settings.ClusterSettings.CLUSTER_LOGSDB_ENABLED; - final class LogsdbIndexModeSettingsProvider implements IndexSettingProvider { + static final Setting CLUSTER_LOGSDB_ENABLED = Setting.boolSetting( + "cluster.logsdb.enabled", + false, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); private static final String LOGS_PATTERN = "logs-*-*"; private volatile boolean isLogsdbEnabled; diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java index 
5b7792de0622a..6ffd76566ae82 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java @@ -43,8 +43,11 @@ public Settings getAdditionalIndexSettings( Settings indexTemplateAndCreateRequestSettings, List combinedTemplateMappings ) { + // This index name is used when validating component and index templates, we should skip this check in that case. + // (See MetadataIndexTemplateService#validateIndexTemplateV2(...) method) + boolean isTemplateValidation = "validate-index-name".equals(indexName); if (newIndexHasSyntheticSourceUsage(indexTemplateAndCreateRequestSettings) - && syntheticSourceLicenseService.fallbackToStoredSource()) { + && syntheticSourceLicenseService.fallbackToStoredSource(isTemplateValidation)) { LOGGER.debug("creation of index [{}] with synthetic source without it being allowed", indexName); // TODO: handle falling back to stored source } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java index 4e3e916762fab..e62fd6a998ee3 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java @@ -46,12 +46,16 @@ public SyntheticSourceLicenseService(Settings settings) { /** * @return whether synthetic source mode should fallback to stored source. */ - public boolean fallbackToStoredSource() { + public boolean fallbackToStoredSource(boolean isTemplateValidation) { if (syntheticSourceFallback) { return true; } - return SYNTHETIC_SOURCE_FEATURE.check(licenseState) == false; + if (isTemplateValidation) { + return SYNTHETIC_SOURCE_FEATURE.checkWithoutTracking(licenseState) == false; + } else { + return SYNTHETIC_SOURCE_FEATURE.check(licenseState) == false; + } } void setSyntheticSourceFallback(boolean syntheticSourceFallback) { diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java index 2ca3a8d57f2eb..430ee75eb3561 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.license.MockLicenseState; import org.elasticsearch.test.ESTestCase; +import org.mockito.Mockito; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -22,7 +23,17 @@ public void testLicenseAllowsSyntheticSource() { when(licenseState.isAllowed(any())).thenReturn(true); var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); licenseService.setLicenseState(licenseState); - assertFalse("synthetic source is allowed, so not fallback to stored source", licenseService.fallbackToStoredSource()); + assertFalse("synthetic source is allowed, so not fallback to stored source", licenseService.fallbackToStoredSource(false)); + Mockito.verify(licenseState, Mockito.times(1)).featureUsed(any()); + } + + public void 
testLicenseAllowsSyntheticSourceTemplateValidation() { + MockLicenseState licenseState = mock(MockLicenseState.class); + when(licenseState.isAllowed(any())).thenReturn(true); + var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + licenseService.setLicenseState(licenseState); + assertFalse("synthetic source is allowed, so not fallback to stored source", licenseService.fallbackToStoredSource(true)); + Mockito.verify(licenseState, Mockito.never()).featureUsed(any()); } public void testDefaultDisallow() { @@ -30,7 +41,8 @@ public void testDefaultDisallow() { when(licenseState.isAllowed(any())).thenReturn(false); var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); licenseService.setLicenseState(licenseState); - assertTrue("synthetic source is not allowed, so fallback to stored source", licenseService.fallbackToStoredSource()); + assertTrue("synthetic source is not allowed, so fallback to stored source", licenseService.fallbackToStoredSource(false)); + Mockito.verify(licenseState, Mockito.never()).featureUsed(any()); } public void testFallback() { @@ -41,8 +53,9 @@ public void testFallback() { licenseService.setSyntheticSourceFallback(true); assertTrue( "synthetic source is allowed, but fallback has been enabled, so fallback to stored source", - licenseService.fallbackToStoredSource() + licenseService.fallbackToStoredSource(false) ); + Mockito.verifyNoInteractions(licenseState); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestDeleteTrainedModelAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestDeleteTrainedModelAction.java index e010bd67dff75..ad3e752a3ea3e 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestDeleteTrainedModelAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestDeleteTrainedModelAction.java @@ -8,8 +8,8 @@ import org.elasticsearch.action.support.master.AcknowledgedRequest; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; @@ -28,11 +28,20 @@ @ServerlessScope(Scope.PUBLIC) public class RestDeleteTrainedModelAction extends BaseRestHandler { + @UpdateForV9 + // one or more routes use ".replaces" with RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // that route in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. The original code remains commented out below for reference. 
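    // A minimal sketch (hypothetical handler and paths, not part of this change) contrasting the
    // two route styles referred to above: ".replaces" with RestApiVersion.V_8 would gate the old
    // path behind REST API compatibility headers in v9, whereas ".deprecateAndKeep" keeps the old
    // path reachable and only emits a deprecation warning:
    //
    //     public List<Route> routes() {
    //         return List.of(
    //             new Route(GET, "/_example/new_path"),
    //             Route.builder(GET, "/_example/old_path")
    //                 .deprecateAndKeep("Use the new_path API instead.")
    //                 .build()
    //         );
    //     }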
@Override public List routes() { return List.of( - Route.builder(DELETE, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}") - .replaces(DELETE, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}", RestApiVersion.V_8) + // Route.builder(DELETE, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}") + // .replaces(DELETE, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}", RestApiVersion.V_8) + // .build() + new Route(DELETE, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}"), + Route.builder(DELETE, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}") + .deprecateAndKeep("Use the trained_models API instead.") .build() ); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsAction.java index ae7b26ebad0e4..dfbe375f0d1fc 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsAction.java @@ -11,7 +11,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.logging.DeprecationLogger; -import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestChannel; import org.elasticsearch.rest.RestRequest; @@ -49,13 +49,24 @@ public class RestGetTrainedModelsAction extends BaseRestHandler { private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestGetTrainedModelsAction.class); private static final String INCLUDE_MODEL_DEFINITION = "include_model_definition"; + @UpdateForV9 + // one or more routes use ".replaces" with RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // that route in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. The original code remains commented out below for reference. 
@Override public List routes() { return List.of( - Route.builder(GET, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}") - .replaces(GET, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}", RestApiVersion.V_8) + // Route.builder(GET, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}") + // .replaces(GET, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}", RestApiVersion.V_8) + // .build(), + // Route.builder(GET, BASE_PATH + "trained_models").replaces(GET, BASE_PATH + "inference", RestApiVersion.V_8).build() + new Route(GET, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}"), + Route.builder(GET, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}") + .deprecateAndKeep("Use the trained_models API instead.") .build(), - Route.builder(GET, BASE_PATH + "trained_models").replaces(GET, BASE_PATH + "inference", RestApiVersion.V_8).build() + new Route(GET, BASE_PATH + "trained_models"), + Route.builder(GET, BASE_PATH + "inference").deprecateAndKeep("Use the trained_models API instead.").build() ); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsStatsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsStatsAction.java index 9c44728fb75e2..3c192d80f7485 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsStatsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestGetTrainedModelsStatsAction.java @@ -9,7 +9,7 @@ import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.common.Strings; -import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; @@ -30,15 +30,26 @@ @ServerlessScope(Scope.PUBLIC) public class RestGetTrainedModelsStatsAction extends BaseRestHandler { + @UpdateForV9 + // one or more routes use ".replaces" with RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // that route in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. The original code remains commented out below for reference. 
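    // For illustration only (hypothetical request, assuming the standard Elasticsearch REST API
    // compatibility media type): under the old ".replaces" behaviour, a v9 caller would have had
    // to opt in to v8 compatibility to reach the legacy stats path, e.g. with Apache HttpClient:
    //
    //     HttpGet statsRequest = new HttpGet("http://localhost:9200/_ml/inference/_stats");
    //     statsRequest.setHeader("Accept", "application/vnd.elasticsearch+json;compatible-with=8");
    //
    // With ".deprecateAndKeep", the same path stays reachable without that header; responses carry
    // a deprecation warning instead.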
@Override public List routes() { return List.of( - Route.builder(GET, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}/_stats") - .replaces(GET, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}/_stats", RestApiVersion.V_8) + // Route.builder(GET, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}/_stats") + // .replaces(GET, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}/_stats", RestApiVersion.V_8) + // .build(), + // Route.builder(GET, BASE_PATH + "trained_models/_stats") + // .replaces(GET, BASE_PATH + "inference/_stats", RestApiVersion.V_8) + // .build() + new Route(GET, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}/_stats"), + Route.builder(GET, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}/_stats") + .deprecateAndKeep("Use the trained_models API instead.") .build(), - Route.builder(GET, BASE_PATH + "trained_models/_stats") - .replaces(GET, BASE_PATH + "inference/_stats", RestApiVersion.V_8) - .build() + new Route(GET, BASE_PATH + "trained_models/_stats"), + Route.builder(GET, BASE_PATH + "inference/_stats").deprecateAndKeep("Use the trained_models API instead.").build() ); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestInferTrainedModelDeploymentAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestInferTrainedModelDeploymentAction.java index 61f319a2157c4..7327f7426e00c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestInferTrainedModelDeploymentAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestInferTrainedModelDeploymentAction.java @@ -10,8 +10,8 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.common.ValidationException; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.RestCancellableNodeClient; @@ -37,11 +37,29 @@ public String getName() { return "xpack_ml_infer_trained_models_deployment_action"; } + @UpdateForV9 // these routes were ".deprecated" in RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // this API in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. The original code remains commented out below for reference. @Override public List routes() { return Collections.singletonList( + // Route.builder(POST, PATH) + // .deprecated( + // "[" + // + POST.name() + // + " " + // + PATH + // + "] is deprecated! Use [" + // + POST.name() + // + " " + // + RestInferTrainedModelAction.PATH + // + "] instead.", + // RestApiVersion.V_8 + // ) + // .build() Route.builder(POST, PATH) - .deprecated( + .deprecateAndKeep( "[" + POST.name() + " " @@ -50,8 +68,7 @@ public List routes() { + POST.name() + " " + RestInferTrainedModelAction.PATH - + "] instead.", - RestApiVersion.V_8 + + "] instead." 
) .build() ); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestPutTrainedModelAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestPutTrainedModelAction.java index e57d5912752d2..13d46b8878679 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestPutTrainedModelAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/inference/RestPutTrainedModelAction.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.ml.rest.inference; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; @@ -27,11 +27,20 @@ @ServerlessScope(Scope.PUBLIC) public class RestPutTrainedModelAction extends BaseRestHandler { + @UpdateForV9 + // one or more routes use ".replaces" with RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // that route in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. The original code remains commented out below for reference. @Override public List routes() { return List.of( - Route.builder(PUT, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}") - .replaces(PUT, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}", RestApiVersion.V_8) + // Route.builder(PUT, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}") + // .replaces(PUT, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}", RestApiVersion.V_8) + // .build() + new Route(PUT, BASE_PATH + "trained_models/{" + TrainedModelConfig.MODEL_ID + "}"), + Route.builder(PUT, BASE_PATH + "inference/{" + TrainedModelConfig.MODEL_ID + "}") + .deprecateAndKeep("Use the trained_models API instead.") .build() ); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java index 3d7b2d392836a..41462b016a60a 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java @@ -8,6 +8,7 @@ import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.RestStatus; @@ -27,12 +28,17 @@ public class RestPostDataAction extends BaseRestHandler { private static final String DEFAULT_RESET_START = ""; private static final String DEFAULT_RESET_END = ""; + @UpdateForV9 // these routes were ".deprecated" in RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // this API in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. 
The original code remains commented out below for reference. @Override public List routes() { final String msg = "Posting data directly to anomaly detection jobs is deprecated, " + "in a future major version it will be compulsory to use a datafeed"; return List.of( - Route.builder(POST, BASE_PATH + "anomaly_detectors/{" + Job.ID + "}/_data").deprecated(msg, RestApiVersion.V_8).build(), + // Route.builder(POST, BASE_PATH + "anomaly_detectors/{" + Job.ID + "}/_data").deprecated(msg, RestApiVersion.V_8).build(), + Route.builder(POST, BASE_PATH + "anomaly_detectors/{" + Job.ID + "}/_data").deprecateAndKeep(msg).build(), Route.builder(POST, PRE_V7_BASE_PATH + "anomaly_detectors/{" + Job.ID + "}/_data").deprecated(msg, RestApiVersion.V_7).build() ); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java index 44aaba88c58a8..7d98aaf67a7f3 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java @@ -148,6 +148,8 @@ public void testAutoscaling_maxAllocationsSafeguard() { } public void testAutoscaling_scaleDownToZeroAllocations() { + assumeTrue("Should only run if adaptive allocations feature flag is enabled", ScaleToZeroFeatureFlag.isEnabled()); + AdaptiveAllocationsScaler adaptiveAllocationsScaler = new AdaptiveAllocationsScaler("test-deployment", 1); // 1 hour with 1 request per 1 seconds, so don't scale. for (int i = 0; i < 3600; i++) { diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/action/TransportMonitoringBulkActionTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/action/TransportMonitoringBulkActionTests.java index cd05c9bf0d754..33470a35486a1 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/action/TransportMonitoringBulkActionTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/action/TransportMonitoringBulkActionTests.java @@ -126,10 +126,7 @@ public void testExecuteWithGlobalBlock() throws Exception { final MonitoringBulkRequest request = randomRequest(); assertThat( - asInstanceOf( - ClusterBlockException.class, - safeAwaitFailure(MonitoringBulkResponse.class, l -> action.execute(null, request, l)) - ), + safeAwaitFailure(ClusterBlockException.class, MonitoringBulkResponse.class, l -> action.execute(null, request, l)), hasToString(containsString("ClusterBlockException: blocked by: [SERVICE_UNAVAILABLE/2/no master]")) ); } @@ -175,9 +172,10 @@ public void testExecuteEmptyRequest() { ); assertThat( - asInstanceOf( + safeAwaitFailure( ActionRequestValidationException.class, - safeAwaitFailure(MonitoringBulkResponse.class, l -> action.execute(null, new MonitoringBulkRequest(), l)) + MonitoringBulkResponse.class, + l -> action.execute(null, new MonitoringBulkRequest(), l) ), hasToString(containsString("no monitoring documents added")) ); diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java index 
6afea6faa607e..73ceafc0a24b9 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java @@ -434,7 +434,8 @@ public void testToXContent() throws IOException { MappingStats.of(metadata, () -> {}), AnalysisStats.of(metadata, () -> {}), VersionStats.of(metadata, singletonList(mockNodeResponse)), - ClusterSnapshotStats.EMPTY + ClusterSnapshotStats.EMPTY, + null ); final MonitoringDoc.Node node = new MonitoringDoc.Node("_uuid", "_host", "_addr", "_ip", "_name", 1504169190855L); diff --git a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/plan/TableIdentifier.java b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/plan/TableIdentifier.java index 188bd4cce9c13..ad3322ce4501d 100644 --- a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/plan/TableIdentifier.java +++ b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/plan/TableIdentifier.java @@ -10,6 +10,8 @@ import java.util.Objects; +import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR; + public class TableIdentifier { private final Source source; @@ -55,7 +57,7 @@ public Source source() { } public String qualifiedIndex() { - return cluster != null ? cluster + ":" + index : index; + return cluster != null ? cluster + REMOTE_CLUSTER_INDEX_SEPARATOR + index : index; } @Override @@ -63,7 +65,7 @@ public String toString() { StringBuilder builder = new StringBuilder(); if (cluster != null) { builder.append(cluster); - builder.append(":"); + builder.append(REMOTE_CLUSTER_INDEX_SEPARATOR); } builder.append(index); return builder.toString(); diff --git a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/util/StringUtils.java b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/util/StringUtils.java index dad3c8574dc4a..f03e3a111d189 100644 --- a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/util/StringUtils.java +++ b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/util/StringUtils.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.core.Tuple; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.transport.RemoteClusterAware; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; @@ -27,7 +28,6 @@ import java.util.StringJoiner; import static java.util.stream.Collectors.toList; -import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR; import static org.elasticsearch.transport.RemoteClusterAware.buildRemoteIndexName; import static org.elasticsearch.xpack.ql.util.NumericUtils.isUnsignedLong; @@ -375,10 +375,8 @@ public static String ordinal(int i) { } public static Tuple splitQualifiedIndex(String indexName) { - int separatorOffset = indexName.indexOf(REMOTE_CLUSTER_INDEX_SEPARATOR); - return separatorOffset > 0 - ? 
Tuple.tuple(indexName.substring(0, separatorOffset), indexName.substring(separatorOffset + 1)) - : Tuple.tuple(null, indexName); + String[] split = RemoteClusterAware.splitIndexName(indexName); + return Tuple.tuple(split[0], split[1]); } public static String qualifyAndJoinIndices(String cluster, String[] indices) { @@ -390,6 +388,6 @@ public static String qualifyAndJoinIndices(String cluster, String[] indices) { } public static boolean isQualified(String indexWildcard) { - return indexWildcard.indexOf(REMOTE_CLUSTER_INDEX_SEPARATOR) > 0; + return RemoteClusterAware.isRemoteIndexName(indexWildcard); } } diff --git a/x-pack/plugin/redact/src/main/java/org/elasticsearch/xpack/redact/RedactProcessor.java b/x-pack/plugin/redact/src/main/java/org/elasticsearch/xpack/redact/RedactProcessor.java index 04a423c7ea330..187126fb31e3e 100644 --- a/x-pack/plugin/redact/src/main/java/org/elasticsearch/xpack/redact/RedactProcessor.java +++ b/x-pack/plugin/redact/src/main/java/org/elasticsearch/xpack/redact/RedactProcessor.java @@ -55,6 +55,12 @@ public class RedactProcessor extends AbstractProcessor { private static final String DEFAULT_REDACTED_START = "<"; private static final String DEFAULT_REDACTED_END = ">"; + protected static final String REDACT_KEY = "_redact"; + protected static final String IS_REDACTED_KEY = "_is_redacted"; + protected static final String METADATA_PATH_REDACT = IngestDocument.INGEST_KEY + "." + REDACT_KEY; + // indicates if document has been redacted, path: _ingest._redact._is_redacted + protected static final String METADATA_PATH_REDACT_IS_REDACTED = METADATA_PATH_REDACT + "." + IS_REDACTED_KEY; + private final String redactField; private final List groks; private final boolean ignoreMissing; @@ -65,6 +71,8 @@ public class RedactProcessor extends AbstractProcessor { private final XPackLicenseState licenseState; private final boolean skipIfUnlicensed; + private final boolean traceRedact; + RedactProcessor( String tag, String description, @@ -76,7 +84,8 @@ public class RedactProcessor extends AbstractProcessor { String redactedEndToken, MatcherWatchdog matcherWatchdog, XPackLicenseState licenseState, - boolean skipIfUnlicensed + boolean skipIfUnlicensed, + boolean traceRedact ) { super(tag, description); this.redactField = redactField; @@ -94,6 +103,7 @@ public class RedactProcessor extends AbstractProcessor { } this.licenseState = licenseState; this.skipIfUnlicensed = skipIfUnlicensed; + this.traceRedact = traceRedact; } @Override @@ -128,6 +138,8 @@ public IngestDocument execute(IngestDocument ingestDocument) { try { String redacted = matchRedact(fieldValue, groks, redactedStartToken, redactedEndToken); ingestDocument.setFieldValue(redactField, redacted); + updateMetadataIfNecessary(ingestDocument, fieldValue, redacted); + return ingestDocument; } catch (RuntimeException e) { // grok throws a RuntimeException when the watchdog interrupts the match @@ -203,6 +215,21 @@ private static void matchRepeat(Grok grok, byte[] utf8Bytes, RegionTrackingMatch } while (offset != length); } + private void updateMetadataIfNecessary(IngestDocument ingestDocument, String fieldValue, String redacted) { + if (traceRedact == false || fieldValue == null) { + return; + } + + Boolean isRedactedMetadata = ingestDocument.getFieldValue(METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true); + boolean alreadyRedacted = Boolean.TRUE.equals(isRedactedMetadata); + boolean isRedacted = fieldValue.equals(redacted) == false; + + // document newly redacted + if (alreadyRedacted == false && isRedacted) { + 
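// Here fieldValue is the original text and redacted is the grok-rewritten text, so isRedacted simply means
// "this processor changed something". The marker is only ever set to true, and only on the first redaction,
// which lets several redact processors in one pipeline share a single _ingest._redact._is_redacted flag
// without a later, non-matching processor clearing it.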
ingestDocument.setFieldValue(METADATA_PATH_REDACT_IS_REDACTED, true); + } + } + /** * A Grok capture extractor which tracks matched regions * and the Grok pattern name for redaction later. @@ -389,6 +416,8 @@ public RedactProcessor create( String redactStart = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "prefix", DEFAULT_REDACTED_START); String redactEnd = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "suffix", DEFAULT_REDACTED_END); + boolean traceRedact = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "trace_redact", false); + if (matchPatterns == null || matchPatterns.isEmpty()) { throw newConfigurationException(TYPE, processorTag, "patterns", "List of patterns must not be empty"); } @@ -406,7 +435,8 @@ public RedactProcessor create( redactEnd, matcherWatchdog, licenseState, - skipIfUnlicensed + skipIfUnlicensed, + traceRedact ); } catch (Exception e) { throw newConfigurationException( diff --git a/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorFactoryTests.java b/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorFactoryTests.java index 376e7caa8137d..affcc72614aa8 100644 --- a/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorFactoryTests.java +++ b/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorFactoryTests.java @@ -68,6 +68,7 @@ public void testConfigKeysRemoved() throws Exception { config.put("patterns", List.of("%{MY_PATTERN:name}!")); config.put("pattern_definitions", Map.of("MY_PATTERN", "foo")); config.put("ignore_missing", true); + config.put("trace_redact", true); config.put("extra", "unused"); factory.create(null, null, null, config); diff --git a/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorTests.java b/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorTests.java index a775adb7a4c15..3f44957201ef0 100644 --- a/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorTests.java +++ b/x-pack/plugin/redact/src/test/java/org/elasticsearch/xpack/redact/RedactProcessorTests.java @@ -259,7 +259,8 @@ public void testLicenseChecks() throws Exception { ">", MatcherWatchdog.noop(), notAllowed, - false // set skip_if_unlicensed to false, we do not want to skip, we do want to fail + false, // set skip_if_unlicensed to false, we do not want to skip, we do want to fail + false ); assertThat(processor.getSkipIfUnlicensed(), equalTo(false)); var ingestDoc = createIngestDoc(Map.of("not_the_field", "fieldValue")); @@ -314,6 +315,118 @@ public void testLicenseChanges() throws Exception { } } + @SuppressWarnings("unchecked") + public void testTraceRedact() throws Exception { + var config = new HashMap(); + config.put("field", "to_redact"); + config.put("patterns", List.of("%{EMAILADDRESS:REDACTED}")); + config.put("trace_redact", true); + { + var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( + null, + "t", + "d", + new HashMap<>(config) + ); + var message = "this should not be redacted"; + var ingestDoc = createIngestDoc(Map.of("to_redact", message)); + var redactedDoc = processor.execute(ingestDoc); + + assertEquals(message, redactedDoc.getFieldValue("to_redact", String.class)); + assertNull(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true)); + } + { + var processor = new RedactProcessor.Factory(mockLicenseState(), 
MatcherWatchdog.noop()).create( + null, + "t", + "d", + new HashMap<>(config) + ); + var ingestDoc = createIngestDoc(Map.of("to_redact", "thisisanemail@address.com will be redacted")); + var redactedDoc = processor.execute(ingestDoc); + + assertEquals(" will be redacted", redactedDoc.getFieldValue("to_redact", String.class)); + // validate ingest metadata path correctly resolved + assertTrue(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); + // validate ingest metadata structure correct + var ingestMeta = redactedDoc.getIngestMetadata(); + assertTrue(ingestMeta.containsKey(RedactProcessor.REDACT_KEY)); + var redactMetadata = (HashMap) ingestMeta.get(RedactProcessor.REDACT_KEY); + assertTrue(redactMetadata.containsKey(RedactProcessor.IS_REDACTED_KEY)); + assertTrue((Boolean) redactMetadata.get(RedactProcessor.IS_REDACTED_KEY)); + } + { + var configNoTrace = new HashMap(); + configNoTrace.put("field", "to_redact"); + configNoTrace.put("patterns", List.of("%{EMAILADDRESS:REDACTED}")); + + var processor = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create(null, "t", "d", configNoTrace); + var ingestDoc = createIngestDoc(Map.of("to_redact", "thisisanemail@address.com will be redacted")); + var redactedDoc = processor.execute(ingestDoc); + + assertEquals(" will be redacted", redactedDoc.getFieldValue("to_redact", String.class)); + assertNull(redactedDoc.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true)); + } + } + + public void testTraceRedactMultipleProcessors() throws Exception { + var configRedact = new HashMap(); + configRedact.put("field", "to_redact"); + configRedact.put("patterns", List.of("%{EMAILADDRESS:REDACTED}")); + configRedact.put("trace_redact", true); + + var configNoRedact = new HashMap(); + configNoRedact.put("field", "to_redact"); + configNoRedact.put("patterns", List.of("%{IP:REDACTED}")); // not in the doc + configNoRedact.put("trace_redact", true); + + // first processor does not redact doc, second one does + { + var processorRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( + null, + "t1", + "d", + new HashMap<>(configRedact) + ); + var processorNoRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( + null, + "t2", + "d", + new HashMap<>(configNoRedact) + ); + var ingestDocWithEmail = createIngestDoc(Map.of("to_redact", "thisisanemail@address.com will be redacted")); + + var docNotRedacted = processorNoRedact.execute(ingestDocWithEmail); + assertNull(docNotRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class, true)); + + var docRedacted = processorRedact.execute(docNotRedacted); + assertTrue(docRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); + } + // first processor redacts doc, second one does not + { + var processorRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( + null, + "t1", + "d", + new HashMap<>(configRedact) + ); + var processorNoRedact = new RedactProcessor.Factory(mockLicenseState(), MatcherWatchdog.noop()).create( + null, + "t2", + "d", + new HashMap<>(configNoRedact) + ); + var ingestDocWithEmail = createIngestDoc(Map.of("to_redact", "thisisanemail@address.com will be redacted")); + + var docRedacted = processorRedact.execute(ingestDocWithEmail); + assertTrue(docRedacted.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); + + // validate 
does not override already redacted doc metadata + var docRedactedAlready = processorNoRedact.execute(docRedacted); + assertTrue(docRedactedAlready.getFieldValue(RedactProcessor.METADATA_PATH_REDACT_IS_REDACTED, Boolean.class)); + } + } + public void testMergeLongestRegion() { var r = List.of( new RedactProcessor.RegionTrackingMatchExtractor.Replacement(10, 20, "first"), diff --git a/x-pack/plugin/security/qa/audit/src/javaRestTest/java/org/elasticsearch/xpack/security/audit/AuditIT.java b/x-pack/plugin/security/qa/audit/src/javaRestTest/java/org/elasticsearch/xpack/security/audit/AuditIT.java index e6af9c634e72f..2c329db5e3b50 100644 --- a/x-pack/plugin/security/qa/audit/src/javaRestTest/java/org/elasticsearch/xpack/security/audit/AuditIT.java +++ b/x-pack/plugin/security/qa/audit/src/javaRestTest/java/org/elasticsearch/xpack/security/audit/AuditIT.java @@ -8,9 +8,7 @@ package org.elasticsearch.xpack.security.audit; import org.elasticsearch.client.Request; -import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.Response; -import org.elasticsearch.client.WarningsHandler; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.Streams; import org.elasticsearch.common.settings.SecureString; @@ -81,14 +79,14 @@ protected String getTestRestCluster() { } public void testAuditAuthenticationSuccess() throws Exception { - final Request request = new Request("GET", randomFrom("/_security/_authenticate", "/_xpack/security/_authenticate")); + final Request request = new Request("GET", "/_security/_authenticate"); executeAndVerifyAudit(request, AuditLevel.AUTHENTICATION_SUCCESS, event -> { assertThat(event, hasEntry(LoggingAuditTrail.AUTHENTICATION_TYPE_FIELD_NAME, "REALM")); }); } public void testAuditAuthenticationFailure() throws Exception { - final Request request = new Request("GET", randomFrom("/_security/_authenticate", "/_xpack/security/_authenticate")); + final Request request = new Request("GET", "/_security/_authenticate"); String basicAuth = basicAuthHeaderValue(API_USER, new SecureString(new char[0])); request.setOptions(request.getOptions().toBuilder().addHeader("Authorization", basicAuth).addParameter("ignore", "401")); executeAndVerifyAudit(request, AuditLevel.AUTHENTICATION_FAILED, event -> {}); @@ -96,7 +94,7 @@ public void testAuditAuthenticationFailure() throws Exception { public void testFilteringOfRequestBodies() throws Exception { final String username = randomAlphaOfLength(4) + randomIntBetween(100, 999); - final Request request = new Request(randomFrom("PUT", "POST"), randomFrom("/_security/user/", "/_xpack/security/user/") + username); + final Request request = new Request(randomFrom("PUT", "POST"), "/_security/user/" + username); final String password = randomAlphaOfLength(4) + randomIntBetween(10, 99) + randomAlphaOfLength(4); request.setJsonEntity("{ \"password\":\"" + password + "\", \"roles\":[\"superuser\"] }"); executeAndVerifyAudit(request, AuditLevel.AUTHENTICATION_SUCCESS, event -> { @@ -141,15 +139,6 @@ private void executeAndVerifyAudit(Request request, AuditLevel eventType, Checke } private static Response executeRequest(Request request) throws IOException { - if (request.getEndpoint().startsWith("/_xpack/security/")) { - final RequestOptions options = request.getOptions() - .toBuilder() - .addHeader("Content-Type", "application/json; compatible-with=7") - .addHeader("Accept", "application/json; compatible-with=7") - .setWarningsHandler(WarningsHandler.PERMISSIVE) - .build(); - request.setOptions(options); 
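// The branch removed here added v7 compatibility media types ("application/json; compatible-with=7") and a
// permissive warnings handler so the legacy /_xpack/security/ endpoints could be exercised; with those
// endpoints dropped from the tests above, the default request options are sufficient.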
- } return client().performRequest(request); } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/IndicesAndAliasesResolver.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/IndicesAndAliasesResolver.java index 42a1d89a9aa00..d5cbbe8b349a7 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/IndicesAndAliasesResolver.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/IndicesAndAliasesResolver.java @@ -169,11 +169,9 @@ ResolvedIndices resolveIndicesAndAliasesWithoutWildcards(String action, IndicesR // and no remote clusters are configured that match it if (split.getLocal().isEmpty() && split.getRemote().isEmpty()) { for (String indexExpression : indices) { - String[] clusterAndIndex = indexExpression.split(":", 2); - if (clusterAndIndex.length == 2) { - if (clusterAndIndex[0].contains("*")) { - throw new NoSuchRemoteClusterException(clusterAndIndex[0]); - } + String[] clusterAndIndex = RemoteClusterAware.splitIndexName(indexExpression); + if (clusterAndIndex[0] != null && clusterAndIndex[0].contains("*")) { + throw new NoSuchRemoteClusterException(clusterAndIndex[0]); } } } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/interceptor/SearchRequestCacheDisablingInterceptor.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/interceptor/SearchRequestCacheDisablingInterceptor.java index d10057ec7e740..d8ec078507bfe 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/interceptor/SearchRequestCacheDisablingInterceptor.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/interceptor/SearchRequestCacheDisablingInterceptor.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.RemoteClusterAware; import org.elasticsearch.transport.TransportActionProxy; import org.elasticsearch.xpack.core.security.authz.AuthorizationEngine; import org.elasticsearch.xpack.core.security.authz.AuthorizationServiceField; @@ -18,7 +19,6 @@ import java.util.Arrays; -import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR; import static org.elasticsearch.xpack.core.security.SecurityField.DOCUMENT_LEVEL_SECURITY_FEATURE; import static org.elasticsearch.xpack.core.security.SecurityField.FIELD_LEVEL_SECURITY_FEATURE; @@ -55,6 +55,6 @@ && hasRemoteIndices(searchRequest) // package private for test static boolean hasRemoteIndices(SearchRequest request) { - return Arrays.stream(request.indices()).anyMatch(name -> name.indexOf(REMOTE_CLUSTER_INDEX_SEPARATOR) >= 0); + return Arrays.stream(request.indices()).anyMatch(RemoteClusterAware::isRemoteIndexName); } } diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java index 1e9b7f23c60d5..b8acd9808a35e 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java +++ 
b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java @@ -480,9 +480,10 @@ public boolean acceptsEmptyRegister() { } private RepositoryVerificationException analyseRepositoryExpectFailure(RepositoryAnalyzeAction.Request request) { - return asInstanceOf( + return safeAwaitAndUnwrapFailure( RepositoryVerificationException.class, - ExceptionsHelper.unwrapCause(safeAwaitFailure(RepositoryAnalyzeAction.Response.class, l -> analyseRepository(request, l))) + RepositoryAnalyzeAction.Response.class, + l -> analyseRepository(request, l) ); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/redact/10_redact_processor.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/redact/10_redact_processor.yml index 559d87879faad..e864d191a3ec1 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/redact/10_redact_processor.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/redact/10_redact_processor.yml @@ -24,7 +24,7 @@ index: test id: "1" pipeline: "pipeline-using-a-redact-processor" - body: {to_redact: "0.0.0.1 is my secret IP to redact"} + body: { to_redact: "0.0.0.1 is my secret IP to redact" } - do: get: @@ -96,3 +96,25 @@ } - length: { docs: 1 } - match: { docs.0.doc._source.to_redact: "==*EMAIL*== will be redacted" } +--- +"Test redact with trace_redact": + - do: + ingest.simulate: + body: > + { + "pipeline": { + "processors": [ + { + "redact": { + "field": "to_redact", + "patterns": ["%{EMAILADDRESS:EMAIL}", "%{IP:IP_ADDRESS}"], + "trace_redact": true + } + } + ] + }, + "docs": [{"_source": {"to_redact": "this-email@address.com will be redacted"}}] + } + - length: { docs: 1 } + - match: { docs.0.doc._source.to_redact: " will be redacted" } + - match: { docs.0.doc._ingest._redact._is_redacted: true } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml index f5c9f3d92369a..2e3ba773fb0f2 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml @@ -49,7 +49,7 @@ wildcard field type ignore_above: - length: { hits.hits: 1 } - match: { hits.hits.0._source.a_wildcard: "foo bar" } - match: { hits.hits.0._source.b_wildcard: "the quick brown" } - - match: { hits.hits.0._source.c_wildcard: ["bar", "foo"] } + - match: { hits.hits.0._source.c_wildcard: ["bar", "foo", "jumps over the lazy dog"] } - match: { hits.hits.0._source.d_wildcard: ["bar", "foo", "the quick"] } - match: { hits.hits.0.fields.a_wildcard.0: "foo bar" } - match: { hits.hits.0.fields.b_wildcard.0: "the quick brown" } diff --git a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java index b45f17e434388..ce1b664a46887 100644 --- a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java +++ b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java @@ -35,8 +35,6 @@ import java.util.List; import java.util.Map; -import static org.elasticsearch.xpack.cluster.settings.ClusterSettings.CLUSTER_LOGSDB_ENABLED; - public class 
StackTemplateRegistry extends IndexTemplateRegistry { private static final Logger logger = LogManager.getLogger(StackTemplateRegistry.class); @@ -130,10 +128,10 @@ public StackTemplateRegistry( this.clusterService = clusterService; this.featureService = featureService; this.stackTemplateEnabled = STACK_TEMPLATES_ENABLED.get(nodeSettings); - this.componentTemplateConfigs = loadComponentTemplateConfigs(CLUSTER_LOGSDB_ENABLED.get(nodeSettings)); + this.componentTemplateConfigs = loadComponentTemplateConfigs(); } - private Map loadComponentTemplateConfigs(boolean logsDbEnabled) { + private Map loadComponentTemplateConfigs() { final Map componentTemplates = new HashMap<>(); for (IndexTemplateConfig config : List.of( new IndexTemplateConfig( @@ -159,7 +157,7 @@ private Map loadComponentTemplateConfigs(boolean logs ), new IndexTemplateConfig( LOGS_SETTINGS_COMPONENT_TEMPLATE_NAME, - logsDbEnabled ? "/logs@settings-logsdb.json" : "/logs@settings.json", + "/logs@settings.json", REGISTRY_VERSION, TEMPLATE_VERSION_VARIABLE, Map.of("xpack.stack.template.deprecated", "false") diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java index 65325f2268ed2..03b18744eba2a 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java @@ -8,7 +8,7 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; @@ -27,10 +27,17 @@ @ServerlessScope(Scope.INTERNAL) public class RestFindStructureAction extends BaseRestHandler { + @UpdateForV9 + // one or more routes use ".replaces" with RestApiVersion.V_8 which will require use of REST API compatibility headers to access + // that route in v9. It is unclear if this was intentional for v9, and the code has been updated to ".deprecateAndKeep" which will + // continue to emit deprecations warnings but will not require any special headers to access the API in v9. + // Please review and update the code and tests as needed. The original code remains commented out below for reference. 
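// A minimal sketch of the two registration styles, using hypothetical paths (POST, Route and RestApiVersion
// are the same types used by the surrounding handlers):
//
//     // v8 style: in v9 the old path only answers requests carrying REST API compatibility headers
//     Route.builder(POST, "/_new/endpoint").replaces(POST, "/_old/endpoint", RestApiVersion.V_8).build()
//
//     // style adopted in this change: both paths stay reachable, the old one emits deprecation warnings
//     new Route(POST, "/_new/endpoint"),
//     Route.builder(POST, "/_old/endpoint").deprecateAndKeep("Use /_new/endpoint instead.").build()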
@Override public List routes() { return List.of( - Route.builder(POST, BASE_PATH + "find_structure").replaces(POST, "/_ml/find_file_structure", RestApiVersion.V_8).build() + // Route.builder(POST, BASE_PATH + "find_structure").replaces(POST, "/_ml/find_file_structure", RestApiVersion.V_8).build() + new Route(POST, BASE_PATH + "find_structure"), + Route.builder(POST, "/_ml/find_file_structure").deprecateAndKeep("Use the _text_structure API instead.").build() ); } diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 1e97e64371586..7784e7ffdda12 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -994,7 +994,7 @@ public FieldMapper.Builder getMergeBuilder() { protected SyntheticSourceSupport syntheticSourceSupport() { var layers = new ArrayList(); layers.add(new WildcardSyntheticFieldLoader()); - if (ignoreAbove != ignoreAboveDefault) { + if (ignoreAbove != Integer.MAX_VALUE) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git a/x-pack/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/xpack/restart/MLModelDeploymentFullClusterRestartIT.java b/x-pack/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/xpack/restart/MLModelDeploymentFullClusterRestartIT.java index 484e2ed3ac9c3..abc09a6563ee9 100644 --- a/x-pack/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/xpack/restart/MLModelDeploymentFullClusterRestartIT.java +++ b/x-pack/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/xpack/restart/MLModelDeploymentFullClusterRestartIT.java @@ -16,10 +16,12 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.Strings; import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.test.rest.RestTestLegacyFeatures; import org.elasticsearch.upgrades.FullClusterRestartUpgradeStatus; +import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus; import org.junit.Before; @@ -195,14 +197,30 @@ private Response startDeployment(String modelId) throws IOException { } private Response startDeployment(String modelId, String waitForState) throws IOException { + String inferenceThreadParamName = "threads_per_allocation"; + String modelThreadParamName = "number_of_allocations"; + String compatibleHeader = null; + if (isRunningAgainstOldCluster()) { + compatibleHeader = compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_8); + inferenceThreadParamName = "inference_threads"; + modelThreadParamName = "model_threads"; + } + Request request = new Request( "POST", "/_ml/trained_models/" + modelId + "/deployment/_start?timeout=40s&wait_for=" + waitForState - + "&inference_threads=1&model_threads=1" + + "&" + + inferenceThreadParamName + + "=1&" + + modelThreadParamName + + "=1" ); + if (compatibleHeader != null) { + request.setOptions(request.getOptions().toBuilder().addHeader("Accept", compatibleHeader).build()); + } 
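// Against the old (8.x) cluster the request assembled above is roughly
//     POST /_ml/trained_models/<model_id>/deployment/_start?timeout=40s&wait_for=<state>&inference_threads=1&model_threads=1
// sent with the v8 compatibility Accept header; once the cluster has been fully restarted it switches to
// threads_per_allocation/number_of_allocations and attaches no compatibility header.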
request.setOptions(request.getOptions().toBuilder().setWarningsHandler(PERMISSIVE).build()); var response = client().performRequest(request); assertOK(response); diff --git a/x-pack/qa/reindex-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/ReindexWithSecurityIT.java b/x-pack/qa/reindex-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/ReindexWithSecurityIT.java index 121c0f527f209..3356005d4bd83 100644 --- a/x-pack/qa/reindex-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/ReindexWithSecurityIT.java +++ b/x-pack/qa/reindex-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/ReindexWithSecurityIT.java @@ -25,6 +25,7 @@ import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.cluster.util.resource.Resource; import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.test.rest.TestResponseParsers; import org.elasticsearch.xcontent.XContentBuilder; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -233,7 +234,7 @@ private void createIndicesWithRandomAliases(String... indices) throws IOExceptio request.toXContent(builder, null); restRequest.setEntity(new StringEntity(Strings.toString(builder), ContentType.APPLICATION_JSON)); Response restResponse = client().performRequest(restRequest); - AcknowledgedResponse response = AcknowledgedResponse.fromXContent(responseAsParser(restResponse)); + AcknowledgedResponse response = TestResponseParsers.parseAcknowledgedResponse(responseAsParser(restResponse)); assertThat(response.isAcknowledged(), is(true)); } diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java index c8c72855eaf7a..d61c143098fcb 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java @@ -370,14 +370,10 @@ private void assertDocs( Version oldVersion, int numberOfShards ) throws IOException { - RequestOptions v7RequestOptions = RequestOptions.DEFAULT.toBuilder() - .addHeader("Content-Type", "application/vnd.elasticsearch+json;compatible-with=7") - .addHeader("Accept", "application/vnd.elasticsearch+json;compatible-with=7") - .build(); - RequestOptions randomRequestOptions = randomBoolean() ? 
RequestOptions.DEFAULT : v7RequestOptions; + RequestOptions requestOptions = RequestOptions.DEFAULT; // run a search against the index - SearchResponse searchResponse = search(index, null, randomRequestOptions); + SearchResponse searchResponse = search(index, null, requestOptions); try { logger.info(searchResponse); // check hit count @@ -404,7 +400,7 @@ private void assertDocs( SearchSourceBuilder.searchSource() .query(QueryBuilders.matchQuery("val", num)) .runtimeMappings(Map.of("val", Map.of("type", "long"))), - randomRequestOptions + requestOptions ); try { logger.info(searchResponse); @@ -422,7 +418,7 @@ private void assertDocs( SearchSourceBuilder.searchSource() .query(QueryBuilders.matchAllQuery()) .sort(SortBuilders.fieldSort("val").order(SortOrder.DESC)), - randomRequestOptions + requestOptions ); try { logger.info(searchResponse); @@ -439,7 +435,7 @@ private void assertDocs( searchResponse = search( index, SearchSourceBuilder.searchSource().query(QueryBuilders.matchQuery("test", "test" + num)), - randomRequestOptions + requestOptions ); try { logger.info(searchResponse); @@ -456,7 +452,7 @@ private void assertDocs( searchResponse = search( index, SearchSourceBuilder.searchSource().query(QueryBuilders.termQuery("_type", randomType)), - randomRequestOptions + requestOptions ); try { logger.info(searchResponse); @@ -482,7 +478,7 @@ private void assertDocs( searchResponse = search( index, SearchSourceBuilder.searchSource().query(QueryBuilders.rangeQuery("create_date").from("2020-02-01")), - randomRequestOptions + requestOptions ); try { logger.info(searchResponse); diff --git a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java index d935672e0a243..553d5e7425de7 100644 --- a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java +++ b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java @@ -12,8 +12,10 @@ import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.Strings; import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.xcontent.XContentType; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; @@ -242,14 +244,30 @@ private Response startDeployment(String modelId) throws IOException { } private Response startDeployment(String modelId, String waitForState) throws IOException { + String inferenceThreadParamName = "threads_per_allocation"; + String modelThreadParamName = "number_of_allocations"; + String compatibleHeader = null; + if (CLUSTER_TYPE.equals(ClusterType.OLD) || CLUSTER_TYPE.equals(ClusterType.MIXED)) { + compatibleHeader = compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_8); + inferenceThreadParamName = "inference_threads"; + modelThreadParamName = "model_threads"; + } + Request request = new Request( "POST", "/_ml/trained_models/" + modelId + "/deployment/_start?timeout=40s&wait_for=" + waitForState - + "&inference_threads=1&model_threads=1" + + "&" + + inferenceThreadParamName + + "=1&" + + modelThreadParamName + + "=1" ); + if (compatibleHeader != null) { + request.setOptions(request.getOptions().toBuilder().addHeader("Accept", compatibleHeader).build()); + } 
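// MIXED is treated like OLD here, presumably because a rolling-upgrade request can still be handled by a
// not-yet-upgraded node; the test therefore keeps the legacy parameter names and relies on the v8 Accept
// header so that upgraded nodes accept them too. Only once CLUSTER_TYPE is UPGRADED does it switch to
// threads_per_allocation/number_of_allocations without any extra header.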
request.setOptions(request.getOptions().toBuilder().setWarningsHandler(PERMISSIVE).build()); var response = client().performRequest(request); assertOK(response); diff --git a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MlAssignmentPlannerUpgradeIT.java b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MlAssignmentPlannerUpgradeIT.java index 6d34ef5887629..88118f6c2727a 100644 --- a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MlAssignmentPlannerUpgradeIT.java +++ b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MlAssignmentPlannerUpgradeIT.java @@ -12,11 +12,13 @@ import org.elasticsearch.client.Response; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.Strings; import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; import org.elasticsearch.test.rest.RestTestLegacyFeatures; +import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.util.ArrayList; @@ -278,14 +280,30 @@ private Response startDeployment(String modelId) throws IOException { } private Response startDeployment(String modelId, String waitForState) throws IOException { + String inferenceThreadParamName = "threads_per_allocation"; + String modelThreadParamName = "number_of_allocations"; + String compatibleHeader = null; + if (CLUSTER_TYPE.equals(ClusterType.OLD) || CLUSTER_TYPE.equals(ClusterType.MIXED)) { + compatibleHeader = compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_8); + inferenceThreadParamName = "inference_threads"; + modelThreadParamName = "model_threads"; + } + Request request = new Request( "POST", "/_ml/trained_models/" + modelId + "/deployment/_start?timeout=40s&wait_for=" + waitForState - + "&inference_threads=1&model_threads=1" + + "&" + + inferenceThreadParamName + + "=1&" + + modelThreadParamName + + "=1" ); + if (compatibleHeader != null) { + request.setOptions(request.getOptions().toBuilder().addHeader("Accept", compatibleHeader).build()); + } request.setOptions(request.getOptions().toBuilder().setWarningsHandler(PERMISSIVE).build()); var response = client().performRequest(request); assertOK(response);
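// MlAssignmentPlannerUpgradeIT follows the same pattern as the two deployment tests above. For reference,
// compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_8) should resolve to a media type of the same
// shape as the v7 headers removed from OldRepositoryAccessIT, i.e. approximately:
//
//     Accept: application/vnd.elasticsearch+json;compatible-with=8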