diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 6281fa0af3e36..5476637b84e92 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -13,15 +13,9 @@ Resolves #[Issue number to be closed when this PR is merged]
### Check List
-- [ ] New functionality includes testing.
- - [ ] All tests pass
-- [ ] New functionality has been documented.
- - [ ] New functionality has javadoc added
-- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md).
-- [ ] Failing checks are inspected and point to the corresponding known issue(s) (See: [Troubleshooting Failing Builds](../blob/main/CONTRIBUTING.md#troubleshooting-failing-builds))
-- [ ] Commits are signed per the DCO using --signoff
-- [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog))
-- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose)
+- [ ] Functionality includes testing.
+- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md), if applicable.
+- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose), if applicable.
By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/OpenSearch/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml
index 4bff5b7d60d1f..51ae075ffa2c9 100644
--- a/.github/workflows/assemble.yml
+++ b/.github/workflows/assemble.yml
@@ -17,10 +17,23 @@ jobs:
java-version: ${{ matrix.java }}
distribution: temurin
- name: Setup docker (missing on MacOS)
+ id: setup_docker
if: runner.os == 'macos'
uses: douglascamata/setup-docker-macos-action@main
+ continue-on-error: true
with:
upgrade-qemu: true
+ colima: v0.6.8
- name: Run Gradle (assemble)
+ if: runner.os == 'macos' && steps.setup_docker.outcome != 'success'
+ run: |
+ # Report success even if previous step failed (Docker on MacOS runner is very unstable)
+ exit 0;
+ - name: Run Gradle (assemble)
+ if: runner.os != 'macos'
+ run: |
+ ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE
+ - name: Run Gradle (assemble)
+ if: runner.os == 'macos' && steps.setup_docker.outcome == 'success'
run: |
./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE
diff --git a/.github/workflows/pull-request-checks.yml b/.github/workflows/pull-request-checks.yml
deleted file mode 100644
index eec363572478c..0000000000000
--- a/.github/workflows/pull-request-checks.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: Pull Request Checks
-
-on:
- pull_request:
- types:
- [
- opened,
- edited,
- review_requested,
- synchronize,
- reopened,
- ready_for_review,
- ]
-
-jobs:
- verify-description-checklist:
- name: Verify Description Checklist
- runs-on: ubuntu-latest
- steps:
- - uses: peternied/check-pull-request-description-checklist@v1.1
- if: github.event.pull_request.user.login != 'dependabot[bot]'
- with:
- checklist-items: |
- New functionality includes testing.
- All tests pass
- New functionality has been documented.
- New functionality has javadoc added
- Commits are signed per the DCO using --signoff
- Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog))
diff --git a/CHANGELOG-3.0.md b/CHANGELOG-3.0.md
index 964383078c38d..1cc12f66d52e1 100644
--- a/CHANGELOG-3.0.md
+++ b/CHANGELOG-3.0.md
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Dependencies
### Changed
+- Changed locale provider from COMPAT to CLDR ([#13988](https://github.com/opensearch-project/OpenSearch/pull/13988))
- Migrate client transports to Apache HttpClient / Core 5.x ([#4459](https://github.com/opensearch-project/OpenSearch/pull/4459))
- Change http code on create index API with bad input raising NotXContentException from 500 to 400 ([#4773](https://github.com/opensearch-project/OpenSearch/pull/4773))
- Improve summary error message for invalid setting updates ([#4792](https://github.com/opensearch-project/OpenSearch/pull/4792))
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ce11dabb6e9c..098405f8f1d44 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## [Unreleased 2.x]
### Added
+- Add leader and follower check failure counter metrics ([#12439](https://github.com/opensearch-project/OpenSearch/pull/12439))
- Add latency metrics for instrumenting critical clusterManager code paths ([#12333](https://github.com/opensearch-project/OpenSearch/pull/12333))
- Add support for Azure Managed Identity in repository-azure ([#12423](https://github.com/opensearch-project/OpenSearch/issues/12423))
- Add useCompoundFile index setting ([#13478](https://github.com/opensearch-project/OpenSearch/pull/13478))
@@ -14,12 +15,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add getMetadataFields to MapperService ([#13819](https://github.com/opensearch-project/OpenSearch/pull/13819))
- [Remote State] Add async remote state deletion task running on an interval, configurable by a setting ([#13131](https://github.com/opensearch-project/OpenSearch/pull/13131))
- Allow setting query parameters on requests ([#13776](https://github.com/opensearch-project/OpenSearch/issues/13776))
+- Add remote routing table for remote state publication with experimental feature flag ([#13304](https://github.com/opensearch-project/OpenSearch/pull/13304))
+- [Remote Store] Add support to disable flush based on translog reader count ([#14027](https://github.com/opensearch-project/OpenSearch/pull/14027))
### Dependencies
- Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), [#13559](https://github.com/opensearch-project/OpenSearch/pull/13559))
- Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442))
- Bump `org.apache.commons:commons-text` from 1.11.0 to 1.12.0 ([#13557](https://github.com/opensearch-project/OpenSearch/pull/13557))
-- Bump `org.hdrhistogram:HdrHistogram` from 2.1.12 to 2.2.1 ([#13556](https://github.com/opensearch-project/OpenSearch/pull/13556))
+- Bump `org.hdrhistogram:HdrHistogram` from 2.1.12 to 2.2.2 ([#13556](https://github.com/opensearch-project/OpenSearch/pull/13556), [#13986](https://github.com/opensearch-project/OpenSearch/pull/13986))
- Bump `com.gradle.enterprise` from 3.17.2 to 3.17.4 ([#13641](https://github.com/opensearch-project/OpenSearch/pull/13641), [#13753](https://github.com/opensearch-project/OpenSearch/pull/13753))
- Bump `org.apache.hadoop:hadoop-minicluster` from 3.3.6 to 3.4.0 ([#13642](https://github.com/opensearch-project/OpenSearch/pull/13642))
- Bump `mockito` from 5.11.0 to 5.12.0 ([#13665](https://github.com/opensearch-project/OpenSearch/pull/13665))
diff --git a/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java
index 2ea8c2d015ecc..d0cb2da9c1dd3 100644
--- a/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java
+++ b/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java
@@ -110,7 +110,7 @@ public void execute(Task t) {
if (BuildParams.getRuntimeJavaVersion() == JavaVersion.VERSION_1_8) {
test.systemProperty("java.locale.providers", "SPI,JRE");
} else {
- test.systemProperty("java.locale.providers", "SPI,COMPAT");
+ test.systemProperty("java.locale.providers", "SPI,CLDR");
if (test.getJavaVersion().compareTo(JavaVersion.VERSION_17) < 0) {
test.jvmArgs("--illegal-access=warn");
}
diff --git a/buildSrc/version.properties b/buildSrc/version.properties
index 0a36ed5e200f7..3a76cf6e9b7ad 100644
--- a/buildSrc/version.properties
+++ b/buildSrc/version.properties
@@ -23,6 +23,8 @@ guava = 32.1.1-jre
protobuf = 3.22.3
jakarta_annotation = 1.3.5
google_http_client = 1.44.1
+tdigest = 3.3
+hdrhistogram = 2.2.2
# when updating the JNA version, also update the version in buildSrc/build.gradle
jna = 5.13.0
diff --git a/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java b/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java
index 726c381db09f6..af7138569972a 100644
--- a/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java
+++ b/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java
@@ -105,13 +105,8 @@ private static String javaLocaleProviders() {
SPI setting is used to allow loading custom CalendarDataProvider
in jdk8 it has to be loaded from jre/lib/ext,
in jdk9+ it is already within ES project and on a classpath
-
- Due to internationalization enhancements in JDK 9 OpenSearch need to set the provider to COMPAT otherwise time/date
- parsing will break in an incompatible way for some date patterns and locales.
- //TODO COMPAT will be deprecated in at some point, see please https://bugs.openjdk.java.net/browse/JDK-8232906
- See also: documentation in server/org.opensearch.common.time.IsoCalendarDataProvider
*/
- return "-Djava.locale.providers=SPI,COMPAT";
+ return "-Djava.locale.providers=SPI,CLDR";
}
}
diff --git a/gradle/ide.gradle b/gradle/ide.gradle
index 14d6b2982ccd0..e266d9add172d 100644
--- a/gradle/ide.gradle
+++ b/gradle/ide.gradle
@@ -28,7 +28,7 @@ allprojects {
apply plugin: 'idea'
tasks.named('idea').configure {
- doFirst { throw new GradleException("Use of the 'idea' task has been deprecated. For details on importing into IntelliJ see CONTRIBUTING.md.") }
+ doFirst { throw new GradleException("Use of the 'idea' task has been deprecated. For details on importing into IntelliJ see DEVELOPER_GUIDE.md.") }
}
}
@@ -81,7 +81,7 @@ if (System.getProperty('idea.active') == 'true') {
}
runConfigurations {
defaults(JUnit) {
- vmParameters = '-ea -Djava.locale.providers=SPI,COMPAT'
+ vmParameters = '-ea -Djava.locale.providers=SPI,CLDR'
if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_17) {
vmParameters += ' -Djava.security.manager=allow'
}
diff --git a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamInput.java
similarity index 96%
rename from server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java
rename to libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamInput.java
index f75f27b7bcb91..41680961b36e9 100644
--- a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java
+++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamInput.java
@@ -30,12 +30,10 @@
* GitHub history for details.
*/
-package org.opensearch.index.translog;
+package org.opensearch.core.common.io.stream;
import org.apache.lucene.store.BufferedChecksum;
import org.apache.lucene.util.BitUtil;
-import org.opensearch.core.common.io.stream.FilterStreamInput;
-import org.opensearch.core.common.io.stream.StreamInput;
import java.io.EOFException;
import java.io.IOException;
diff --git a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java
similarity index 96%
rename from server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java
rename to libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java
index 9e96664c79cc5..422f956c0cd47 100644
--- a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java
+++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java
@@ -30,11 +30,10 @@
* GitHub history for details.
*/
-package org.opensearch.index.translog;
+package org.opensearch.core.common.io.stream;
import org.apache.lucene.store.BufferedChecksum;
import org.opensearch.common.annotation.PublicApi;
-import org.opensearch.core.common.io.stream.StreamOutput;
import java.io.IOException;
import java.util.zip.CRC32;
diff --git a/release-notes/opensearch.release-notes-1.3.17.md b/release-notes/opensearch.release-notes-1.3.17.md
new file mode 100644
index 0000000000000..5218b9e3be20c
--- /dev/null
+++ b/release-notes/opensearch.release-notes-1.3.17.md
@@ -0,0 +1,6 @@
+## 2024-05-30 Version 1.3.17 Release Notes
+
+### Upgrades
+- OpenJDK Update (April 2024 Patch releases), update to Eclipse Temurin 11.0.23+9 ([#13406](https://github.com/opensearch-project/OpenSearch/pull/13406))
+- Upgrade BouncyCastle dependencies from 1.75 to 1.78.1 resolving [CVE-2024-30172], [CVE-2024-30171] and [CVE-2024-29857]
+- Bump `netty` from 4.1.109.Final to 4.1.110.Final ([#13802](https://github.com/opensearch-project/OpenSearch/pull/13802))
diff --git a/server/build.gradle b/server/build.gradle
index 15301e68fca3d..624e5fe332662 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -96,9 +96,9 @@ dependencies {
api "joda-time:joda-time:${versions.joda}"
// percentiles aggregation
- api 'com.tdunning:t-digest:3.3'
- // precentil ranks aggregation
- api 'org.hdrhistogram:HdrHistogram:2.2.1'
+ api "com.tdunning:t-digest:${versions.tdigest}"
+ // percentile ranks aggregation
+ api "org.hdrhistogram:HdrHistogram:${versions.hdrhistogram}"
// lucene spatial
api "org.locationtech.spatial4j:spatial4j:${versions.spatial4j}", optional
diff --git a/server/licenses/HdrHistogram-2.2.1.jar.sha1 b/server/licenses/HdrHistogram-2.2.1.jar.sha1
deleted file mode 100644
index 68225950d4744..0000000000000
--- a/server/licenses/HdrHistogram-2.2.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-0eb1feb351f64176c377772a30174e582c0274d5
\ No newline at end of file
diff --git a/server/licenses/HdrHistogram-2.2.2.jar.sha1 b/server/licenses/HdrHistogram-2.2.2.jar.sha1
new file mode 100644
index 0000000000000..2c895841bce81
--- /dev/null
+++ b/server/licenses/HdrHistogram-2.2.2.jar.sha1
@@ -0,0 +1 @@
+7959933ebcc0f05b2eaa5af0a0c8689fa257b15c
diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java
new file mode 100644
index 0000000000000..e96dedaa3e6a0
--- /dev/null
+++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java
@@ -0,0 +1,154 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote;
+
+import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+import org.opensearch.test.OpenSearchIntegTestCase;
+import org.junit.Before;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING;
+import static org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING;
+
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
+public class RemoteClusterStateCleanupManagerIT extends RemoteStoreBaseIntegTestCase {
+
+ private static final String INDEX_NAME = "test-index";
+
+ @Before
+ public void setup() {
+ asyncUploadMockFsRepo = false;
+ }
+
+ @Override
+ protected Settings nodeSettings(int nodeOrdinal) {
+ return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build();
+ }
+
+ private Map initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) {
+ prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount);
+ Map indexStats = indexData(1, false, INDEX_NAME);
+ assertEquals(shardCount * (replicaCount + 1), getNumShards(INDEX_NAME).totalNumShards);
+ ensureGreen(INDEX_NAME);
+ return indexStats;
+ }
+
+ public void testRemoteCleanupTaskUpdated() {
+ int shardCount = randomIntBetween(1, 2);
+ int replicaCount = 1;
+ int dataNodeCount = shardCount * (replicaCount + 1);
+ int clusterManagerNodeCount = 1;
+
+ initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
+ RemoteClusterStateCleanupManager remoteClusterStateCleanupManager = internalCluster().getClusterManagerNodeInstance(
+ RemoteClusterStateCleanupManager.class
+ );
+
+ assertEquals(CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval());
+ assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled());
+
+ // now disable
+ client().admin()
+ .cluster()
+ .prepareUpdateSettings()
+ .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), -1))
+ .get();
+
+ assertEquals(-1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMillis());
+ assertFalse(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled());
+
+ // now set Clean up interval to 1 min
+ client().admin()
+ .cluster()
+ .prepareUpdateSettings()
+ .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "1m"))
+ .get();
+ assertEquals(1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMinutes());
+ }
+
+ public void testRemoteCleanupDeleteStale() throws Exception {
+ int shardCount = randomIntBetween(1, 2);
+ int replicaCount = 1;
+ int dataNodeCount = shardCount * (replicaCount + 1);
+ int clusterManagerNodeCount = 1;
+
+ initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
+
+    // update cluster state 21 times; this will upload 42 manifest files to the repository.
+    // If fewer manifest files than that remain, it means clean up has run.
+ updateClusterStateNTimes(RETAINED_MANIFESTS + SKIP_CLEANUP_STATE_CHANGES + 1);
+
+ RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class);
+ BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REPOSITORY_NAME);
+ BlobPath baseMetadataPath = repository.basePath()
+ .add(
+ Base64.getUrlEncoder()
+ .withoutPadding()
+ .encodeToString(getClusterState().getClusterName().value().getBytes(StandardCharsets.UTF_8))
+ )
+ .add("cluster-state")
+ .add(getClusterState().metadata().clusterUUID());
+ BlobPath manifestContainerPath = baseMetadataPath.add("manifest");
+
+ // set cleanup interval to 100 ms to make the test faster
+ ClusterUpdateSettingsResponse response = client().admin()
+ .cluster()
+ .prepareUpdateSettings()
+ .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "100ms"))
+ .get();
+
+ assertTrue(response.isAcknowledged());
+
+ assertBusy(() -> {
+ int manifestFiles = repository.blobStore().blobContainer(manifestContainerPath).listBlobsByPrefix("manifest").size();
+ logger.info("number of current manifest file: {}", manifestFiles);
+            // we can't guarantee the repo holds exactly RETAINED_MANIFESTS manifests, since other queued tasks
+            // (besides the replica count change) may upload new manifest files; so we check that the number of manifests is between
+            // RETAINED_MANIFESTS and RETAINED_MANIFESTS + 2 * SKIP_CLEANUP_STATE_CHANGES (each cluster state update uploads 2 manifests)
+ assertTrue(
+ "Current number of manifest files: " + manifestFiles,
+ manifestFiles >= RETAINED_MANIFESTS && manifestFiles < RETAINED_MANIFESTS + 2 * SKIP_CLEANUP_STATE_CHANGES
+ );
+ }, 500, TimeUnit.MILLISECONDS);
+
+ // disable the clean up to avoid race condition during shutdown
+ response = client().admin()
+ .cluster()
+ .prepareUpdateSettings()
+ .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "-1"))
+ .get();
+
+ assertTrue(response.isAcknowledged());
+ }
+
+ private void updateClusterStateNTimes(int n) {
+ int newReplicaCount = randomIntBetween(0, 3);
+ for (int i = n; i > 0; i--) {
+ ClusterUpdateSettingsResponse response = client().admin()
+ .cluster()
+ .prepareUpdateSettings()
+ .setPersistentSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), i, TimeUnit.SECONDS))
+ .get();
+ assertTrue(response.isAcknowledged());
+ }
+ }
+}
diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java
index 42120aa32eb47..ab2f0f0080566 100644
--- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java
@@ -10,7 +10,6 @@
import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
-import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.blobstore.BlobPath;
import org.opensearch.common.settings.Settings;
import org.opensearch.discovery.DiscoveryStats;
@@ -27,7 +26,6 @@
import java.util.function.Function;
import java.util.stream.Collectors;
-import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA;
import static org.opensearch.gateway.remote.RemoteClusterStateService.CUSTOM_METADATA;
import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER;
@@ -51,16 +49,6 @@ protected Settings nodeSettings(int nodeOrdinal) {
return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build();
}
- private void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) {
- internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes);
- internalCluster().startDataOnlyNodes(numDataOnlyNodes);
- for (String index : indices.split(",")) {
- createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount));
- ensureYellowAndNoInitializingShards(index);
- ensureGreen(index);
- }
- }
-
private Map initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) {
prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount);
Map indexStats = indexData(1, false, INDEX_NAME);
@@ -69,49 +57,6 @@ private Map initialTestSetup(int shardCount, int replicaCount, int
return indexStats;
}
- public void testFullClusterRestoreStaleDelete() throws Exception {
- int shardCount = randomIntBetween(1, 2);
- int replicaCount = 1;
- int dataNodeCount = shardCount * (replicaCount + 1);
- int clusterManagerNodeCount = 1;
-
- initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
- setReplicaCount(0);
- setReplicaCount(2);
- setReplicaCount(0);
- setReplicaCount(1);
- setReplicaCount(0);
- setReplicaCount(1);
- setReplicaCount(0);
- setReplicaCount(2);
- setReplicaCount(0);
-
- RemoteClusterStateService remoteClusterStateService = internalCluster().getClusterManagerNodeInstance(
- RemoteClusterStateService.class
- );
-
- RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class);
-
- BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REPOSITORY_NAME);
- BlobPath baseMetadataPath = repository.basePath()
- .add(
- Base64.getUrlEncoder()
- .withoutPadding()
- .encodeToString(getClusterState().getClusterName().value().getBytes(StandardCharsets.UTF_8))
- )
- .add("cluster-state")
- .add(getClusterState().metadata().clusterUUID());
-
- assertEquals(10, repository.blobStore().blobContainer(baseMetadataPath.add("manifest")).listBlobsByPrefix("manifest").size());
-
- Map indexMetadataMap = remoteClusterStateService.getLatestClusterState(
- cluster().getClusterName(),
- getClusterState().metadata().clusterUUID()
- ).getMetadata().getIndices();
- assertEquals(0, indexMetadataMap.values().stream().findFirst().get().getNumberOfReplicas());
- assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards());
- }
-
public void testRemoteStateStats() {
int shardCount = randomIntBetween(1, 2);
int replicaCount = 1;
@@ -241,12 +186,4 @@ private void validateNodesStatsResponse(NodesStatsResponse nodesStatsResponse) {
assertNotNull(nodesStatsResponse.getNodes().get(0));
assertNotNull(nodesStatsResponse.getNodes().get(0).getDiscoveryStats());
}
-
- private void setReplicaCount(int replicaCount) {
- client().admin()
- .indices()
- .prepareUpdateSettings(INDEX_NAME)
- .setSettings(Settings.builder().put(SETTING_NUMBER_OF_REPLICAS, replicaCount))
- .get();
- }
}
diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java
index 740aee69f7d80..64efcee6ef1b5 100644
--- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java
+++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java
@@ -350,4 +350,14 @@ protected void restore(boolean restoreAllShards, String... indices) {
PlainActionFuture.newFuture()
);
}
+
+ protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) {
+ internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes);
+ internalCluster().startDataOnlyNodes(numDataOnlyNodes);
+ for (String index : indices.split(",")) {
+ createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount));
+ ensureYellowAndNoInitializingShards(index);
+ ensureGreen(index);
+ }
+ }
}
diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
index 7721b18a4fe6b..96d6338e5913b 100644
--- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
@@ -852,7 +852,9 @@ public void testFlushOnTooManyRemoteTranslogFiles() throws Exception {
ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest();
updateSettingsRequest.persistentSettings(
- Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "100")
+ Settings.builder()
+ .put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "100")
+ .put(CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.getKey(), "0ms")
);
assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet());
@@ -883,5 +885,27 @@ public void testFlushOnTooManyRemoteTranslogFiles() throws Exception {
assertEquals(totalFiles, 1L);
}
}, 30, TimeUnit.SECONDS);
+
+ // Disabling max translog readers
+ assertAcked(
+ internalCluster().client()
+ .admin()
+ .cluster()
+ .prepareUpdateSettings()
+ .setPersistentSettings(Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "-1"))
+ .get()
+ );
+
+ // Indexing 500 more docs
+ for (int i = 0; i < 500; i++) {
+ indexBulk(INDEX_NAME, 1);
+ }
+
+ // No flush is triggered since max_translog_readers is set to -1
+        // Total translog files would be incremented by 500
+ try (Stream files = Files.list(translogLocation)) {
+ long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count();
+ assertEquals(totalFiles, 501L);
+ }
}
}
diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java
index a58db51780826..01ad06757640c 100644
--- a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java
@@ -1914,14 +1914,8 @@ public void testRangeQueryWithTimeZone() throws Exception {
* Test range with a custom locale, e.g. "de" in this case. Documents here mention the day of week
* as "Mi" for "Mittwoch (Wednesday" and "Do" for "Donnerstag (Thursday)" and the month in the query
* as "Dez" for "Dezember (December)".
- * Note: this test currently needs the JVM arg `-Djava.locale.providers=SPI,COMPAT` to be set.
- * When running with gradle this is done implicitly through the BuildPlugin, but when running from
- * an IDE this might need to be set manually in the run configuration. See also CONTRIBUTING.md section
- * on "Configuring IDEs And Running Tests".
*/
public void testRangeQueryWithLocaleMapping() throws Exception {
- assert ("SPI,COMPAT".equals(System.getProperty("java.locale.providers"))) : "`-Djava.locale.providers=SPI,COMPAT` needs to be set";
-
assertAcked(
prepareCreate("test").setMapping(
jsonBuilder().startObject()
@@ -1938,17 +1932,21 @@ public void testRangeQueryWithLocaleMapping() throws Exception {
indexRandom(
true,
- client().prepareIndex("test").setId("1").setSource("date_field", "Mi, 06 Dez 2000 02:55:00 -0800"),
- client().prepareIndex("test").setId("2").setSource("date_field", "Do, 07 Dez 2000 02:55:00 -0800")
+ client().prepareIndex("test").setId("1").setSource("date_field", "Mi., 06 Dez. 2000 02:55:00 -0800"),
+ client().prepareIndex("test").setId("2").setSource("date_field", "Do., 07 Dez. 2000 02:55:00 -0800")
);
SearchResponse searchResponse = client().prepareSearch("test")
- .setQuery(QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Do, 07 Dez 2000 00:00:00 -0800"))
+ .setQuery(
+ QueryBuilders.rangeQuery("date_field").gte("Di., 05 Dez. 2000 02:55:00 -0800").lte("Do., 07 Dez. 2000 00:00:00 -0800")
+ )
.get();
assertHitCount(searchResponse, 1L);
searchResponse = client().prepareSearch("test")
- .setQuery(QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Fr, 08 Dez 2000 00:00:00 -0800"))
+ .setQuery(
+ QueryBuilders.rangeQuery("date_field").gte("Di., 05 Dez. 2000 02:55:00 -0800").lte("Fr., 08 Dez. 2000 00:00:00 -0800")
+ )
.get();
assertHitCount(searchResponse, 2L);
}
diff --git a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java
index d48f82a388245..a98349a4af5cd 100644
--- a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java
+++ b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java
@@ -8,6 +8,7 @@
package org.opensearch.cluster;
+import org.opensearch.telemetry.metrics.Counter;
import org.opensearch.telemetry.metrics.Histogram;
import org.opensearch.telemetry.metrics.MetricsRegistry;
import org.opensearch.telemetry.metrics.tags.Tags;
@@ -23,6 +24,7 @@
public final class ClusterManagerMetrics {
private static final String LATENCY_METRIC_UNIT_MS = "ms";
+ private static final String COUNTER_METRICS_UNIT = "1";
public final Histogram clusterStateAppliersHistogram;
public final Histogram clusterStateListenersHistogram;
@@ -30,6 +32,9 @@ public final class ClusterManagerMetrics {
public final Histogram clusterStateComputeHistogram;
public final Histogram clusterStatePublishHistogram;
+ public final Counter leaderCheckFailureCounter;
+ public final Counter followerChecksFailureCounter;
+
public ClusterManagerMetrics(MetricsRegistry metricsRegistry) {
clusterStateAppliersHistogram = metricsRegistry.createHistogram(
"cluster.state.appliers.latency",
@@ -56,6 +61,16 @@ public ClusterManagerMetrics(MetricsRegistry metricsRegistry) {
"Histogram for recording time taken to publish a new cluster state",
LATENCY_METRIC_UNIT_MS
);
+ followerChecksFailureCounter = metricsRegistry.createCounter(
+ "followers.checker.failure.count",
+ "Counter for number of failed follower checks",
+ COUNTER_METRICS_UNIT
+ );
+ leaderCheckFailureCounter = metricsRegistry.createCounter(
+ "leader.checker.failure.count",
+ "Counter for number of failed leader checks",
+ COUNTER_METRICS_UNIT
+ );
}
public void recordLatency(Histogram histogram, Double value) {
@@ -69,4 +84,16 @@ public void recordLatency(Histogram histogram, Double value, Optional tags
}
histogram.record(value, tags.get());
}
+
+ public void incrementCounter(Counter counter, Double value) {
+ incrementCounter(counter, value, Optional.empty());
+ }
+
+ public void incrementCounter(Counter counter, Double value, Optional<Tags> tags) {
+ if (Objects.isNull(tags) || tags.isEmpty()) {
+ counter.add(value);
+ return;
+ }
+ counter.add(value, tags.get());
+ }
}
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java
index 3d74feddfa261..f53e6837a67f5 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java
@@ -36,6 +36,7 @@
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.cluster.ClusterChangedEvent;
+import org.opensearch.cluster.ClusterManagerMetrics;
import org.opensearch.cluster.ClusterName;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.ClusterStateTaskConfig;
@@ -207,7 +208,8 @@ public Coordinator(
ElectionStrategy electionStrategy,
NodeHealthService nodeHealthService,
PersistedStateRegistry persistedStateRegistry,
- RemoteStoreNodeService remoteStoreNodeService
+ RemoteStoreNodeService remoteStoreNodeService,
+ ClusterManagerMetrics clusterManagerMetrics
) {
this.settings = settings;
this.transportService = transportService;
@@ -261,14 +263,22 @@ public Coordinator(
this::handlePublishRequest,
this::handleApplyCommit
);
- this.leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, this::onLeaderFailure, nodeHealthService);
+ this.leaderChecker = new LeaderChecker(
+ settings,
+ clusterSettings,
+ transportService,
+ this::onLeaderFailure,
+ nodeHealthService,
+ clusterManagerMetrics
+ );
this.followersChecker = new FollowersChecker(
settings,
clusterSettings,
transportService,
this::onFollowerCheckRequest,
this::removeNode,
- nodeHealthService
+ nodeHealthService,
+ clusterManagerMetrics
);
this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger);
this.clusterApplier = clusterApplier;
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java
index 70bb0515bb022..2ec0dabd91786 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java
@@ -35,6 +35,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.opensearch.cluster.ClusterManagerMetrics;
import org.opensearch.cluster.coordination.Coordinator.Mode;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.cluster.node.DiscoveryNodes;
@@ -127,6 +128,7 @@ public class FollowersChecker {
private final TransportService transportService;
private final NodeHealthService nodeHealthService;
private volatile FastResponseState fastResponseState;
+ private ClusterManagerMetrics clusterManagerMetrics;
public FollowersChecker(
Settings settings,
@@ -134,7 +136,8 @@ public FollowersChecker(
TransportService transportService,
Consumer handleRequestAndUpdateState,
BiConsumer onNodeFailure,
- NodeHealthService nodeHealthService
+ NodeHealthService nodeHealthService,
+ ClusterManagerMetrics clusterManagerMetrics
) {
this.settings = settings;
this.transportService = transportService;
@@ -161,6 +164,7 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti
handleDisconnectedNode(node);
}
});
+ this.clusterManagerMetrics = clusterManagerMetrics;
}
private void setFollowerCheckTimeout(TimeValue followerCheckTimeout) {
@@ -413,6 +417,7 @@ public String executor() {
}
void failNode(String reason) {
+ clusterManagerMetrics.incrementCounter(clusterManagerMetrics.followerChecksFailureCounter, 1.0);
transportService.getThreadPool().generic().execute(new Runnable() {
@Override
public void run() {
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java
index 5475470b81b93..f77a7ffc8ce8e 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java
@@ -511,11 +511,27 @@ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNod
assert existingNodes.isEmpty() == false;
CompatibilityMode remoteStoreCompatibilityMode = REMOTE_STORE_COMPATIBILITY_MODE_SETTING.get(metadata.settings());
- if (STRICT.equals(remoteStoreCompatibilityMode)) {
- DiscoveryNode existingNode = existingNodes.get(0);
+ List<String> reposToSkip = new ArrayList<>(1);
+ Optional<DiscoveryNode> remoteRoutingTableNode = existingNodes.stream()
+ .filter(
+ node -> node.getAttributes().get(RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY) != null
+ )
+ .findFirst();
+ // If none of the existing nodes have routing table repo, then we skip this repo check if present in joining node.
+ // This ensures a new node with remote routing table repo is able to join the cluster.
+ if (remoteRoutingTableNode.isEmpty()) {
+ String joiningNodeRepoName = joiningNode.getAttributes()
+ .get(RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY);
+ if (joiningNodeRepoName != null) {
+ reposToSkip.add(joiningNodeRepoName);
+ }
+ }
+
+ if (STRICT.equals(remoteStoreCompatibilityMode)) {
+ DiscoveryNode existingNode = remoteRoutingTableNode.orElseGet(() -> existingNodes.get(0));
if (joiningNode.isRemoteStoreNode()) {
- ensureRemoteStoreNodesCompatibility(joiningNode, existingNode);
+ ensureRemoteStoreNodesCompatibility(joiningNode, existingNode, reposToSkip);
} else {
if (existingNode.isRemoteStoreNode()) {
throw new IllegalStateException(
@@ -537,19 +553,25 @@ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNod
throw new IllegalStateException(reason);
}
if (joiningNode.isRemoteStoreNode()) {
- Optional remoteDN = existingNodes.stream().filter(DiscoveryNode::isRemoteStoreNode).findFirst();
- remoteDN.ifPresent(discoveryNode -> ensureRemoteStoreNodesCompatibility(joiningNode, discoveryNode));
+ Optional<DiscoveryNode> remoteDN = remoteRoutingTableNode.isPresent()
+ ? remoteRoutingTableNode
+ : existingNodes.stream().filter(DiscoveryNode::isRemoteStoreNode).findFirst();
+ remoteDN.ifPresent(discoveryNode -> ensureRemoteStoreNodesCompatibility(joiningNode, discoveryNode, reposToSkip));
}
}
}
}
- private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNode existingNode) {
+ private static void ensureRemoteStoreNodesCompatibility(
+ DiscoveryNode joiningNode,
+ DiscoveryNode existingNode,
+ List<String> reposToSkip
+ ) {
if (joiningNode.isRemoteStoreNode()) {
if (existingNode.isRemoteStoreNode()) {
RemoteStoreNodeAttribute joiningRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(joiningNode);
RemoteStoreNodeAttribute existingRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(existingNode);
- if (existingRemoteStoreNodeAttribute.equals(joiningRemoteStoreNodeAttribute) == false) {
+ if (existingRemoteStoreNodeAttribute.equalsWithRepoSkip(joiningRemoteStoreNodeAttribute, reposToSkip) == false) {
throw new IllegalStateException(
"a remote store node ["
+ joiningNode
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java
index 8d4373b865f62..4fd2c0eb13073 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java
@@ -36,6 +36,7 @@
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.OpenSearchException;
+import org.opensearch.cluster.ClusterManagerMetrics;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.cluster.node.DiscoveryNodes;
import org.opensearch.common.Nullable;
@@ -119,17 +120,17 @@ public class LeaderChecker {
private final TransportService transportService;
private final Consumer onLeaderFailure;
private final NodeHealthService nodeHealthService;
-
private AtomicReference currentChecker = new AtomicReference<>();
-
private volatile DiscoveryNodes discoveryNodes;
+ private final ClusterManagerMetrics clusterManagerMetrics;
LeaderChecker(
final Settings settings,
final ClusterSettings clusterSettings,
final TransportService transportService,
final Consumer onLeaderFailure,
- NodeHealthService nodeHealthService
+ NodeHealthService nodeHealthService,
+ final ClusterManagerMetrics clusterManagerMetrics
) {
this.settings = settings;
leaderCheckInterval = LEADER_CHECK_INTERVAL_SETTING.get(settings);
@@ -138,6 +139,7 @@ public class LeaderChecker {
this.transportService = transportService;
this.onLeaderFailure = onLeaderFailure;
this.nodeHealthService = nodeHealthService;
+ this.clusterManagerMetrics = clusterManagerMetrics;
clusterSettings.addSettingsUpdateConsumer(LEADER_CHECK_TIMEOUT_SETTING, this::setLeaderCheckTimeout);
transportService.registerRequestHandler(
@@ -293,7 +295,6 @@ public void handleResponse(Empty response) {
logger.debug("closed check scheduler received a response, doing nothing");
return;
}
-
failureCountSinceLastSuccess.set(0);
scheduleNextWakeUp(); // logs trace message indicating success
}
@@ -304,7 +305,6 @@ public void handleException(TransportException exp) {
logger.debug("closed check scheduler received a response, doing nothing");
return;
}
-
if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) {
logger.debug(new ParameterizedMessage("leader [{}] disconnected during check", leader), exp);
leaderFailed(new ConnectTransportException(leader, "disconnected during check", exp));
@@ -355,6 +355,7 @@ public String executor() {
void leaderFailed(Exception e) {
if (isClosed.compareAndSet(false, true)) {
+ clusterManagerMetrics.incrementCounter(clusterManagerMetrics.leaderCheckFailureCounter, 1.0);
transportService.getThreadPool().generic().execute(new Runnable() {
@Override
public void run() {
diff --git a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java
index 9b52bdd1b16c5..4b3dc7964a87b 100644
--- a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java
+++ b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java
@@ -51,8 +51,10 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Comparator;
import java.util.EnumSet;
import java.util.List;
+import java.util.stream.Collectors;
import static org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING;
@@ -164,6 +166,40 @@ public boolean equalsIgnoreGenerations(@Nullable RepositoriesMetadata other) {
return true;
}
+ /**
+ * Checks if this instance and the given instance share the same repositories, with an option to
+ * skip the equality check for a named list of repositories.
+ * @param other other repositories metadata
+ * @param reposToSkip list of repos to skip check for equality
+ * @return {@code true} iff both instances contain the same repositories apart from differences in generations, not including repos provided in reposToSkip.
+ */
+ public boolean equalsIgnoreGenerationsWithRepoSkip(@Nullable RepositoriesMetadata other, List<String> reposToSkip) {
+ if (other == null) {
+ return false;
+ }
+ List<RepositoryMetadata> currentRepositories = repositories.stream()
+ .filter(repo -> !reposToSkip.contains(repo.name()))
+ .collect(Collectors.toList());
+ List<RepositoryMetadata> otherRepositories = other.repositories.stream()
+ .filter(repo -> !reposToSkip.contains(repo.name()))
+ .collect(Collectors.toList());
+
+ if (otherRepositories.size() != currentRepositories.size()) {
+ return false;
+ }
+ // Sort repos by name for ordered comparison
+ Comparator<RepositoryMetadata> compareByName = (o1, o2) -> o1.name().compareTo(o2.name());
+ currentRepositories.sort(compareByName);
+ otherRepositories.sort(compareByName);
+
+ for (int i = 0; i < currentRepositories.size(); i++) {
+ if (currentRepositories.get(i).equalsIgnoreGenerations(otherRepositories.get(i)) == false) {
+ return false;
+ }
+ }
+ return true;
+ }
+
@Override
public int hashCode() {
return repositories.hashCode();
diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java
new file mode 100644
index 0000000000000..ba2208e17df1f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java
@@ -0,0 +1,67 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.cluster.routing.remote;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.io.IOUtils;
+import org.opensearch.node.Node;
+import org.opensearch.node.remotestore.RemoteStoreNodeAttribute;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.Repository;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled;
+
+/**
+ * A Service which provides APIs to upload and download routing table from remote store.
+ *
+ * @opensearch.internal
+ */
+public class RemoteRoutingTableService extends AbstractLifecycleComponent {
+
+ private static final Logger logger = LogManager.getLogger(RemoteRoutingTableService.class);
+ private final Settings settings;
+ private final Supplier<RepositoriesService> repositoriesService;
+ private BlobStoreRepository blobStoreRepository;
+
+ public RemoteRoutingTableService(Supplier<RepositoriesService> repositoriesService, Settings settings) {
+ assert isRemoteRoutingTableEnabled(settings) : "Remote routing table is not enabled";
+ this.repositoriesService = repositoriesService;
+ this.settings = settings;
+ }
+
+ @Override
+ protected void doClose() throws IOException {
+ if (blobStoreRepository != null) {
+ IOUtils.close(blobStoreRepository);
+ }
+ }
+
+ @Override
+ protected void doStart() {
+ assert isRemoteRoutingTableEnabled(settings) == true : "Remote routing table is not enabled";
+ final String remoteStoreRepo = settings.get(
+ Node.NODE_ATTRIBUTES.getKey() + RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY
+ );
+ assert remoteStoreRepo != null : "Remote routing table repository is not configured";
+ final Repository repository = repositoriesService.get().repository(remoteStoreRepo);
+ assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository";
+ blobStoreRepository = (BlobStoreRepository) repository;
+ }
+
+ @Override
+ protected void doStop() {}
+
+}
diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java b/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java
new file mode 100644
index 0000000000000..9fe016e783f20
--- /dev/null
+++ b/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java
@@ -0,0 +1,10 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/** Package containing class to perform operations on remote routing table */
+package org.opensearch.cluster.routing.remote;
diff --git a/server/src/main/java/org/opensearch/common/remote/AbstractRemoteWritableBlobEntity.java b/server/src/main/java/org/opensearch/common/remote/AbstractRemoteWritableBlobEntity.java
new file mode 100644
index 0000000000000..632b2b70d61df
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/AbstractRemoteWritableBlobEntity.java
@@ -0,0 +1,91 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.core.compress.Compressor;
+import org.opensearch.core.xcontent.NamedXContentRegistry;
+import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata;
+
+import static org.opensearch.gateway.remote.RemoteClusterStateUtils.PATH_DELIMITER;
+
+/**
+ * An extension of {@link RemoteWriteableEntity} class which caters to the use case of writing to and reading from a blob storage
+ *
+ * @param <T> The class type which can be uploaded to or downloaded from a blob storage.
+ */
+public abstract class AbstractRemoteWritableBlobEntity<T> implements RemoteWriteableEntity<T> {
+
+ protected String blobFileName;
+
+ protected String blobName;
+ private final String clusterUUID;
+ private final Compressor compressor;
+ private final NamedXContentRegistry namedXContentRegistry;
+ private String[] pathTokens;
+
+ public AbstractRemoteWritableBlobEntity(
+ final String clusterUUID,
+ final Compressor compressor,
+ final NamedXContentRegistry namedXContentRegistry
+ ) {
+ this.clusterUUID = clusterUUID;
+ this.compressor = compressor;
+ this.namedXContentRegistry = namedXContentRegistry;
+ }
+
+ public abstract BlobPathParameters getBlobPathParameters();
+
+ public String getFullBlobName() {
+ return blobName;
+ }
+
+ public String getBlobFileName() {
+ if (blobFileName == null) {
+ String[] pathTokens = getBlobPathTokens();
+ if (pathTokens == null || pathTokens.length < 1) {
+ return null;
+ }
+ blobFileName = pathTokens[pathTokens.length - 1];
+ }
+ return blobFileName;
+ }
+
+ public String[] getBlobPathTokens() {
+ if (pathTokens != null) {
+ return pathTokens;
+ }
+ if (blobName == null) {
+ return null;
+ }
+ pathTokens = blobName.split(PATH_DELIMITER);
+ return pathTokens;
+ }
+
+ public abstract String generateBlobFileName();
+
+ public String clusterUUID() {
+ return clusterUUID;
+ }
+
+ public abstract UploadedMetadata getUploadedMetadata();
+
+ public void setFullBlobName(BlobPath blobPath) {
+ this.blobName = blobPath.buildAsString() + blobFileName;
+ }
+
+ public NamedXContentRegistry getNamedXContentRegistry() {
+ return namedXContentRegistry;
+ }
+
+ protected Compressor getCompressor() {
+ return compressor;
+ }
+
+}
diff --git a/server/src/main/java/org/opensearch/common/remote/BlobPathParameters.java b/server/src/main/java/org/opensearch/common/remote/BlobPathParameters.java
new file mode 100644
index 0000000000000..58c73a804b66a
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/BlobPathParameters.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import java.util.List;
+
+/**
+ * Parameters which can be used to construct a blob path
+ *
+ */
+public class BlobPathParameters {
+
+ private final List<String> pathTokens;
+ private final String filePrefix;
+
+ public BlobPathParameters(final List<String> pathTokens, final String filePrefix) {
+ this.pathTokens = pathTokens;
+ this.filePrefix = filePrefix;
+ }
+
+ public List<String> getPathTokens() {
+ return pathTokens;
+ }
+
+ public String getFilePrefix() {
+ return filePrefix;
+ }
+}
diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWritableEntityStore.java b/server/src/main/java/org/opensearch/common/remote/RemoteWritableEntityStore.java
new file mode 100644
index 0000000000000..ccf7cafff1730
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/RemoteWritableEntityStore.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import org.opensearch.core.action.ActionListener;
+
+import java.io.IOException;
+
+/**
+ * An interface to read/write an object from/to a remote storage. This interface is agnostic of the remote storage type.
+ *
+ * @param <T> The object type which can be uploaded to or downloaded from remote storage.
+ * @param <U> The wrapper entity which provides methods for serializing/deserializing entity T.
+ */
+public interface RemoteWritableEntityStore<T, U extends RemoteWriteableEntity<T>> {
+
+ public void writeAsync(U entity, ActionListener<Void> listener);
+
+ public T read(U entity) throws IOException;
+
+ public void readAsync(U entity, ActionListener<T> listener);
+}
diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntity.java b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntity.java
new file mode 100644
index 0000000000000..778c24dce2e27
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntity.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * An interface which defines the serialization/deserialization methods for objects to be uploaded to or downloaded from remote store.
+ * This interface is agnostic of the remote storage type.
+ *
+ * @param <T> The object type which can be uploaded to or downloaded from remote storage.
+ */
+public interface RemoteWriteableEntity<T> {
+ /**
+ * @return An InputStream created by serializing the entity T
+ * @throws IOException Exception encountered while serialization
+ */
+ public InputStream serialize() throws IOException;
+
+ /**
+ * @param inputStream The InputStream which is used to read the serialized entity
+ * @return The entity T after deserialization
+ * @throws IOException Exception encountered while deserialization
+ */
+ public T deserialize(InputStream inputStream) throws IOException;
+
+}
diff --git a/server/src/main/java/org/opensearch/common/remote/package-info.java b/server/src/main/java/org/opensearch/common/remote/package-info.java
new file mode 100644
index 0000000000000..08ff9e910dc98
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/package-info.java
@@ -0,0 +1,11 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+/**
+ * Common remote store package
+ */
+package org.opensearch.common.remote;
diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
index 7814518af471b..297fc98764d07 100644
--- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
+++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
@@ -104,6 +104,7 @@
import org.opensearch.gateway.GatewayService;
import org.opensearch.gateway.PersistedClusterStateService;
import org.opensearch.gateway.ShardsBatchGatewayAllocator;
+import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager;
import org.opensearch.gateway.remote.RemoteClusterStateService;
import org.opensearch.http.HttpTransportSettings;
import org.opensearch.index.IndexModule;
@@ -711,6 +712,7 @@ public void apply(Settings value, Settings current, Settings previous) {
SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_LEVEL,
// Remote cluster state settings
+ RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING,
RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING,
RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING,
RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING,
diff --git a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java
index 7a364de1c5dc6..238df1bd90113 100644
--- a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java
+++ b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java
@@ -36,6 +36,7 @@ protected FeatureFlagSettings(
FeatureFlags.DATETIME_FORMATTER_CACHING_SETTING,
FeatureFlags.TIERED_REMOTE_INDEX_SETTING,
FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING,
- FeatureFlags.PLUGGABLE_CACHE_SETTING
+ FeatureFlags.PLUGGABLE_CACHE_SETTING,
+ FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL_SETTING
);
}
diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
index 62cfbd861d4d9..82f43921d2d28 100644
--- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
+++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java
@@ -67,6 +67,11 @@ public class FeatureFlags {
*/
public static final String PLUGGABLE_CACHE = "opensearch.experimental.feature.pluggable.caching.enabled";
+ /**
+ * Gates the functionality of remote routing table.
+ */
+ public static final String REMOTE_PUBLICATION_EXPERIMENTAL = "opensearch.experimental.feature.remote_store.publication.enabled";
+
public static final Setting REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING = Setting.boolSetting(
REMOTE_STORE_MIGRATION_EXPERIMENTAL,
false,
@@ -89,6 +94,12 @@ public class FeatureFlags {
public static final Setting PLUGGABLE_CACHE_SETTING = Setting.boolSetting(PLUGGABLE_CACHE, false, Property.NodeScope);
+ public static final Setting<Boolean> REMOTE_PUBLICATION_EXPERIMENTAL_SETTING = Setting.boolSetting(
+ REMOTE_PUBLICATION_EXPERIMENTAL,
+ false,
+ Property.NodeScope
+ );
+
private static final List> ALL_FEATURE_FLAG_SETTINGS = List.of(
REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING,
EXTENSIONS_SETTING,
@@ -96,7 +107,8 @@ public class FeatureFlags {
TELEMETRY_SETTING,
DATETIME_FORMATTER_CACHING_SETTING,
TIERED_REMOTE_INDEX_SETTING,
- PLUGGABLE_CACHE_SETTING
+ PLUGGABLE_CACHE_SETTING,
+ REMOTE_PUBLICATION_EXPERIMENTAL_SETTING
);
/**
* Should store the settings from opensearch.yml.
diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java
index 288371aa240a0..538dea5b2e60b 100644
--- a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java
+++ b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java
@@ -34,6 +34,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import org.opensearch.cluster.ClusterManagerMetrics;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.coordination.Coordinator;
import org.opensearch.cluster.coordination.ElectionStrategy;
@@ -133,7 +134,8 @@ public DiscoveryModule(
RerouteService rerouteService,
NodeHealthService nodeHealthService,
PersistedStateRegistry persistedStateRegistry,
- RemoteStoreNodeService remoteStoreNodeService
+ RemoteStoreNodeService remoteStoreNodeService,
+ ClusterManagerMetrics clusterManagerMetrics
) {
final Collection> joinValidators = new ArrayList<>();
final Map> hostProviders = new HashMap<>();
@@ -211,7 +213,8 @@ public DiscoveryModule(
electionStrategy,
nodeHealthService,
persistedStateRegistry,
- remoteStoreNodeService
+ remoteStoreNodeService,
+ clusterManagerMetrics
);
} else {
throw new IllegalArgumentException("Unknown discovery type [" + discoveryType + "]");
diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java
index 0242e94cc4da1..4337d539ef425 100644
--- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java
+++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java
@@ -43,7 +43,7 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment {
public static final int CODEC_V0 = 0; // Older codec version, where we haven't introduced codec versions for manifest.
public static final int CODEC_V1 = 1; // In Codec V1 we have introduced global-metadata and codec version in Manifest file.
public static final int CODEC_V2 = 2; // In Codec V2, there are seperate metadata files rather than a single global metadata file.
- public static final int CODEC_V3 = 3; // In Codec V3, we introduce index routing-metadata, diff and other attributes as part of manifest required for state publication
+ public static final int CODEC_V3 = 3; // In Codec V3, we introduce index routing-metadata in manifest file.
private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term");
private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version");
@@ -61,6 +61,8 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment {
private static final ParseField UPLOADED_SETTINGS_METADATA = new ParseField("uploaded_settings_metadata");
private static final ParseField UPLOADED_TEMPLATES_METADATA = new ParseField("uploaded_templates_metadata");
private static final ParseField UPLOADED_CUSTOM_METADATA = new ParseField("uploaded_custom_metadata");
+ private static final ParseField ROUTING_TABLE_VERSION_FIELD = new ParseField("routing_table_version");
+ private static final ParseField INDICES_ROUTING_FIELD = new ParseField("indices_routing");
private static final ParseField METADATA_VERSION = new ParseField("metadata_version");
private static final ParseField UPLOADED_TRANSIENT_SETTINGS_METADATA = new ParseField("uploaded_transient_settings_metadata");
private static final ParseField UPLOADED_DISCOVERY_NODES_METADATA = new ParseField("uploaded_discovery_nodes_metadata");
@@ -98,6 +100,8 @@ private static ClusterMetadataManifest.Builder manifestV2Builder(Object[] fields
private static ClusterMetadataManifest.Builder manifestV3Builder(Object[] fields) {
return manifestV2Builder(fields)
+ .routingTableVersion(routingTableVersion(fields))
+ .indicesRouting(indicesRouting(fields))
.discoveryNodesMetadata(discoveryNodesMetadata(fields))
.clusterBlocksMetadata(clusterBlocksMetadata(fields))
.diffManifest(diffManifest(fields))
@@ -172,33 +176,41 @@ private static Map customMetadata(Object[] fi
return customs.stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity()));
}
+ private static long routingTableVersion(Object[] fields) {
+ return (long) fields[15];
+ }
+
+ private static List indicesRouting(Object[] fields) {
+ return (List) fields[16];
+ }
+
private static UploadedMetadataAttribute discoveryNodesMetadata(Object[] fields) {
- return (UploadedMetadataAttribute) fields[15];
+ return (UploadedMetadataAttribute) fields[17];
}
private static UploadedMetadataAttribute clusterBlocksMetadata(Object[] fields) {
- return (UploadedMetadataAttribute) fields[16];
+ return (UploadedMetadataAttribute) fields[18];
}
private static long metadataVersion(Object[] fields) {
- return (long) fields[17];
+ return (long) fields[19];
}
private static UploadedMetadataAttribute transientSettingsMetadata(Object[] fields) {
- return (UploadedMetadataAttribute) fields[18];
+ return (UploadedMetadataAttribute) fields[20];
}
private static UploadedMetadataAttribute hashesOfConsistentSettings(Object[] fields) {
- return (UploadedMetadataAttribute) fields[19];
+ return (UploadedMetadataAttribute) fields[21];
}
private static Map clusterStateCustomMetadata(Object[] fields) {
- List customs = (List) fields[20];
+ List customs = (List) fields[22];
return customs.stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity()));
}
private static ClusterStateDiffManifest diffManifest(Object[] fields) {
- return (ClusterStateDiffManifest) fields[21];
+ return (ClusterStateDiffManifest) fields[23];
}
private static final ConstructingObjectParser PARSER_V0 = new ConstructingObjectParser<>(
@@ -221,7 +233,7 @@ private static ClusterStateDiffManifest diffManifest(Object[] fields) {
fields -> manifestV3Builder(fields).build()
);
- private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V2;
+ private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V3;
static {
declareParser(PARSER_V0, CODEC_V0);
@@ -273,6 +285,12 @@ private static void declareParser(ConstructingObjectParser= CODEC_V3) {
+ parser.declareLong(ConstructingObjectParser.constructorArg(), ROUTING_TABLE_VERSION_FIELD);
+ parser.declareObjectArray(
+ ConstructingObjectParser.constructorArg(),
+ (p, c) -> UploadedIndexMetadata.fromXContent(p),
+ INDICES_ROUTING_FIELD
+ );
parser.declareNamedObject(
ConstructingObjectParser.optionalConstructorArg(),
UploadedMetadataAttribute.PARSER,
@@ -323,6 +341,8 @@ private static void declareParser(ConstructingObjectParser indicesRouting;
private final long metadataVersion;
private final UploadedMetadataAttribute uploadedTransientSettingsMetadata;
private final UploadedMetadataAttribute uploadedDiscoveryNodesMetadata;
@@ -434,6 +454,14 @@ public boolean hasMetadataAttributesFiles() {
|| !uploadedCustomMetadataMap.isEmpty();
}
+ public long getRoutingTableVersion() {
+ return routingTableVersion;
+ }
+
+ public List getIndicesRouting() {
+ return indicesRouting;
+ }
+
public ClusterMetadataManifest(
long clusterTerm,
long version,
@@ -451,6 +479,8 @@ public ClusterMetadataManifest(
UploadedMetadataAttribute uploadedSettingsMetadata,
UploadedMetadataAttribute uploadedTemplatesMetadata,
Map uploadedCustomMetadataMap,
+ long routingTableVersion,
+ List indicesRouting,
long metadataVersion,
UploadedMetadataAttribute discoveryNodesMetadata,
UploadedMetadataAttribute clusterBlocksMetadata,
@@ -471,6 +501,8 @@ public ClusterMetadataManifest(
this.indices = Collections.unmodifiableList(indices);
this.previousClusterUUID = previousClusterUUID;
this.clusterUUIDCommitted = clusterUUIDCommitted;
+ this.routingTableVersion = routingTableVersion;
+ this.indicesRouting = Collections.unmodifiableList(indicesRouting);
this.uploadedCoordinationMetadata = uploadedCoordinationMetadata;
this.uploadedSettingsMetadata = uploadedSettingsMetadata;
this.uploadedTemplatesMetadata = uploadedTemplatesMetadata;
@@ -509,6 +541,8 @@ public ClusterMetadataManifest(StreamInput in) throws IOException {
);
this.globalMetadataFileName = null;
// ToDo: change the version check and initialize these
+ this.routingTableVersion = in.readLong();
+ this.indicesRouting = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new));
this.metadataVersion = in.readLong();
this.uploadedDiscoveryNodesMetadata = new UploadedMetadataAttribute(in);
this.uploadedClusterBlocksMetadata = new UploadedMetadataAttribute(in);
@@ -530,6 +564,8 @@ public ClusterMetadataManifest(StreamInput in) throws IOException {
this.uploadedSettingsMetadata = null;
this.uploadedTemplatesMetadata = null;
this.uploadedCustomMetadataMap = null;
+ this.routingTableVersion = -1;
+ this.indicesRouting = null;
this.uploadedDiscoveryNodesMetadata = null;
this.uploadedClusterBlocksMetadata = null;
this.diffManifest = null;
@@ -595,6 +631,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(GLOBAL_METADATA_FIELD.getPreferredName(), getGlobalMetadataFileName());
}
if (onOrAfterCodecVersion(CODEC_V3)) {
+ builder.field(ROUTING_TABLE_VERSION_FIELD.getPreferredName(), getRoutingTableVersion());
+ builder.startArray(INDICES_ROUTING_FIELD.getPreferredName());
+ {
+ for (UploadedIndexMetadata uploadedIndexMetadata : indicesRouting) {
+ builder.startObject();
+ uploadedIndexMetadata.toXContent(builder, params);
+ builder.endObject();
+ }
+ }
+ builder.endArray();
if (getDiscoveryNodesMetadata() != null) {
builder.startObject(UPLOADED_DISCOVERY_NODES_METADATA.getPreferredName());
getDiscoveryNodesMetadata().toXContent(builder, params);
@@ -648,6 +694,8 @@ public void writeTo(StreamOutput out) throws IOException {
uploadedSettingsMetadata.writeTo(out);
uploadedTemplatesMetadata.writeTo(out);
out.writeMap(uploadedCustomMetadataMap, StreamOutput::writeString, (o, v) -> v.writeTo(o));
+ out.writeLong(routingTableVersion);
+ out.writeCollection(indicesRouting);
out.writeLong(metadataVersion);
uploadedDiscoveryNodesMetadata.writeTo(out);
uploadedClusterBlocksMetadata.writeTo(out);
@@ -682,6 +730,8 @@ public boolean equals(Object o) {
&& Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted)
&& Objects.equals(globalMetadataFileName, that.globalMetadataFileName)
&& Objects.equals(codecVersion, that.codecVersion)
+ && Objects.equals(routingTableVersion, that.routingTableVersion)
+ && Objects.equals(indicesRouting, that.indicesRouting)
&& Objects.equals(uploadedCoordinationMetadata, that.uploadedCoordinationMetadata)
&& Objects.equals(uploadedSettingsMetadata, that.uploadedSettingsMetadata)
&& Objects.equals(uploadedTemplatesMetadata, that.uploadedTemplatesMetadata)
@@ -710,6 +760,8 @@ public int hashCode() {
committed,
previousClusterUUID,
clusterUUIDCommitted,
+ routingTableVersion,
+ indicesRouting,
uploadedCoordinationMetadata,
uploadedSettingsMetadata,
uploadedTemplatesMetadata,
@@ -740,6 +792,10 @@ public static ClusterMetadataManifest fromXContentV1(XContentParser parser) thro
return PARSER_V1.parse(parser, null);
}
+ public static ClusterMetadataManifest fromXContentV2(XContentParser parser) throws IOException {
+ return PARSER_V2.parse(parser, null);
+ }
+
public static ClusterMetadataManifest fromXContent(XContentParser parser) throws IOException {
return CURRENT_PARSER.parse(parser, null);
}
@@ -767,6 +823,8 @@ public static class Builder {
private String previousClusterUUID;
private boolean committed;
private boolean clusterUUIDCommitted;
+ private long routingTableVersion;
+ private List indicesRouting;
private long metadataVersion;
private UploadedMetadataAttribute discoveryNodesMetadata;
private UploadedMetadataAttribute clusterBlocksMetadata;
@@ -780,6 +838,16 @@ public Builder indices(List indices) {
return this;
}
+ public Builder routingTableVersion(long routingTableVersion) {
+ this.routingTableVersion = routingTableVersion;
+ return this;
+ }
+
+ public Builder indicesRouting(List indicesRouting) {
+ this.indicesRouting = indicesRouting;
+ return this;
+ }
+
public Builder codecVersion(int codecVersion) {
this.codecVersion = codecVersion;
return this;
@@ -854,6 +922,10 @@ public List getIndices() {
return indices;
}
+ public List getIndicesRouting() {
+ return indicesRouting;
+ }
+
public Builder previousClusterUUID(String previousClusterUUID) {
this.previousClusterUUID = previousClusterUUID;
return this;
@@ -903,6 +975,7 @@ public Builder() {
indices = new ArrayList<>();
customMetadataMap = new HashMap<>();
clusterStateCustomMetadataMap = new HashMap<>();
+ indicesRouting = new ArrayList<>();
}
public Builder(ClusterMetadataManifest manifest) {
@@ -922,6 +995,8 @@ public Builder(ClusterMetadataManifest manifest) {
this.indices = new ArrayList<>(manifest.indices);
this.previousClusterUUID = manifest.previousClusterUUID;
this.clusterUUIDCommitted = manifest.clusterUUIDCommitted;
+ this.routingTableVersion = manifest.routingTableVersion;
+ this.indicesRouting = new ArrayList<>(manifest.indicesRouting);
this.diffManifest = manifest.diffManifest;
this.hashesOfConsistentSettings = manifest.uploadedHashesOfConsistentSettings;
this.clusterStateCustomMetadataMap = manifest.uploadedClusterStateCustomMap;
@@ -945,6 +1020,8 @@ public ClusterMetadataManifest build() {
settingsMetadata,
templatesMetadata,
customMetadataMap,
+ routingTableVersion,
+ indicesRouting,
metadataVersion,
discoveryNodesMetadata,
clusterBlocksMetadata,
@@ -1051,11 +1128,9 @@ public String getIndexUUID() {
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
- return builder.startObject()
- .field(INDEX_NAME_FIELD.getPreferredName(), getIndexName())
+ return builder.field(INDEX_NAME_FIELD.getPreferredName(), getIndexName())
.field(INDEX_UUID_FIELD.getPreferredName(), getIndexUUID())
- .field(UPLOADED_FILENAME_FIELD.getPreferredName(), getUploadedFilePath())
- .endObject();
+ .field(UPLOADED_FILENAME_FIELD.getPreferredName(), getUploadedFilePath());
}
@Override
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java
new file mode 100644
index 0000000000000..2fca239b10efd
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java
@@ -0,0 +1,413 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.apache.logging.log4j.util.Strings;
+import org.opensearch.cluster.ClusterState;
+import org.opensearch.cluster.service.ClusterApplierService;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.blobstore.BlobMetadata;
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.common.util.concurrent.AbstractAsyncTask;
+import org.opensearch.core.action.ActionListener;
+import org.opensearch.index.translog.transfer.BlobStoreTransferService;
+import org.opensearch.threadpool.ThreadPool;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_FORMAT;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_PATH_TOKEN;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_FORMAT;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_PATH_TOKEN;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_PATH_TOKEN;
+
+/**
+ * A Manager which provides APIs to clean up stale cluster state files and runs an async stale cleanup task
+ *
+ * @opensearch.internal
+ */
+public class RemoteClusterStateCleanupManager implements Closeable {
+
+ public static final int RETAINED_MANIFESTS = 10;
+ public static final int SKIP_CLEANUP_STATE_CHANGES = 10;
+ public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT = TimeValue.timeValueMinutes(5);
+ public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM = TimeValue.MINUS_ONE;
+
+ /**
+     * Setting to specify the interval at which the stale file cleanup job runs
+ * Min value -1 indicates that the stale file cleanup job should be disabled
+ */
+ public static final Setting REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING = Setting.timeSetting(
+ "cluster.remote_store.state.cleanup_interval",
+ CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT,
+ CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM,
+ Setting.Property.NodeScope,
+ Setting.Property.Dynamic
+ );
+ private static final Logger logger = LogManager.getLogger(RemoteClusterStateCleanupManager.class);
+ private final RemoteClusterStateService remoteClusterStateService;
+ private final RemotePersistenceStats remoteStateStats;
+ private BlobStoreTransferService blobStoreTransferService;
+ private TimeValue staleFileCleanupInterval;
+ private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false);
+ private volatile AsyncStaleFileDeletion staleFileDeletionTask;
+ private long lastCleanupAttemptStateVersion;
+ private final ThreadPool threadpool;
+ private final ClusterApplierService clusterApplierService;
+
+ public RemoteClusterStateCleanupManager(RemoteClusterStateService remoteClusterStateService, ClusterService clusterService) {
+ this.remoteClusterStateService = remoteClusterStateService;
+ this.remoteStateStats = remoteClusterStateService.getStats();
+ ClusterSettings clusterSettings = clusterService.getClusterSettings();
+ this.clusterApplierService = clusterService.getClusterApplierService();
+ this.staleFileCleanupInterval = clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING);
+ this.threadpool = remoteClusterStateService.getThreadpool();
+ // initialize with 0, a cleanup will be done when this node is elected master node and version is incremented more than threshold
+ this.lastCleanupAttemptStateVersion = 0;
+ clusterSettings.addSettingsUpdateConsumer(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, this::updateCleanupInterval);
+ }
+
+ void start() {
+ staleFileDeletionTask = new AsyncStaleFileDeletion(this);
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (staleFileDeletionTask != null) {
+ staleFileDeletionTask.close();
+ }
+ }
+
+ private BlobStoreTransferService getBlobStoreTransferService() {
+ if (blobStoreTransferService == null) {
+ blobStoreTransferService = new BlobStoreTransferService(remoteClusterStateService.getBlobStore(), threadpool);
+ }
+ return blobStoreTransferService;
+ }
+
+ private void updateCleanupInterval(TimeValue updatedInterval) {
+ this.staleFileCleanupInterval = updatedInterval;
+ logger.info("updated remote state cleanup interval to {}", updatedInterval);
+        // Reset the interval on the existing task so subsequent runs use the updated schedule
+ if (staleFileDeletionTask != null && !staleFileDeletionTask.getInterval().equals(updatedInterval)) {
+ staleFileDeletionTask.setInterval(updatedInterval);
+ }
+ }
+
+ // visible for testing
+ void cleanUpStaleFiles() {
+ ClusterState currentAppliedState = clusterApplierService.state();
+ if (currentAppliedState.nodes().isLocalNodeElectedClusterManager()) {
+ long cleanUpAttemptStateVersion = currentAppliedState.version();
+ assert Strings.isNotEmpty(currentAppliedState.getClusterName().value()) : "cluster name is not set";
+ assert Strings.isNotEmpty(currentAppliedState.metadata().clusterUUID()) : "cluster uuid is not set";
+ if (cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion > SKIP_CLEANUP_STATE_CHANGES) {
+ logger.info(
+ "Cleaning up stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates",
+ currentAppliedState.getClusterName().value(),
+ currentAppliedState.metadata().clusterUUID(),
+ cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion
+ );
+ this.deleteStaleClusterMetadata(
+ currentAppliedState.getClusterName().value(),
+ currentAppliedState.metadata().clusterUUID(),
+ RETAINED_MANIFESTS
+ );
+ lastCleanupAttemptStateVersion = cleanUpAttemptStateVersion;
+ } else {
+ logger.debug(
+ "Skipping cleanup of stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates, which is less than threshold {}",
+ currentAppliedState.getClusterName().value(),
+ currentAppliedState.metadata().clusterUUID(),
+ cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion,
+ SKIP_CLEANUP_STATE_CHANGES
+ );
+ }
+ } else {
+ logger.debug("Skipping cleanup task as local node is not elected Cluster Manager");
+ }
+ }
+
+ private void addStaleGlobalMetadataPath(String fileName, Set filesToKeep, Set staleGlobalMetadataPaths) {
+ if (!filesToKeep.contains(fileName)) {
+ String[] splitPath = fileName.split("/");
+ staleGlobalMetadataPaths.add(
+ new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName(
+ splitPath[splitPath.length - 1]
+ )
+ );
+ }
+ }
+
+ // visible for testing
+ void deleteClusterMetadata(
+ String clusterName,
+ String clusterUUID,
+ List activeManifestBlobMetadata,
+ List staleManifestBlobMetadata
+ ) {
+ try {
+ Set filesToKeep = new HashSet<>();
+ Set staleManifestPaths = new HashSet<>();
+ Set staleIndexMetadataPaths = new HashSet<>();
+ Set staleGlobalMetadataPaths = new HashSet<>();
+ activeManifestBlobMetadata.forEach(blobMetadata -> {
+ ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.fetchRemoteClusterMetadataManifest(
+ clusterName,
+ clusterUUID,
+ blobMetadata.name()
+ );
+ clusterMetadataManifest.getIndices()
+ .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename()));
+ if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) {
+ filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName());
+ } else if (clusterMetadataManifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V2) {
+ filesToKeep.add(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename());
+ filesToKeep.add(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename());
+ filesToKeep.add(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename());
+ clusterMetadataManifest.getCustomMetadataMap()
+ .values()
+ .forEach(attribute -> filesToKeep.add(attribute.getUploadedFilename()));
+ }
+ });
+ staleManifestBlobMetadata.forEach(blobMetadata -> {
+ ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.fetchRemoteClusterMetadataManifest(
+ clusterName,
+ clusterUUID,
+ blobMetadata.name()
+ );
+ staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name());
+ if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) {
+ addStaleGlobalMetadataPath(clusterMetadataManifest.getGlobalMetadataFileName(), filesToKeep, staleGlobalMetadataPaths);
+ } else if (clusterMetadataManifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V2) {
+ addStaleGlobalMetadataPath(
+ clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename(),
+ filesToKeep,
+ staleGlobalMetadataPaths
+ );
+ addStaleGlobalMetadataPath(
+ clusterMetadataManifest.getSettingsMetadata().getUploadedFilename(),
+ filesToKeep,
+ staleGlobalMetadataPaths
+ );
+ addStaleGlobalMetadataPath(
+ clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename(),
+ filesToKeep,
+ staleGlobalMetadataPaths
+ );
+ clusterMetadataManifest.getCustomMetadataMap()
+ .values()
+ .forEach(
+ attribute -> addStaleGlobalMetadataPath(attribute.getUploadedFilename(), filesToKeep, staleGlobalMetadataPaths)
+ );
+ }
+
+ clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> {
+ if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) {
+ staleIndexMetadataPaths.add(
+ new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString()
+ + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename())
+ );
+ }
+ });
+ });
+
+ if (staleManifestPaths.isEmpty()) {
+ logger.debug("No stale Remote Cluster Metadata files found");
+ return;
+ }
+
+ deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths));
+ deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths));
+ deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths));
+ } catch (IllegalStateException e) {
+ logger.error("Error while fetching Remote Cluster Metadata manifests", e);
+ } catch (IOException e) {
+ logger.error("Error while deleting stale Remote Cluster Metadata files", e);
+ remoteStateStats.cleanUpAttemptFailed();
+ } catch (Exception e) {
+ logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e);
+ remoteStateStats.cleanUpAttemptFailed();
+ }
+ }
+
+ /**
+     * Deletes all manifests except the latest {@code manifestsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests
+ *
+ * @param clusterName name of the cluster
+ * @param clusterUUID uuid of cluster state to refer to in remote
+ * @param manifestsToRetain no of latest manifest files to keep in remote
+ */
+ // package private for testing
+ void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) {
+ if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) {
+ logger.info("Delete stale cluster metadata task is already in progress.");
+ return;
+ }
+ try {
+ getBlobStoreTransferService().listAllInSortedOrderAsync(
+ ThreadPool.Names.REMOTE_PURGE,
+ remoteClusterStateService.getManifestFolderPath(clusterName, clusterUUID),
+ MANIFEST_FILE_PREFIX,
+ Integer.MAX_VALUE,
+ new ActionListener<>() {
+ @Override
+ public void onResponse(List blobMetadata) {
+ if (blobMetadata.size() > manifestsToRetain) {
+ deleteClusterMetadata(
+ clusterName,
+ clusterUUID,
+ blobMetadata.subList(0, manifestsToRetain),
+ blobMetadata.subList(manifestsToRetain, blobMetadata.size())
+ );
+ }
+ deleteStaleMetadataRunning.set(false);
+ }
+
+                    @Override
+                    public void onFailure(Exception e) {
+                        // Pass the caught exception to the logger; dropping it loses the stack trace
+                        logger.error(
+                            new ParameterizedMessage(
+                                "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}",
+                                clusterUUID
+                            ),
+                            e);
+                        deleteStaleMetadataRunning.set(false);
+ }
+ );
+ } catch (Exception e) {
+ deleteStaleMetadataRunning.set(false);
+ throw e;
+ }
+ }
+
+ /**
+ * Purges all remote cluster state against provided cluster UUIDs
+ *
+ * @param clusterName name of the cluster
+     * @param clusterUUIDs clusterUUIDs for which the remote state needs to be purged
+ */
+ void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) {
+ clusterUUIDs.forEach(
+ clusterUUID -> getBlobStoreTransferService().deleteAsync(
+ ThreadPool.Names.REMOTE_PURGE,
+ remoteClusterStateService.getCusterMetadataBasePath(clusterName, clusterUUID),
+ new ActionListener<>() {
+ @Override
+ public void onResponse(Void unused) {
+ logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID);
+ }
+
+ @Override
+ public void onFailure(Exception e) {
+ logger.error(
+ new ParameterizedMessage(
+ "Exception occurred while deleting all remote cluster metadata for cluster UUID {}",
+ clusterUUID
+ ),
+ e
+ );
+ remoteStateStats.cleanUpAttemptFailed();
+ }
+ }
+ )
+ );
+ }
+
+ // package private for testing
+ void deleteStalePaths(String clusterName, String clusterUUID, List stalePaths) throws IOException {
+ logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths));
+ getBlobStoreTransferService().deleteBlobs(
+ remoteClusterStateService.getCusterMetadataBasePath(clusterName, clusterUUID),
+ stalePaths
+ );
+ }
+
+ /**
+     * Purges remote cluster state for all stale cluster UUIDs, retaining only the committed and previous cluster UUIDs
+ * @param clusterState current state of the cluster
+ * @param committedManifest last committed ClusterMetadataManifest
+ */
+ public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) {
+ threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> {
+ String clusterName = clusterState.getClusterName().value();
+ logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName);
+ Set allClustersUUIDsInRemote;
+ try {
+ allClustersUUIDsInRemote = new HashSet<>(
+ remoteClusterStateService.getAllClusterUUIDs(clusterState.getClusterName().value())
+ );
+ } catch (IOException e) {
+ logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName));
+ return;
+ }
+ // Retain last 2 cluster uuids data
+ allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID());
+ allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID());
+ deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote));
+ });
+ }
+
+ public TimeValue getStaleFileCleanupInterval() {
+ return this.staleFileCleanupInterval;
+ }
+
+ AsyncStaleFileDeletion getStaleFileDeletionTask() { // for testing
+ return this.staleFileDeletionTask;
+ }
+
+ RemotePersistenceStats getStats() {
+ return this.remoteStateStats;
+ }
+
+ static final class AsyncStaleFileDeletion extends AbstractAsyncTask {
+ private final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager;
+
+ AsyncStaleFileDeletion(RemoteClusterStateCleanupManager remoteClusterStateCleanupManager) {
+ super(
+ logger,
+ remoteClusterStateCleanupManager.threadpool,
+ remoteClusterStateCleanupManager.getStaleFileCleanupInterval(),
+ true
+ );
+ this.remoteClusterStateCleanupManager = remoteClusterStateCleanupManager;
+ rescheduleIfNecessary();
+ }
+
+ @Override
+ protected boolean mustReschedule() {
+ return true;
+ }
+
+ @Override
+ protected void runInternal() {
+ remoteClusterStateCleanupManager.cleanUpStaleFiles();
+ }
+
+ @Override
+ protected String getThreadPool() {
+ return ThreadPool.Names.REMOTE_PURGE;
+ }
+ }
+}
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
index 0f862d1b68820..d0593dcd51475 100644
--- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
+++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
@@ -18,11 +18,14 @@
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.cluster.metadata.TemplatesMetadata;
+import org.opensearch.cluster.routing.remote.RemoteRoutingTableService;
+import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.CheckedRunnable;
import org.opensearch.common.Nullable;
import org.opensearch.common.blobstore.BlobContainer;
import org.opensearch.common.blobstore.BlobMetadata;
import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.common.blobstore.BlobStore;
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Setting.Property;
@@ -59,7 +62,6 @@
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.function.LongSupplier;
@@ -68,6 +70,7 @@
import static java.util.Objects.requireNonNull;
import static org.opensearch.gateway.PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled;
/**
@@ -81,8 +84,6 @@ public class RemoteClusterStateService implements Closeable {
public static final String METADATA_MANIFEST_NAME_FORMAT = "%s";
- public static final int RETAINED_MANIFESTS = 10;
-
public static final String DELIMITER = "__";
public static final String CUSTOM_DELIMITER = "--";
@@ -202,20 +203,20 @@ public class RemoteClusterStateService implements Closeable {
private final List indexMetadataUploadListeners;
private BlobStoreRepository blobStoreRepository;
private BlobStoreTransferService blobStoreTransferService;
+ private Optional remoteRoutingTableService;
private volatile TimeValue slowWriteLoggingThreshold;
private volatile TimeValue indexMetadataUploadTimeout;
private volatile TimeValue globalMetadataUploadTimeout;
private volatile TimeValue metadataManifestUploadTimeout;
-
- private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false);
+ private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager;
private final RemotePersistenceStats remoteStateStats;
private final String CLUSTER_STATE_UPLOAD_TIME_LOG_STRING = "writing cluster state for version [{}] took [{}ms]";
private final String METADATA_UPDATE_LOG_STRING = "wrote metadata for [{}] indices and skipped [{}] unchanged "
+ "indices, coordination metadata updated : [{}], settings metadata updated : [{}], templates metadata "
+ "updated : [{}], custom metadata updated : [{}]";
public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1;
- public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V2;
+ public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V3;
public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 2;
// ToXContent Params with gateway mode.
@@ -232,7 +233,7 @@ public RemoteClusterStateService(
String nodeId,
Supplier repositoriesService,
Settings settings,
- ClusterSettings clusterSettings,
+ ClusterService clusterService,
LongSupplier relativeTimeNanosSupplier,
ThreadPool threadPool,
List indexMetadataUploadListeners
@@ -243,6 +244,7 @@ public RemoteClusterStateService(
this.settings = settings;
this.relativeTimeNanosSupplier = relativeTimeNanosSupplier;
this.threadpool = threadPool;
+ ClusterSettings clusterSettings = clusterService.getClusterSettings();
this.slowWriteLoggingThreshold = clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD);
this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING);
this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING);
@@ -252,14 +254,11 @@ public RemoteClusterStateService(
clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout);
clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout);
this.remoteStateStats = new RemotePersistenceStats();
+ this.remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(this, clusterService);
this.indexMetadataUploadListeners = indexMetadataUploadListeners;
- }
-
- private BlobStoreTransferService getBlobStoreTransferService() {
- if (blobStoreTransferService == null) {
- blobStoreTransferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), threadpool);
- }
- return blobStoreTransferService;
+ this.remoteRoutingTableService = isRemoteRoutingTableEnabled(settings)
+ ? Optional.of(new RemoteRoutingTableService(repositoriesService, settings))
+ : Optional.empty();
}
/**
@@ -417,7 +416,6 @@ public ClusterMetadataManifest writeIncrementalMetadata(
: previousManifest.getCustomMetadataMap(),
false
);
- deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS);
final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos);
remoteStateStats.stateSucceeded();
@@ -721,6 +719,10 @@ private CheckedRunnable<Exception> getAsyncMetadataWriteAction(
);
}
+ public RemoteClusterStateCleanupManager getCleanupManager() {
+ return remoteClusterStateCleanupManager;
+ }
+
@Nullable
public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest)
throws IOException {
@@ -740,15 +742,22 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat
previousManifest.getCustomMetadataMap(),
true
);
- deleteStaleClusterUUIDs(clusterState, committedManifest);
+ if (!previousManifest.isClusterUUIDCommitted() && committedManifest.isClusterUUIDCommitted()) {
+ remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifest);
+ }
+
return committedManifest;
}
@Override
public void close() throws IOException {
+ remoteClusterStateCleanupManager.close();
if (blobStoreRepository != null) {
IOUtils.close(blobStoreRepository);
}
+ if (this.remoteRoutingTableService.isPresent()) {
+ this.remoteRoutingTableService.get().close();
+ }
}
public void start() {
@@ -760,6 +769,8 @@ public void start() {
final Repository repository = repositoriesService.get().repository(remoteStoreRepo);
assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository";
blobStoreRepository = (BlobStoreRepository) repository;
+ remoteClusterStateCleanupManager.start();
+ this.remoteRoutingTableService.ifPresent(RemoteRoutingTableService::start);
}
private ClusterMetadataManifest uploadManifest(
@@ -795,7 +806,10 @@ private ClusterMetadataManifest uploadManifest(
uploadedCoordinationMetadata,
uploadedSettingsMetadata,
uploadedTemplatesMetadata,
- uploadedCustomMetadataMap
+ uploadedCustomMetadataMap,
+ clusterState.routingTable().version(),
+ // TODO: Add actual list of changed indices routing with index routing upload flow.
+ new ArrayList<>()
);
writeMetadataManifest(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifest, manifestFileName);
return manifest;
@@ -850,6 +864,14 @@ private void writeMetadataManifest(String clusterName, String clusterUUID, Clust
);
}
+ ThreadPool getThreadpool() {
+ return threadpool;
+ }
+
+ BlobStore getBlobStore() {
+ return blobStoreRepository.blobStore();
+ }
+
private BlobContainer indexMetadataContainer(String clusterName, String clusterUUID, String indexUUID) {
// 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX
return blobStoreRepository.blobStore()
@@ -867,7 +889,7 @@ private BlobContainer manifestContainer(String clusterName, String clusterUUID)
return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID));
}
- private BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) {
+ BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) {
return blobStoreRepository.basePath().add(encodeString(clusterName)).add(CLUSTER_STATE_PATH_TOKEN).add(clusterUUID);
}
@@ -933,6 +955,11 @@ public TimeValue getMetadataManifestUploadTimeout() {
return this.metadataManifestUploadTimeout;
}
+ // Package private for unit test
+ Optional<RemoteRoutingTableService> getRemoteRoutingTableService() {
+ return this.remoteRoutingTableService;
+ }
+
static String getManifestFileName(long term, long version, boolean committed, int codecVersion) {
// 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____
return String.join(
@@ -982,7 +1009,7 @@ private static String metadataAttributeFileName(String componentPrefix, Long met
);
}
- private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) {
+ BlobPath getManifestFolderPath(String clusterName, String clusterUUID) {
return getCusterMetadataBasePath(clusterName, clusterUUID).add(MANIFEST_PATH_TOKEN);
}
@@ -1235,7 +1262,7 @@ public String getLastKnownUUIDFromRemote(String clusterName) {
}
}
- private Set<String> getAllClusterUUIDs(String clusterName) throws IOException {
+ Set<String> getAllClusterUUIDs(String clusterName) throws IOException {
Map<String, BlobContainer> clusterUUIDMetadata = clusterUUIDContainer(clusterName).children();
if (clusterUUIDMetadata == null) {
return Collections.emptySet();
@@ -1426,7 +1453,7 @@ private Optional<String> getLatestManifestFileName(String clusterName, String cl
* @param clusterName name of the cluster
* @return ClusterMetadataManifest
*/
- private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename)
+ ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename)
throws IllegalStateException {
try {
return getClusterMetadataManifestBlobStoreFormat(filename).read(
@@ -1486,234 +1513,6 @@ public RemoteStateTransferException(String errorDesc, Throwable cause) {
}
}
- /**
- * Purges all remote cluster state against provided cluster UUIDs
- *
- * @param clusterName name of the cluster
- * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged
- */
- void deleteStaleUUIDsClusterMetadata(String clusterName, List<String> clusterUUIDs) {
- clusterUUIDs.forEach(clusterUUID -> {
- getBlobStoreTransferService().deleteAsync(
- ThreadPool.Names.REMOTE_PURGE,
- getCusterMetadataBasePath(clusterName, clusterUUID),
- new ActionListener<>() {
- @Override
- public void onResponse(Void unused) {
- logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID);
- }
-
- @Override
- public void onFailure(Exception e) {
- logger.error(
- new ParameterizedMessage(
- "Exception occurred while deleting all remote cluster metadata for cluster UUID {}",
- clusterUUID
- ),
- e
- );
- remoteStateStats.cleanUpAttemptFailed();
- }
- }
- );
- });
- }
-
- /**
- * Deletes older than last {@code versionsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests
- *
- * @param clusterName name of the cluster
- * @param clusterUUID uuid of cluster state to refer to in remote
- * @param manifestsToRetain no of latest manifest files to keep in remote
- */
- // package private for testing
- void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) {
- if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) {
- logger.info("Delete stale cluster metadata task is already in progress.");
- return;
- }
- try {
- getBlobStoreTransferService().listAllInSortedOrderAsync(
- ThreadPool.Names.REMOTE_PURGE,
- getManifestFolderPath(clusterName, clusterUUID),
- "manifest",
- Integer.MAX_VALUE,
- new ActionListener<>() {
- @Override
- public void onResponse(List<BlobMetadata> blobMetadata) {
- if (blobMetadata.size() > manifestsToRetain) {
- deleteClusterMetadata(
- clusterName,
- clusterUUID,
- blobMetadata.subList(0, manifestsToRetain - 1),
- blobMetadata.subList(manifestsToRetain - 1, blobMetadata.size())
- );
- }
- deleteStaleMetadataRunning.set(false);
- }
-
- @Override
- public void onFailure(Exception e) {
- logger.error(
- new ParameterizedMessage(
- "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}",
- clusterUUID
- )
- );
- deleteStaleMetadataRunning.set(false);
- }
- }
- );
- } catch (Exception e) {
- deleteStaleMetadataRunning.set(false);
- throw e;
- }
- }
-
- private void deleteClusterMetadata(
- String clusterName,
- String clusterUUID,
- List<BlobMetadata> activeManifestBlobMetadata,
- List<BlobMetadata> staleManifestBlobMetadata
- ) {
- try {
- Set<String> filesToKeep = new HashSet<>();
- Set<String> staleManifestPaths = new HashSet<>();
- Set<String> staleIndexMetadataPaths = new HashSet<>();
- Set<String> staleGlobalMetadataPaths = new HashSet<>();
- activeManifestBlobMetadata.forEach(blobMetadata -> {
- ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest(
- clusterName,
- clusterUUID,
- blobMetadata.name()
- );
- clusterMetadataManifest.getIndices()
- .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename()));
- if (clusterMetadataManifest.getGlobalMetadataFileName() != null) {
- filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName());
- } else {
- filesToKeep.add(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename());
- filesToKeep.add(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename());
- filesToKeep.add(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename());
- clusterMetadataManifest.getCustomMetadataMap()
- .forEach((key, value) -> { filesToKeep.add(value.getUploadedFilename()); });
- }
- });
- staleManifestBlobMetadata.forEach(blobMetadata -> {
- ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest(
- clusterName,
- clusterUUID,
- blobMetadata.name()
- );
- staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name());
- if (clusterMetadataManifest.getGlobalMetadataFileName() != null) {
- if (filesToKeep.contains(clusterMetadataManifest.getGlobalMetadataFileName()) == false) {
- String[] globalMetadataSplitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/");
- staleGlobalMetadataPaths.add(
- new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName(
- globalMetadataSplitPath[globalMetadataSplitPath.length - 1]
- )
- );
- }
- } else {
- if (filesToKeep.contains(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()) == false) {
- String[] coordinationMetadataSplitPath = clusterMetadataManifest.getCoordinationMetadata()
- .getUploadedFilename()
- .split("/");
- staleGlobalMetadataPaths.add(
- new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName(
- coordinationMetadataSplitPath[coordinationMetadataSplitPath.length - 1]
- )
- );
- }
- if (filesToKeep.contains(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()) == false) {
- String[] templatesMetadataSplitPath = clusterMetadataManifest.getTemplatesMetadata()
- .getUploadedFilename()
- .split("/");
- staleGlobalMetadataPaths.add(
- new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName(
- templatesMetadataSplitPath[templatesMetadataSplitPath.length - 1]
- )
- );
- }
- if (filesToKeep.contains(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()) == false) {
- String[] settingsMetadataSplitPath = clusterMetadataManifest.getSettingsMetadata().getUploadedFilename().split("/");
- staleGlobalMetadataPaths.add(
- new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName(
- settingsMetadataSplitPath[settingsMetadataSplitPath.length - 1]
- )
- );
- }
- clusterMetadataManifest.getCustomMetadataMap().forEach((key, value) -> {
- if (filesToKeep.contains(value.getUploadedFilename()) == false) {
- String[] customMetadataSplitPath = value.getUploadedFilename().split("/");
- staleGlobalMetadataPaths.add(
- new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName(
- customMetadataSplitPath[customMetadataSplitPath.length - 1]
- )
- );
- }
- });
- }
-
- clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> {
- if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) {
- staleIndexMetadataPaths.add(
- new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString()
- + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename())
- );
- }
- });
- });
-
- if (staleManifestPaths.isEmpty()) {
- logger.debug("No stale Remote Cluster Metadata files found");
- return;
- }
-
- deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths));
- deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths));
- deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths));
- } catch (IllegalStateException e) {
- logger.error("Error while fetching Remote Cluster Metadata manifests", e);
- } catch (IOException e) {
- logger.error("Error while deleting stale Remote Cluster Metadata files", e);
- remoteStateStats.cleanUpAttemptFailed();
- } catch (Exception e) {
- logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e);
- remoteStateStats.cleanUpAttemptFailed();
- }
- }
-
- private void deleteStalePaths(String clusterName, String clusterUUID, List<String> stalePaths) throws IOException {
- logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths));
- getBlobStoreTransferService().deleteBlobs(getCusterMetadataBasePath(clusterName, clusterUUID), stalePaths);
- }
-
- /**
- * Purges all remote cluster state against provided cluster UUIDs
- *
- * @param clusterState current state of the cluster
- * @param committedManifest last committed ClusterMetadataManifest
- */
- public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) {
- threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> {
- String clusterName = clusterState.getClusterName().value();
- logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName);
- Set<String> allClustersUUIDsInRemote;
- try {
- allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value()));
- } catch (IOException e) {
- logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName));
- return;
- }
- // Retain last 2 cluster uuids data
- allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID());
- allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID());
- deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote));
- });
- }
-
public RemotePersistenceStats getStats() {
return remoteStateStats;
}
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java
new file mode 100644
index 0000000000000..500d1af0211e8
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+
+/**
+ * Utility class for Remote Cluster State
+ */
+public class RemoteClusterStateUtils {
+ public static final String PATH_DELIMITER = "/";
+
+ public static String encodeString(String content) {
+ return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8));
+ }
+}
diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateBlobStore.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateBlobStore.java
new file mode 100644
index 0000000000000..1aeecc4e70382
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateBlobStore.java
@@ -0,0 +1,107 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote.model;
+
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.common.remote.AbstractRemoteWritableBlobEntity;
+import org.opensearch.common.remote.RemoteWritableEntityStore;
+import org.opensearch.common.remote.RemoteWriteableEntity;
+import org.opensearch.core.action.ActionListener;
+import org.opensearch.gateway.remote.RemoteClusterStateUtils;
+import org.opensearch.index.translog.transfer.BlobStoreTransferService;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+import org.opensearch.threadpool.ThreadPool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.concurrent.ExecutorService;
+
+/**
+ * Abstract class for a blob type storage
+ *
+ * @param <T> The entity which can be uploaded to / downloaded from blob store
+ * @param <U> The concrete class implementing {@link RemoteWriteableEntity} which is used as a wrapper for T entity.
+ */
+public class RemoteClusterStateBlobStore<T, U extends AbstractRemoteWritableBlobEntity<T>> implements RemoteWritableEntityStore<T, U> {
+
+ private final BlobStoreTransferService transferService;
+ private final BlobStoreRepository blobStoreRepository;
+ private final String clusterName;
+ private final ExecutorService executorService;
+
+ public RemoteClusterStateBlobStore(
+ final BlobStoreTransferService blobStoreTransferService,
+ final BlobStoreRepository blobStoreRepository,
+ final String clusterName,
+ final ThreadPool threadPool,
+ final String executor
+ ) {
+ this.transferService = blobStoreTransferService;
+ this.blobStoreRepository = blobStoreRepository;
+ this.clusterName = clusterName;
+ this.executorService = threadPool.executor(executor);
+ }
+
+ @Override
+ public void writeAsync(final U entity, final ActionListener listener) {
+ try {
+ try (InputStream inputStream = entity.serialize()) {
+ BlobPath blobPath = getBlobPathForUpload(entity);
+ entity.setFullBlobName(blobPath);
+ // TODO uncomment below logic after merging PR https://github.com/opensearch-project/OpenSearch/pull/13836
+ // transferService.uploadBlob(inputStream, getBlobPathForUpload(entity), entity.getBlobFileName(), WritePriority.URGENT,
+ // listener);
+ }
+ } catch (Exception e) {
+ listener.onFailure(e);
+ }
+ }
+
+ public T read(final U entity) throws IOException {
+ // TODO Add timing logs and tracing
+ assert entity.getFullBlobName() != null;
+ return entity.deserialize(transferService.downloadBlob(getBlobPathForDownload(entity), entity.getBlobFileName()));
+ }
+
+ @Override
+ public void readAsync(final U entity, final ActionListener listener) {
+ executorService.execute(() -> {
+ try {
+ listener.onResponse(read(entity));
+ } catch (Exception e) {
+ listener.onFailure(e);
+ }
+ });
+ }
+
+ private BlobPath getBlobPathForUpload(final AbstractRemoteWritableBlobEntity<T> obj) {
+ BlobPath blobPath = blobStoreRepository.basePath()
+ .add(RemoteClusterStateUtils.encodeString(clusterName))
+ .add("cluster-state")
+ .add(obj.clusterUUID());
+ for (String token : obj.getBlobPathParameters().getPathTokens()) {
+ blobPath = blobPath.add(token);
+ }
+ return blobPath;
+ }
+
+ private BlobPath getBlobPathForDownload(final AbstractRemoteWritableBlobEntity<T> obj) {
+ String[] pathTokens = obj.getBlobPathTokens();
+ BlobPath blobPath = new BlobPath();
+ if (pathTokens == null || pathTokens.length < 1) {
+ return blobPath;
+ }
+ // Iterate till second last path token to get the blob folder
+ for (int i = 0; i < pathTokens.length - 1; i++) {
+ blobPath = blobPath.add(pathTokens[i]);
+ }
+ return blobPath;
+ }
+
+}
diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/package-info.java b/server/src/main/java/org/opensearch/gateway/remote/model/package-info.java
new file mode 100644
index 0000000000000..c0d13d15cc885
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/remote/model/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Package containing models for remote cluster state
+ */
+package org.opensearch.gateway.remote.model;
diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java
new file mode 100644
index 0000000000000..5baea6adba0c7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java
@@ -0,0 +1,81 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote.routingtable;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFormatTooNewException;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.common.io.stream.Writeable;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+/**
+ * The stored header information for the individual index routing table
+ */
+public class IndexRoutingTableHeader implements Writeable {
+
+ public static final String INDEX_ROUTING_HEADER_CODEC = "index_routing_header_codec";
+ public static final int INITIAL_VERSION = 1;
+ public static final int CURRENT_VERSION = INITIAL_VERSION;
+ private final String indexName;
+
+ public IndexRoutingTableHeader(String indexName) {
+ this.indexName = indexName;
+ }
+
+ /**
+ * Reads the contents on the stream into the corresponding {@link IndexRoutingTableHeader}
+ *
+ * @param in streamInput
+ * @throws IOException exception thrown on failing to read from stream.
+ */
+ public IndexRoutingTableHeader(StreamInput in) throws IOException {
+ try {
+ readHeaderVersion(in);
+ indexName = in.readString();
+ } catch (EOFException e) {
+ throw new IOException("index routing header truncated", e);
+ }
+ }
+
+ private void readHeaderVersion(final StreamInput in) throws IOException {
+ try {
+ CodecUtil.checkHeader(new InputStreamDataInput(in), INDEX_ROUTING_HEADER_CODEC, INITIAL_VERSION, CURRENT_VERSION);
+ } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
+ throw new IOException("index routing table header corrupted", e);
+ }
+ }
+
+ /**
+ * Write the IndexRoutingTable to given stream.
+ *
+ * @param out stream to write
+ * @throws IOException exception thrown on failing to write to stream.
+ */
+ public void writeTo(StreamOutput out) throws IOException {
+ try {
+ CodecUtil.writeHeader(new OutputStreamDataOutput(out), INDEX_ROUTING_HEADER_CODEC, CURRENT_VERSION);
+ out.writeString(indexName);
+ out.flush();
+ } catch (IOException e) {
+ throw new IOException("Failed to write IndexRoutingTable header", e);
+ }
+ }
+
+ public String getIndexName() {
+ return indexName;
+ }
+
+}
diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTable.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTable.java
new file mode 100644
index 0000000000000..17c55190da07f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTable.java
@@ -0,0 +1,100 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote.routingtable;
+
+import org.opensearch.cluster.routing.IndexRoutingTable;
+import org.opensearch.cluster.routing.IndexShardRoutingTable;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput;
+import org.opensearch.core.common.io.stream.InputStreamStreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.common.io.stream.Writeable;
+import org.opensearch.core.index.Index;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Remote store object for IndexRoutingTable
+ */
+public class RemoteIndexRoutingTable implements Writeable {
+
+ private final IndexRoutingTable indexRoutingTable;
+
+ public RemoteIndexRoutingTable(IndexRoutingTable indexRoutingTable) {
+ this.indexRoutingTable = indexRoutingTable;
+ }
+
+ /**
+ * Reads data from inputStream and creates RemoteIndexRoutingTable object with the {@link IndexRoutingTable}
+ * @param inputStream input stream with index routing data
+ * @param index index for the current routing data
+ * @throws IOException exception thrown on failing to read from stream.
+ */
+ public RemoteIndexRoutingTable(InputStream inputStream, Index index) throws IOException {
+ try {
+ try (BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(new InputStreamStreamInput(inputStream), "assertion")) {
+ // Read the Table Header first and confirm the index
+ IndexRoutingTableHeader indexRoutingTableHeader = new IndexRoutingTableHeader(in);
+ assert indexRoutingTableHeader.getIndexName().equals(index.getName());
+
+ int numberOfShardRouting = in.readVInt();
+ IndexRoutingTable.Builder indicesRoutingTable = IndexRoutingTable.builder(index);
+ for (int idx = 0; idx < numberOfShardRouting; idx++) {
+ IndexShardRoutingTable indexShardRoutingTable = IndexShardRoutingTable.Builder.readFrom(in);
+ indicesRoutingTable.addIndexShard(indexShardRoutingTable);
+ }
+ verifyCheckSum(in);
+ indexRoutingTable = indicesRoutingTable.build();
+ }
+ } catch (EOFException e) {
+ throw new IOException("Indices Routing table is corrupted", e);
+ }
+ }
+
+ public IndexRoutingTable getIndexRoutingTable() {
+ return indexRoutingTable;
+ }
+
+ /**
+ * Writes {@link IndexRoutingTable} to the given stream
+ * @param streamOutput output stream to write
+ * @throws IOException exception thrown on failing to write to stream.
+ */
+ @Override
+ public void writeTo(StreamOutput streamOutput) throws IOException {
+ try {
+ BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput(streamOutput);
+ IndexRoutingTableHeader indexRoutingTableHeader = new IndexRoutingTableHeader(indexRoutingTable.getIndex().getName());
+ indexRoutingTableHeader.writeTo(out);
+ out.writeVInt(indexRoutingTable.shards().size());
+ for (IndexShardRoutingTable next : indexRoutingTable) {
+ IndexShardRoutingTable.Builder.writeTo(next, out);
+ }
+ out.writeLong(out.getChecksum());
+ out.flush();
+ } catch (IOException e) {
+ throw new IOException("Failed to write IndexRoutingTable to stream", e);
+ }
+ }
+
+ private void verifyCheckSum(BufferedChecksumStreamInput in) throws IOException {
+ long expectedChecksum = in.getChecksum();
+ long readChecksum = in.readLong();
+ if (readChecksum != expectedChecksum) {
+ throw new IOException(
+ "checksum verification failed - expected: 0x"
+ + Long.toHexString(expectedChecksum)
+ + ", got: 0x"
+ + Long.toHexString(readChecksum)
+ );
+ }
+ }
+}
diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/package-info.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/package-info.java
new file mode 100644
index 0000000000000..a6cb2251a5dd7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Package containing class to perform operations on remote routing table.
+ */
+package org.opensearch.gateway.remote.routingtable;
diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java
index 20afd7b2f3568..77556f8391473 100644
--- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java
+++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java
@@ -170,6 +170,13 @@ protected boolean performAfterRefreshWithPermit(boolean didRefresh) {
* @return true if sync is needed
*/
private boolean shouldSync(boolean didRefresh, boolean skipPrimaryTermCheck) {
+ // Ignore syncing segments if the underlying shard is closed
+ // This also makes sure that retries are not scheduled for shards
+ // with failed syncSegments invocation after they are closed
+ if (shardClosed()) {
+ logger.info("Shard is already closed. Not attempting sync to remote store");
+ return false;
+ }
boolean shouldSync = didRefresh // If the readers change, didRefresh is always true.
// The third condition exists for uploading the zero state segments where the refresh has not changed the reader
// reference, but it is important to upload the zero state segments so that the restore does not break.
@@ -607,6 +614,15 @@ public void onFailure(String file) {
};
}
+ /**
+ * Checks if the underlying IndexShard instance is closed
+ *
+ * @return true if it is closed, false otherwise
+ */
+ private boolean shardClosed() {
+ return indexShard.state() == IndexShardState.CLOSED;
+ }
+
@Override
protected Logger getLogger() {
return logger;
diff --git a/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java b/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java
index d6fa2a2e53de3..37af1dcbeab8b 100644
--- a/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java
+++ b/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java
@@ -32,6 +32,7 @@
package org.opensearch.index.translog;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput;
import org.opensearch.core.common.io.stream.ByteBufferStreamInput;
import org.opensearch.index.seqno.SequenceNumbers;
diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java
index 67549c86b7dd2..f29b6fba6537f 100644
--- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java
+++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java
@@ -706,6 +706,10 @@ int availablePermits() {
*/
@Override
protected boolean shouldFlush() {
- return readers.size() >= translogTransferManager.getMaxRemoteTranslogReadersSettings();
+ int maxRemoteTlogReaders = translogTransferManager.getMaxRemoteTranslogReadersSettings();
+ if (maxRemoteTlogReaders == -1) {
+ return false;
+ }
+ return readers.size() >= maxRemoteTlogReaders;
}
}
diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java
index 842e9c77d2350..87e0c21b8203c 100644
--- a/server/src/main/java/org/opensearch/index/translog/Translog.java
+++ b/server/src/main/java/org/opensearch/index/translog/Translog.java
@@ -48,6 +48,8 @@
import org.opensearch.core.common.Strings;
import org.opensearch.core.common.bytes.BytesArray;
import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.index.shard.ShardId;
diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java
index 7b5be9505f27a..66a9fe08d06b5 100644
--- a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java
+++ b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java
@@ -40,6 +40,8 @@
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.io.Channels;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput;
import org.opensearch.core.common.io.stream.InputStreamStreamInput;
import org.opensearch.core.common.io.stream.OutputStreamStreamOutput;
import org.opensearch.core.common.io.stream.StreamInput;
diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java b/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java
index 89718156cbbe8..521472f4d64a0 100644
--- a/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java
+++ b/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java
@@ -32,6 +32,7 @@
package org.opensearch.index.translog;
import org.opensearch.common.io.Channels;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput;
import org.opensearch.index.seqno.SequenceNumbers;
import java.io.EOFException;
diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java
index 86f7567f3333d..b0c7d51c3e43b 100644
--- a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java
+++ b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java
@@ -50,6 +50,7 @@
import org.opensearch.core.Assertions;
import org.opensearch.core.common.bytes.BytesArray;
import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput;
import org.opensearch.core.common.unit.ByteSizeValue;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.index.seqno.SequenceNumbers;
diff --git a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java
index 074186f64a75d..8cb482c8d8681 100644
--- a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java
+++ b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java
@@ -25,6 +25,7 @@
*/
@PublicApi(since = "2.14.0")
public class RemoteStoreSettings {
+ private static final int MIN_CLUSTER_REMOTE_MAX_TRANSLOG_READERS = 100;
/**
* Used to specify the default translog buffer interval for remote store backed indexes.
@@ -112,7 +113,12 @@ public class RemoteStoreSettings {
public static final Setting<Integer> CLUSTER_REMOTE_MAX_TRANSLOG_READERS = Setting.intSetting(
"cluster.remote_store.translog.max_readers",
1000,
- 100,
+ -1,
+ v -> {
+ if (v != -1 && v < MIN_CLUSTER_REMOTE_MAX_TRANSLOG_READERS) {
+ throw new IllegalArgumentException("Cannot set value lower than " + MIN_CLUSTER_REMOTE_MAX_TRANSLOG_READERS);
+ }
+ },
Property.Dynamic,
Property.NodeScope
);
diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java
index 76109ba10624a..cb1f2caa082fc 100644
--- a/server/src/main/java/org/opensearch/node/Node.java
+++ b/server/src/main/java/org/opensearch/node/Node.java
@@ -138,6 +138,7 @@
import org.opensearch.gateway.MetaStateService;
import org.opensearch.gateway.PersistedClusterStateService;
import org.opensearch.gateway.ShardsBatchGatewayAllocator;
+import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager;
import org.opensearch.gateway.remote.RemoteClusterStateService;
import org.opensearch.http.HttpServerTransport;
import org.opensearch.identity.IdentityService;
@@ -752,6 +753,7 @@ protected Node(
threadPool::relativeTimeInMillis
);
final RemoteClusterStateService remoteClusterStateService;
+ final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager;
final RemoteIndexPathUploader remoteIndexPathUploader;
if (isRemoteStoreClusterStateEnabled(settings)) {
remoteIndexPathUploader = new RemoteIndexPathUploader(
@@ -764,14 +766,16 @@ protected Node(
nodeEnvironment.nodeId(),
repositoriesServiceReference::get,
settings,
- clusterService.getClusterSettings(),
+ clusterService,
threadPool::preciseRelativeTimeInNanos,
threadPool,
List.of(remoteIndexPathUploader)
);
+ remoteClusterStateCleanupManager = remoteClusterStateService.getCleanupManager();
} else {
remoteClusterStateService = null;
remoteIndexPathUploader = null;
+ remoteClusterStateCleanupManager = null;
}
// collect engine factory providers from plugins
@@ -1195,7 +1199,8 @@ protected Node(
rerouteService,
fsHealthService,
persistedStateRegistry,
- remoteStoreNodeService
+ remoteStoreNodeService,
+ clusterManagerMetrics
);
final SearchPipelineService searchPipelineService = new SearchPipelineService(
clusterService,
@@ -1376,6 +1381,7 @@ protected Node(
b.bind(MetricsRegistry.class).toInstance(metricsRegistry);
b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService);
b.bind(RemoteIndexPathUploader.class).toProvider(() -> remoteIndexPathUploader);
+ b.bind(RemoteClusterStateCleanupManager.class).toProvider(() -> remoteClusterStateCleanupManager);
b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry);
b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker);
b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory);
diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java
index b10ec0d99c3d5..a0f745a4270c4 100644
--- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java
+++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java
@@ -13,6 +13,7 @@
import org.opensearch.cluster.metadata.RepositoryMetadata;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
import org.opensearch.gateway.remote.RemoteClusterStateService;
import org.opensearch.node.Node;
import org.opensearch.repositories.blobstore.BlobStoreRepository;
@@ -28,6 +29,8 @@
import java.util.Set;
import java.util.stream.Collectors;
+import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL;
+
/**
* This is an abstraction for validating and storing information specific to remote backed storage nodes.
*
@@ -46,6 +49,8 @@ public class RemoteStoreNodeAttribute {
+ "."
+ CryptoMetadata.SETTINGS_KEY;
public static final String REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX = "remote_store.repository.%s.settings.";
+ public static final String REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY = "remote_store.routing_table.repository";
+
private final RepositoriesMetadata repositoriesMetadata;
public static List<String> SUPPORTED_DATA_REPO_NAME_ATTRIBUTES = List.of(
@@ -157,6 +162,10 @@ private Set getValidatedRepositoryNames(DiscoveryNode node) {
} else if (node.getAttributes().containsKey(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)) {
repositoryNames.add(validateAttributeNonNull(node, REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY));
}
+ if (node.getAttributes().containsKey(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)) {
+ repositoryNames.add(validateAttributeNonNull(node, REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY));
+ }
+
return repositoryNames;
}
@@ -187,6 +196,15 @@ public static boolean isRemoteStoreClusterStateEnabled(Settings settings) {
&& isRemoteClusterStateAttributePresent(settings);
}
+ private static boolean isRemoteRoutingTableAttributePresent(Settings settings) {
+ return settings.getByPrefix(Node.NODE_ATTRIBUTES.getKey() + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)
+ .isEmpty() == false;
+ }
+
+ public static boolean isRemoteRoutingTableEnabled(Settings settings) {
+ return FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) && isRemoteRoutingTableAttributePresent(settings);
+ }
+
public RepositoriesMetadata getRepositoriesMetadata() {
return this.repositoriesMetadata;
}
@@ -231,6 +249,21 @@ public int hashCode() {
return hashCode;
}
+ /**
+ * Checks if 2 instances are equal, with option to skip check for a list of repos.
+ *
+ * @param o other instance
+ * @param reposToSkip list of repos to skip check for equality
+ * @return {@code true} iff both instances are equal, not including the repositories in both instances if they are part of reposToSkip.
+ */
+ public boolean equalsWithRepoSkip(Object o, List<String> reposToSkip) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ RemoteStoreNodeAttribute that = (RemoteStoreNodeAttribute) o;
+ return this.getRepositoriesMetadata().equalsIgnoreGenerationsWithRepoSkip(that.getRepositoriesMetadata(), reposToSkip);
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) return true;
diff --git a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java
index a106706c00732..d0bc41b459cc3 100644
--- a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java
+++ b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java
@@ -33,6 +33,7 @@
import org.opensearch.OpenSearchException;
import org.opensearch.Version;
+import org.opensearch.cluster.ClusterManagerMetrics;
import org.opensearch.cluster.ClusterName;
import org.opensearch.cluster.coordination.Coordinator.Mode;
import org.opensearch.cluster.coordination.FollowersChecker.FollowerCheckRequest;
@@ -47,6 +48,9 @@
import org.opensearch.core.transport.TransportResponse.Empty;
import org.opensearch.monitor.NodeHealthService;
import org.opensearch.monitor.StatusInfo;
+import org.opensearch.telemetry.TestInMemoryMetricsRegistry;
+import org.opensearch.telemetry.metrics.MetricsRegistry;
+import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry;
import org.opensearch.telemetry.tracing.noop.NoopTracer;
import org.opensearch.test.EqualsHashCodeTestUtils;
import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction;
@@ -131,6 +135,8 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
transportService.start();
transportService.acceptIncomingRequests();
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
+
final FollowersChecker followersChecker = new FollowersChecker(
settings,
clusterSettings,
@@ -139,7 +145,8 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
(node, reason) -> {
assert false : node;
},
- () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info")
+ () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"),
+ new ClusterManagerMetrics(metricsRegistry)
);
followersChecker.setCurrentNodes(discoveryNodesHolder[0]);
@@ -193,35 +200,43 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
followersChecker.clearCurrentNodes();
deterministicTaskQueue.runAllTasks();
assertThat(checkedNodes, empty());
+ assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
}
public void testFailsNodeThatDoesNotRespond() {
final Settings settings = randomSettings();
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
testBehaviourOfFailingNode(
settings,
() -> null,
"followers check retry count exceeded",
(FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis()
+ FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) * FOLLOWER_CHECK_TIMEOUT_SETTING.get(settings).millis(),
- () -> new StatusInfo(HEALTHY, "healthy-info")
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ metricsRegistry
);
+ assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
}
public void testFailsNodeThatRejectsCheck() {
final Settings settings = randomSettings();
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
testBehaviourOfFailingNode(
settings,
() -> { throw new OpenSearchException("simulated exception"); },
"followers check retry count exceeded",
(FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis(),
- () -> new StatusInfo(HEALTHY, "healthy-info")
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ metricsRegistry
);
+ assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
}
public void testFailureCounterResetsOnSuccess() {
final Settings settings = randomSettings();
final int retryCount = FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings);
final int maxRecoveries = randomIntBetween(3, 10);
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
// passes just enough checks to keep it alive, up to maxRecoveries, and then fails completely
testBehaviourOfFailingNode(settings, new Supplier<Empty>() {
@@ -241,18 +256,23 @@ public Empty get() {
"followers check retry count exceeded",
(FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) * (maxRecoveries + 1) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings)
.millis(),
- () -> new StatusInfo(HEALTHY, "healthy-info")
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ metricsRegistry
);
+ assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
}
public void testFailsNodeThatIsDisconnected() {
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
testBehaviourOfFailingNode(
Settings.EMPTY,
() -> { throw new ConnectTransportException(null, "simulated exception"); },
"disconnected",
0,
- () -> new StatusInfo(HEALTHY, "healthy-info")
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ metricsRegistry
);
+ assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
}
public void testFailsNodeThatDisconnects() {
@@ -297,6 +317,7 @@ public String toString() {
transportService.acceptIncomingRequests();
final AtomicBoolean nodeFailed = new AtomicBoolean();
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
final FollowersChecker followersChecker = new FollowersChecker(
settings,
@@ -307,7 +328,8 @@ public String toString() {
assertTrue(nodeFailed.compareAndSet(false, true));
assertThat(reason, equalTo("disconnected"));
},
- () -> new StatusInfo(HEALTHY, "healthy-info")
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ new ClusterManagerMetrics(metricsRegistry)
);
DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).add(otherNode).localNodeId(localNode.getId()).build();
@@ -318,16 +340,20 @@ public String toString() {
deterministicTaskQueue.runAllRunnableTasks();
assertTrue(nodeFailed.get());
assertThat(followersChecker.getFaultyNodes(), contains(otherNode));
+ assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
}
public void testFailsNodeThatIsUnhealthy() {
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
testBehaviourOfFailingNode(
randomSettings(),
() -> { throw new NodeHealthCheckFailureException("non writable exception"); },
"health check failed",
0,
- () -> new StatusInfo(HEALTHY, "healthy-info")
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ metricsRegistry
);
+ assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
}
private void testBehaviourOfFailingNode(
@@ -335,7 +361,8 @@ private void testBehaviourOfFailingNode(
Supplier<Empty> responder,
String failureReason,
long expectedFailureTime,
- NodeHealthService nodeHealthService
+ NodeHealthService nodeHealthService,
+ MetricsRegistry metricsRegistry
) {
final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
final DiscoveryNode otherNode = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT);
@@ -386,7 +413,6 @@ public String toString() {
transportService.acceptIncomingRequests();
final AtomicBoolean nodeFailed = new AtomicBoolean();
-
final FollowersChecker followersChecker = new FollowersChecker(
settings,
clusterSettings,
@@ -396,7 +422,8 @@ public String toString() {
assertTrue(nodeFailed.compareAndSet(false, true));
assertThat(reason, equalTo(failureReason));
},
- nodeHealthService
+ nodeHealthService,
+ new ClusterManagerMetrics(metricsRegistry)
);
DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).add(otherNode).localNodeId(localNode.getId()).build();
@@ -501,7 +528,11 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
if (exception != null) {
throw exception;
}
- }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(UNHEALTHY, "unhealthy-info"));
+ },
+ (node, reason) -> { assert false : node; },
+ () -> new StatusInfo(UNHEALTHY, "unhealthy-info"),
+ new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)
+ );
final long leaderTerm = randomLongBetween(2, Long.MAX_VALUE);
final long followerTerm = randomLongBetween(1, leaderTerm - 1);
@@ -574,7 +605,11 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
if (exception != null) {
throw exception;
}
- }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(HEALTHY, "healthy-info"));
+ },
+ (node, reason) -> { assert false : node; },
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)
+ );
{
// Does not call into the coordinator in the normal case
@@ -721,7 +756,11 @@ public void testPreferClusterManagerNodes() {
);
final FollowersChecker followersChecker = new FollowersChecker(Settings.EMPTY, clusterSettings, transportService, fcr -> {
assert false : fcr;
- }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(HEALTHY, "healthy-info"));
+ },
+ (node, reason) -> { assert false : node; },
+ () -> new StatusInfo(HEALTHY, "healthy-info"),
+ new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)
+ );
followersChecker.setCurrentNodes(discoveryNodes);
List<DiscoveryNode> followerTargets = Stream.of(capturingTransport.getCapturedRequestsAndClear())
.map(cr -> cr.node)
diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java
index 3e343e95f6c4b..9cb1bd0b57132 100644
--- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java
+++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java
@@ -72,6 +72,7 @@
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING;
@@ -944,6 +945,145 @@ public void testNodeJoinInMixedMode() {
JoinTaskExecutor.ensureNodesCompatibility(joiningNode2, currentNodes, metadata);
}
+ public void testRemoteRoutingTableRepoAbsentNodeJoin() {
+
+ final DiscoveryNode existingNode = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO),
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+
+ ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
+ .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build())
+ .build();
+
+ DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO));
+ JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata());
+ }
+
+ public void testRemoteRoutingTableNodeJoinRepoPresentInJoiningNode() {
+ final DiscoveryNode existingNode = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO),
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+ ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
+ .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build())
+ .build();
+
+ Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO);
+ attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO));
+ DiscoveryNode joiningNode = newDiscoveryNode(attr);
+ JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata());
+ }
+
+ public void testRemoteRoutingTableNodeJoinRepoPresentInExistingNode() {
+ Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO);
+ attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO));
+ final DiscoveryNode existingNode = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ attr,
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+
+ ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
+ .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build())
+ .build();
+
+ DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO));
+ assertThrows(
+ IllegalStateException.class,
+ () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata())
+ );
+ }
+
+ public void testRemoteRoutingTableNodeJoinRepoPresentInBothNode() {
+ Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO);
+ attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO));
+ final DiscoveryNode existingNode = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ attr,
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+
+ ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
+ .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build())
+ .build();
+
+ DiscoveryNode joiningNode = newDiscoveryNode(attr);
+ JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata());
+ }
+
+ public void testRemoteRoutingTableNodeJoinNodeWithRemoteAndRoutingRepoDifference() {
+ Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO);
+ attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO));
+ final DiscoveryNode existingNode = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ attr,
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+
+ final DiscoveryNode existingNode2 = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO),
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+
+ ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
+ .nodes(DiscoveryNodes.builder().add(existingNode2).add(existingNode).localNodeId(existingNode.getId()).build())
+ .build();
+
+ DiscoveryNode joiningNode = newDiscoveryNode(attr);
+ JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata());
+ }
+
+ public void testRemoteRoutingTableNodeJoinNodeWithRemoteAndRoutingRepoDifferenceMixedMode() {
+ Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO);
+ attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO));
+ final DiscoveryNode existingNode = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ attr,
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+
+ final DiscoveryNode existingNode2 = new DiscoveryNode(
+ UUIDs.base64UUID(),
+ buildNewFakeTransportAddress(),
+ remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO),
+ DiscoveryNodeRole.BUILT_IN_ROLES,
+ Version.CURRENT
+ );
+
+ final Settings settings = Settings.builder()
+ .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE)
+ .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")
+ .build();
+ final Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build();
+ FeatureFlags.initializeFeatureFlags(nodeSettings);
+ Metadata metadata = Metadata.builder().persistentSettings(settings).build();
+ ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
+ .nodes(DiscoveryNodes.builder().add(existingNode2).add(existingNode).localNodeId(existingNode.getId()).build())
+ .metadata(metadata)
+ .build();
+
+ DiscoveryNode joiningNode = newDiscoveryNode(attr);
+ JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata());
+ }
+
private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories)
throws Exception {
@@ -985,6 +1125,7 @@ private DiscoveryNode newDiscoveryNode(Map attributes) {
private static final String TRANSLOG_REPO = "translog-repo";
private static final String CLUSTER_STATE_REPO = "cluster-state-repo";
private static final String COMMON_REPO = "remote-repo";
+ private static final String ROUTING_TABLE_REPO = "routing-table-repo";
private Map<String, String> remoteStoreNodeAttributes(String segmentRepoName, String translogRepoName) {
return remoteStoreNodeAttributes(segmentRepoName, translogRepoName, CLUSTER_STATE_REPO);
@@ -1049,6 +1190,28 @@ private Map remoteStateNodeAttributes(String clusterStateRepo) {
};
}
+ private Map<String, String> remoteRoutingTableAttributes(String repoName) {
+ String routingTableRepositoryTypeAttributeKey = String.format(
+ Locale.getDefault(),
+ REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT,
+ repoName
+ );
+ String routingTableRepositorySettingsAttributeKeyPrefix = String.format(
+ Locale.getDefault(),
+ REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX,
+ repoName
+ );
+
+ return new HashMap<>() {
+ {
+ put(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, repoName);
+ putIfAbsent(routingTableRepositoryTypeAttributeKey, "s3");
+ putIfAbsent(routingTableRepositorySettingsAttributeKeyPrefix + "bucket", "state_bucket");
+ putIfAbsent(routingTableRepositorySettingsAttributeKeyPrefix + "base_path", "/state/path");
+ }
+ };
+ }
+
private void validateAttributes(Map<String, String> remoteStoreNodeAttributes, ClusterState currentState, DiscoveryNode existingNode) {
DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes);
Exception e = assertThrows(
diff --git a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java
index fe65058333116..decdeddfa37a1 100644
--- a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java
+++ b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java
@@ -34,6 +34,7 @@
import org.opensearch.OpenSearchException;
import org.opensearch.Version;
+import org.opensearch.cluster.ClusterManagerMetrics;
import org.opensearch.cluster.ClusterName;
import org.opensearch.cluster.coordination.LeaderChecker.LeaderCheckRequest;
import org.opensearch.cluster.node.DiscoveryNode;
@@ -44,6 +45,7 @@
import org.opensearch.core.transport.TransportResponse;
import org.opensearch.core.transport.TransportResponse.Empty;
import org.opensearch.monitor.StatusInfo;
+import org.opensearch.telemetry.TestInMemoryMetricsRegistry;
import org.opensearch.telemetry.tracing.noop.NoopTracer;
import org.opensearch.test.EqualsHashCodeTestUtils;
import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction;
@@ -175,11 +177,13 @@ public String toString() {
transportService.acceptIncomingRequests();
final AtomicBoolean leaderFailed = new AtomicBoolean();
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
+ final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry);
final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> {
assertThat(e.getMessage(), matchesRegex("node \\[.*\\] failed \\[[1-9][0-9]*\\] consecutive checks"));
assertTrue(leaderFailed.compareAndSet(false, true));
- }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"));
+ }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics);
logger.info("--> creating first checker");
leaderChecker.updateLeader(leader1);
@@ -229,6 +233,7 @@ public String toString() {
);
}
leaderChecker.updateLeader(null);
+ assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
}
enum Response {
@@ -293,10 +298,13 @@ public String toString() {
transportService.acceptIncomingRequests();
final AtomicBoolean leaderFailed = new AtomicBoolean();
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
+ final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry);
+
final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> {
assertThat(e.getMessage(), anyOf(endsWith("disconnected"), endsWith("disconnected during check")));
assertTrue(leaderFailed.compareAndSet(false, true));
- }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"));
+ }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics);
leaderChecker.updateLeader(leader);
{
@@ -351,6 +359,7 @@ public String toString() {
deterministicTaskQueue.runAllRunnableTasks();
assertTrue(leaderFailed.get());
}
+ assertEquals(Integer.valueOf(3), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
}
public void testFollowerFailsImmediatelyOnHealthCheckFailure() {
@@ -407,10 +416,12 @@ public String toString() {
transportService.acceptIncomingRequests();
final AtomicBoolean leaderFailed = new AtomicBoolean();
+ TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
+ final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry);
final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> {
assertThat(e.getMessage(), endsWith("failed health checks"));
assertTrue(leaderFailed.compareAndSet(false, true));
- }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"));
+ }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics);
leaderChecker.updateLeader(leader);
@@ -430,6 +441,8 @@ public String toString() {
assertTrue(leaderFailed.get());
}
+
+ assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
}
public void testLeaderBehaviour() {
@@ -453,12 +466,15 @@ public void testLeaderBehaviour() {
transportService.start();
transportService.acceptIncomingRequests();
+ final TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
+ final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry);
final LeaderChecker leaderChecker = new LeaderChecker(
settings,
clusterSettings,
transportService,
e -> fail("shouldn't be checking anything"),
- () -> nodeHealthServiceStatus.get()
+ () -> nodeHealthServiceStatus.get(),
+ clusterManagerMetrics
);
final DiscoveryNodes discoveryNodes = DiscoveryNodes.builder()
@@ -523,6 +539,7 @@ public void testLeaderBehaviour() {
equalTo("rejecting leader check from [" + otherNode + "] sent to a node that is no longer the cluster-manager")
);
}
+ assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
}
private class CapturingTransportResponseHandler implements TransportResponseHandler {
diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java
index 10d5dceb74f55..f84f0326f4a9d 100644
--- a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java
+++ b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java
@@ -273,7 +273,8 @@ protected void onSendRequest(
ElectionStrategy.DEFAULT_INSTANCE,
nodeHealthService,
persistedStateRegistry,
- Mockito.mock(RemoteStoreNodeService.class)
+ Mockito.mock(RemoteStoreNodeService.class),
+ new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)
);
transportService.start();
transportService.acceptIncomingRequests();
diff --git a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java
new file mode 100644
index 0000000000000..9a9cbfa153259
--- /dev/null
+++ b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java
@@ -0,0 +1,83 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.cluster.routing.remote;
+
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
+import org.opensearch.repositories.FilterRepository;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.RepositoryMissingException;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+import org.opensearch.test.OpenSearchTestCase;
+import org.junit.After;
+import org.junit.Before;
+
+import java.util.function.Supplier;
+
+import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@link RemoteRoutingTableService} construction and start-up validation.
+ */
+public class RemoteRoutingTableServiceTests extends OpenSearchTestCase {
+
+    private RemoteRoutingTableService remoteRoutingTableService;
+    private Supplier<RepositoriesService> repositoriesServiceSupplier;
+    private RepositoriesService repositoriesService;
+    private BlobStoreRepository blobStoreRepository;
+
+    @Before
+    @SuppressWarnings("unchecked")
+    public void setup() {
+        repositoriesServiceSupplier = mock(Supplier.class);
+        repositoriesService = mock(RepositoriesService.class);
+        when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService);
+
+        // The routing table repository must be declared via a node attribute for the service to initialize.
+        Settings settings = Settings.builder()
+            .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository")
+            .build();
+
+        blobStoreRepository = mock(BlobStoreRepository.class);
+        when(repositoriesService.repository("routing_repository")).thenReturn(blobStoreRepository);
+
+        // The service is gated behind the remote-publication experimental feature flag.
+        Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build();
+        FeatureFlags.initializeFeatureFlags(nodeSettings);
+
+        remoteRoutingTableService = new RemoteRoutingTableService(repositoriesServiceSupplier, settings);
+    }
+
+    @After
+    public void teardown() throws Exception {
+        super.tearDown();
+        remoteRoutingTableService.close();
+    }
+
+    public void testFailInitializationWhenRemoteRoutingDisabled() {
+        final Settings settings = Settings.builder().build();
+        assertThrows(AssertionError.class, () -> new RemoteRoutingTableService(repositoriesServiceSupplier, settings));
+    }
+
+    public void testFailStartWhenRepositoryNotSet() {
+        doThrow(new RepositoryMissingException("repository missing")).when(repositoriesService).repository("routing_repository");
+        assertThrows(RepositoryMissingException.class, () -> remoteRoutingTableService.start());
+    }
+
+    public void testFailStartWhenNotBlobRepository() {
+        final FilterRepository filterRepository = mock(FilterRepository.class);
+        when(repositoriesService.repository("routing_repository")).thenReturn(filterRepository);
+        assertThrows(AssertionError.class, () -> remoteRoutingTableService.start());
+    }
+
+}
diff --git a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java
index b33ebf8333b36..5539b3237c2bf 100644
--- a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java
+++ b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java
@@ -32,6 +32,7 @@
package org.opensearch.discovery;
import org.opensearch.Version;
+import org.opensearch.cluster.ClusterManagerMetrics;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.coordination.Coordinator;
import org.opensearch.cluster.coordination.PersistedStateRegistry;
@@ -48,6 +49,7 @@
import org.opensearch.gateway.GatewayMetaState;
import org.opensearch.node.remotestore.RemoteStoreNodeService;
import org.opensearch.plugins.DiscoveryPlugin;
+import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry;
import org.opensearch.telemetry.tracing.noop.NoopTracer;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.test.transport.MockTransportService;
@@ -128,7 +130,8 @@ private DiscoveryModule newModule(Settings settings, List plugi
mock(RerouteService.class),
null,
new PersistedStateRegistry(),
- remoteStoreNodeService
+ remoteStoreNodeService,
+ new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)
);
}
diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java
index 3ba98c44f8d3e..418e6d8de6adb 100644
--- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java
+++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java
@@ -462,9 +462,7 @@ public void testDataOnlyNodePersistence() throws Exception {
});
when(transportService.getThreadPool()).thenReturn(threadPool);
ClusterService clusterService = mock(ClusterService.class);
- when(clusterService.getClusterSettings()).thenReturn(
- new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)
- );
+ when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS));
final PersistedClusterStateService persistedClusterStateService = new PersistedClusterStateService(
nodeEnvironment,
xContentRegistry(),
@@ -487,7 +485,7 @@ public void testDataOnlyNodePersistence() throws Exception {
nodeEnvironment.nodeId(),
repositoriesServiceSupplier,
settings,
- clusterSettings,
+ clusterService,
() -> 0L,
threadPool,
List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings))
diff --git a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java
index 0b3cd49140939..d1f559eb75f85 100644
--- a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java
+++ b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java
@@ -99,7 +99,7 @@ public void testClusterMetadataManifestXContent() throws IOException {
Version.CURRENT,
"test-node-id",
false,
- ClusterMetadataManifest.CODEC_V2,
+ ClusterMetadataManifest.CODEC_V3,
null,
Collections.singletonList(uploadedIndexMetadata),
"prev-cluster-uuid",
@@ -123,7 +123,9 @@ public void testClusterMetadataManifestXContent() throws IOException {
"custom--weighted_routing_netadata-file"
)
)
- ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity()))
+ ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())),
+ 1L,
+ randomUploadedIndexMetadataList()
);
final XContentBuilder builder = JsonXContent.contentBuilder();
builder.startObject();
@@ -169,7 +171,9 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() {
"custom--weighted_routing_netadata-file"
)
)
- ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity()))
+ ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())),
+ 1L,
+ randomUploadedIndexMetadataList()
);
{ // Mutate Cluster Term
EqualsHashCodeTestUtils.checkEqualsAndHashCode(
@@ -309,6 +313,106 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() {
}
}
+ public void testClusterMetadataManifestXContentV2() throws IOException {
+ UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path");
+ UploadedMetadataAttribute uploadedMetadataAttribute = new UploadedMetadataAttribute("attribute_name", "testing_attribute");
+ ClusterMetadataManifest originalManifest = new ClusterMetadataManifest(
+ 1L,
+ 1L,
+ "test-cluster-uuid",
+ "test-state-uuid",
+ Version.CURRENT,
+ "test-node-id",
+ false,
+ ClusterMetadataManifest.CODEC_V2,
+ null,
+ Collections.singletonList(uploadedIndexMetadata),
+ "prev-cluster-uuid",
+ true,
+ uploadedMetadataAttribute,
+ uploadedMetadataAttribute,
+ uploadedMetadataAttribute,
+ Collections.unmodifiableList(
+ Arrays.asList(
+ new UploadedMetadataAttribute(
+ RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + RepositoriesMetadata.TYPE,
+ "custom--repositories-file"
+ ),
+ new UploadedMetadataAttribute(
+ RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + IndexGraveyard.TYPE,
+ "custom--index_graveyard-file"
+ ),
+ new UploadedMetadataAttribute(
+ RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER
+ + WeightedRoutingMetadata.TYPE,
+ "custom--weighted_routing_netadata-file"
+ )
+ )
+ ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())),
+ 0,
+ new ArrayList<>()
+ );
+ final XContentBuilder builder = JsonXContent.contentBuilder();
+ builder.startObject();
+ originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS);
+ builder.endObject();
+
+ try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) {
+ final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContentV2(parser);
+ assertEquals(originalManifest, fromXContentManifest);
+ }
+ }
+
+ public void testClusterMetadataManifestXContentV3() throws IOException {
+ UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path");
+ UploadedMetadataAttribute uploadedMetadataAttribute = new UploadedMetadataAttribute("attribute_name", "testing_attribute");
+ ClusterMetadataManifest originalManifest = new ClusterMetadataManifest(
+ 1L,
+ 1L,
+ "test-cluster-uuid",
+ "test-state-uuid",
+ Version.CURRENT,
+ "test-node-id",
+ false,
+ ClusterMetadataManifest.CODEC_V3,
+ null,
+ Collections.singletonList(uploadedIndexMetadata),
+ "prev-cluster-uuid",
+ true,
+ uploadedMetadataAttribute,
+ uploadedMetadataAttribute,
+ uploadedMetadataAttribute,
+ Collections.unmodifiableList(
+ Arrays.asList(
+ new UploadedMetadataAttribute(
+ RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + RepositoriesMetadata.TYPE,
+ "custom--repositories-file"
+ ),
+ new UploadedMetadataAttribute(
+ RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + IndexGraveyard.TYPE,
+ "custom--index_graveyard-file"
+ ),
+ new UploadedMetadataAttribute(
+ RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER
+ + WeightedRoutingMetadata.TYPE,
+ "custom--weighted_routing_netadata-file"
+ )
+ )
+ ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())),
+ 1L,
+ Collections.singletonList(uploadedIndexMetadata)
+ );
+ final XContentBuilder builder = JsonXContent.contentBuilder();
+ builder.startObject();
+ originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS);
+ builder.endObject();
+
+ try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) {
+ final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContent(parser);
+ assertEquals(originalManifest, fromXContentManifest);
+ }
+ }
+
private List randomUploadedIndexMetadataList() {
final int size = randomIntBetween(1, 10);
final List uploadedIndexMetadataList = new ArrayList<>(size);
diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java
new file mode 100644
index 0000000000000..24fd1b164a4ff
--- /dev/null
+++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java
@@ -0,0 +1,446 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote;
+
+import org.opensearch.cluster.ClusterName;
+import org.opensearch.cluster.ClusterState;
+import org.opensearch.cluster.metadata.Metadata;
+import org.opensearch.cluster.node.DiscoveryNodes;
+import org.opensearch.cluster.service.ClusterApplierService;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.blobstore.BlobContainer;
+import org.opensearch.common.blobstore.BlobMetadata;
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.common.blobstore.BlobStore;
+import org.opensearch.common.blobstore.support.PlainBlobMetadata;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.concurrent.AbstractAsyncTask;
+import org.opensearch.core.action.ActionListener;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+import org.opensearch.repositories.fs.FsRepository;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.test.VersionUtils;
+import org.opensearch.threadpool.TestThreadPool;
+import org.opensearch.threadpool.ThreadPool;
+import org.junit.After;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
+
+import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V1;
+import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2;
+import static org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata;
+import static org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.AsyncStaleFileDeletion;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.CLUSTER_STATE_PATH_TOKEN;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_PATH_TOKEN;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_PATH_TOKEN;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_PATH_TOKEN;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.SETTING_METADATA;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.TEMPLATES_METADATA;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.encodeString;
+import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.generateClusterStateWithOneIndex;
+import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.nodesWithLocalNodeClusterManager;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class RemoteClusterStateCleanupManagerTests extends OpenSearchTestCase {
+ private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager;
+ private Supplier repositoriesServiceSupplier;
+ private RepositoriesService repositoriesService;
+ private BlobStoreRepository blobStoreRepository;
+ private BlobStore blobStore;
+ private ClusterSettings clusterSettings;
+ private ClusterApplierService clusterApplierService;
+ private ClusterState clusterState;
+ private Metadata metadata;
+ private RemoteClusterStateService remoteClusterStateService;
+ private final ThreadPool threadPool = new TestThreadPool(getClass().getName());
+
+ @Before
+ public void setup() {
+ repositoriesServiceSupplier = mock(Supplier.class);
+ repositoriesService = mock(RepositoriesService.class);
+ when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService);
+
+ String stateRepoTypeAttributeKey = String.format(
+ Locale.getDefault(),
+ "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT,
+ "remote_store_repository"
+ );
+ String stateRepoSettingsAttributeKeyPrefix = String.format(
+ Locale.getDefault(),
+ "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX,
+ "remote_store_repository"
+ );
+
+ Settings settings = Settings.builder()
+ .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository")
+ .put(stateRepoTypeAttributeKey, FsRepository.TYPE)
+ .put(stateRepoSettingsAttributeKeyPrefix + "location", "randomRepoPath")
+ .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true)
+ .build();
+
+ clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ clusterApplierService = mock(ClusterApplierService.class);
+ clusterState = mock(ClusterState.class);
+ metadata = mock(Metadata.class);
+ ClusterService clusterService = mock(ClusterService.class);
+ when(clusterService.getClusterSettings()).thenReturn(clusterSettings);
+ when(clusterState.getClusterName()).thenReturn(new ClusterName("test"));
+ when(metadata.clusterUUID()).thenReturn("testUUID");
+ when(clusterState.metadata()).thenReturn(metadata);
+ when(clusterApplierService.state()).thenReturn(clusterState);
+ when(clusterService.getClusterApplierService()).thenReturn(clusterApplierService);
+
+ blobStoreRepository = mock(BlobStoreRepository.class);
+ blobStore = mock(BlobStore.class);
+ when(blobStoreRepository.blobStore()).thenReturn(blobStore);
+ when(repositoriesService.repository("remote_store_repository")).thenReturn(blobStoreRepository);
+
+ remoteClusterStateService = mock(RemoteClusterStateService.class);
+ when(remoteClusterStateService.getStats()).thenReturn(new RemotePersistenceStats());
+ when(remoteClusterStateService.getThreadpool()).thenReturn(threadPool);
+ when(remoteClusterStateService.getBlobStore()).thenReturn(blobStore);
+ remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(remoteClusterStateService, clusterService);
+ }
+
+ @After
+ public void teardown() throws Exception {
+ super.tearDown();
+ remoteClusterStateCleanupManager.close();
+ threadPool.shutdown();
+ }
+
+ public void testDeleteClusterMetadata() throws IOException {
+ String clusterUUID = "clusterUUID";
+ String clusterName = "test-cluster";
+ List inactiveBlobs = Arrays.asList(
+ new PlainBlobMetadata("manifest1.dat", 1L),
+ new PlainBlobMetadata("manifest2.dat", 1L),
+ new PlainBlobMetadata("manifest3.dat", 1L)
+ );
+ List activeBlobs = Arrays.asList(
+ new PlainBlobMetadata("manifest4.dat", 1L),
+ new PlainBlobMetadata("manifest5.dat", 1L)
+ );
+ UploadedIndexMetadata index1Metadata = new UploadedIndexMetadata("index1", "indexUUID1", "index_metadata1");
+ UploadedIndexMetadata index2Metadata = new UploadedIndexMetadata("index2", "indexUUID2", "index_metadata2");
+ UploadedIndexMetadata index1UpdatedMetadata = new UploadedIndexMetadata("index1", "indexUUID1", "index_metadata1_updated");
+ UploadedMetadataAttribute coordinationMetadata = new UploadedMetadataAttribute(COORDINATION_METADATA, "coordination_metadata");
+ UploadedMetadataAttribute templateMetadata = new UploadedMetadataAttribute(TEMPLATES_METADATA, "template_metadata");
+ UploadedMetadataAttribute settingMetadata = new UploadedMetadataAttribute(SETTING_METADATA, "settings_metadata");
+ UploadedMetadataAttribute coordinationMetadataUpdated = new UploadedMetadataAttribute(
+ COORDINATION_METADATA,
+ "coordination_metadata_updated"
+ );
+ UploadedMetadataAttribute templateMetadataUpdated = new UploadedMetadataAttribute(TEMPLATES_METADATA, "template_metadata_updated");
+ UploadedMetadataAttribute settingMetadataUpdated = new UploadedMetadataAttribute(SETTING_METADATA, "settings_metadata_updated");
+ ClusterMetadataManifest manifest1 = ClusterMetadataManifest.builder()
+ .indices(List.of(index1Metadata))
+ .globalMetadataFileName("global_metadata")
+ .clusterTerm(1L)
+ .stateVersion(1L)
+ .codecVersion(CODEC_V1)
+ .stateUUID(randomAlphaOfLength(10))
+ .clusterUUID(clusterUUID)
+ .nodeId("nodeA")
+ .opensearchVersion(VersionUtils.randomOpenSearchVersion(random()))
+ .previousClusterUUID(ClusterState.UNKNOWN_UUID)
+ .committed(true)
+ .build();
+ ClusterMetadataManifest manifest2 = ClusterMetadataManifest.builder(manifest1)
+ .indices(List.of(index1Metadata, index2Metadata))
+ .codecVersion(CODEC_V2)
+ .globalMetadataFileName(null)
+ .coordinationMetadata(coordinationMetadata)
+ .templatesMetadata(templateMetadata)
+ .settingMetadata(settingMetadata)
+ .build();
+ ClusterMetadataManifest manifest3 = ClusterMetadataManifest.builder(manifest2)
+ .indices(List.of(index1UpdatedMetadata, index2Metadata))
+ .settingMetadata(settingMetadataUpdated)
+ .build();
+
+ // active manifest have reference to index1Updated, index2, settingsUpdated, coordinationUpdated, templates, templatesUpdated
+ ClusterMetadataManifest manifest4 = ClusterMetadataManifest.builder(manifest3)
+ .coordinationMetadata(coordinationMetadataUpdated)
+ .build();
+ ClusterMetadataManifest manifest5 = ClusterMetadataManifest.builder(manifest4).templatesMetadata(templateMetadataUpdated).build();
+
+ when(remoteClusterStateService.fetchRemoteClusterMetadataManifest(eq(clusterName), eq(clusterUUID), any())).thenReturn(
+ manifest4,
+ manifest5,
+ manifest1,
+ manifest2,
+ manifest3
+ );
+ BlobContainer container = mock(BlobContainer.class);
+ when(blobStore.blobContainer(any())).thenReturn(container);
+ doNothing().when(container).deleteBlobsIgnoringIfNotExists(any());
+
+ remoteClusterStateCleanupManager.deleteClusterMetadata(clusterName, clusterUUID, activeBlobs, inactiveBlobs);
+ verify(container).deleteBlobsIgnoringIfNotExists(
+ List.of(
+ new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + coordinationMetadata.getUploadedFilename() + ".dat",
+ new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + settingMetadata.getUploadedFilename() + ".dat",
+ new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + "global_metadata.dat"
+ )
+ );
+ verify(container).deleteBlobsIgnoringIfNotExists(
+ List.of(
+ new BlobPath().add(INDEX_PATH_TOKEN).add(index1Metadata.getIndexUUID()).buildAsString()
+ + index1Metadata.getUploadedFilePath()
+ + ".dat"
+ )
+ );
+ Set staleManifest = new HashSet<>();
+ inactiveBlobs.forEach(blob -> staleManifest.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blob.name()));
+ verify(container).deleteBlobsIgnoringIfNotExists(new ArrayList<>(staleManifest));
+ }
+
+ public void testDeleteStaleClusterUUIDs() throws IOException {
+ final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build();
+ ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder()
+ .indices(List.of())
+ .clusterTerm(1L)
+ .stateVersion(1L)
+ .stateUUID(randomAlphaOfLength(10))
+ .clusterUUID("cluster-uuid1")
+ .nodeId("nodeA")
+ .opensearchVersion(VersionUtils.randomOpenSearchVersion(random()))
+ .previousClusterUUID(ClusterState.UNKNOWN_UUID)
+ .committed(true)
+ .build();
+
+ BlobPath blobPath = new BlobPath().add("random-path");
+ BlobContainer uuidContainerContainer = mock(BlobContainer.class);
+ BlobContainer manifest2Container = mock(BlobContainer.class);
+ BlobContainer manifest3Container = mock(BlobContainer.class);
+ when(blobStore.blobContainer(any())).then(invocation -> {
+ BlobPath blobPath1 = invocation.getArgument(0);
+ if (blobPath1.buildAsString().endsWith("cluster-state/")) {
+ return uuidContainerContainer;
+ } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid2/")) {
+ return manifest2Container;
+ } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid3/")) {
+ return manifest3Container;
+ } else {
+ throw new IllegalArgumentException("Unexpected blob path " + blobPath1);
+ }
+ });
+ when(
+ manifest2Container.listBlobsByPrefixInSortedOrder(
+ MANIFEST_FILE_PREFIX + DELIMITER,
+ Integer.MAX_VALUE,
+ BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC
+ )
+ ).thenReturn(List.of(new PlainBlobMetadata("mainfest2", 1L)));
+ when(
+ manifest3Container.listBlobsByPrefixInSortedOrder(
+ MANIFEST_FILE_PREFIX + DELIMITER,
+ Integer.MAX_VALUE,
+ BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC
+ )
+ ).thenReturn(List.of(new PlainBlobMetadata("mainfest3", 1L)));
+ Set uuids = new HashSet<>(Arrays.asList("cluster-uuid1", "cluster-uuid2", "cluster-uuid3"));
+ when(remoteClusterStateService.getAllClusterUUIDs(any())).thenReturn(uuids);
+ when(remoteClusterStateService.getCusterMetadataBasePath(any(), any())).then(
+ invocationOnMock -> blobPath.add(encodeString(invocationOnMock.getArgument(0)))
+ .add(CLUSTER_STATE_PATH_TOKEN)
+ .add((String) invocationOnMock.getArgument(1))
+ );
+ remoteClusterStateCleanupManager.start();
+ remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, clusterMetadataManifest);
+ try {
+ assertBusy(() -> {
+ verify(manifest2Container, times(1)).delete();
+ verify(manifest3Container, times(1)).delete();
+ });
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public void testRemoteStateCleanupFailureStats() throws IOException {
+ BlobContainer blobContainer = mock(BlobContainer.class);
+ doThrow(IOException.class).when(blobContainer).delete();
+ when(blobStore.blobContainer(any())).thenReturn(blobContainer);
+ BlobPath blobPath = new BlobPath().add("random-path");
+ when((blobStoreRepository.basePath())).thenReturn(blobPath);
+ remoteClusterStateCleanupManager.start();
+ remoteClusterStateCleanupManager.deleteStaleUUIDsClusterMetadata("cluster1", List.of("cluster-uuid1"));
+ try {
+ assertBusy(() -> {
+ // wait for stats to get updated
+ assertNotNull(remoteClusterStateCleanupManager.getStats());
+ assertEquals(0, remoteClusterStateCleanupManager.getStats().getSuccessCount());
+ assertEquals(1, remoteClusterStateCleanupManager.getStats().getCleanupAttemptFailedCount());
+ });
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+    public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception {
+        BlobContainer blobContainer = mock(BlobContainer.class);
+        when(blobStore.blobContainer(any())).thenReturn(blobContainer);
+
+        CountDownLatch latch = new CountDownLatch(1);
+        AtomicInteger callCount = new AtomicInteger(0);
+        doAnswer(invocation -> {
+            callCount.incrementAndGet();
+            if (latch.await(5000, TimeUnit.MILLISECONDS) == false) {
+                throw new Exception("Timed out waiting for delete task queuing to complete");
+            }
+            return null;
+        }).when(blobContainer)
+            .listBlobsByPrefixInSortedOrder(
+                any(String.class),
+                any(int.class),
+                any(BlobContainer.BlobNameSortOrder.class),
+                any(ActionListener.class)
+            );
+
+        remoteClusterStateCleanupManager.start();
+        remoteClusterStateCleanupManager.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS);
+        remoteClusterStateCleanupManager.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS);
+
+        latch.countDown();
+        assertBusy(() -> assertEquals(1, callCount.get()));
+    }
+
+ public void testRemoteClusterStateCleanupSetting() {
+ remoteClusterStateCleanupManager.start();
+ // verify default value
+ assertEquals(CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, remoteClusterStateCleanupManager.getStaleFileCleanupInterval());
+
+ // verify update interval
+ int cleanupInterval = randomIntBetween(1, 10);
+ Settings newSettings = Settings.builder().put("cluster.remote_store.state.cleanup_interval", cleanupInterval + "s").build();
+ clusterSettings.applySettings(newSettings);
+ assertEquals(cleanupInterval, remoteClusterStateCleanupManager.getStaleFileCleanupInterval().seconds());
+ }
+
+ public void testRemoteCleanupTaskScheduled() {
+ AbstractAsyncTask cleanupTask = remoteClusterStateCleanupManager.getStaleFileDeletionTask();
+ assertNull(cleanupTask);
+ // now the task should be initialized
+ remoteClusterStateCleanupManager.start();
+ assertNotNull(remoteClusterStateCleanupManager.getStaleFileDeletionTask());
+ assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().mustReschedule());
+ assertEquals(
+ clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING),
+ remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval()
+ );
+ assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled());
+ assertFalse(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isClosed());
+ }
+
+ public void testRemoteCleanupCallsDeleteOnlyOnElectedClusterManager() {
+ DiscoveryNodes nodes = mock(DiscoveryNodes.class);
+ when(nodes.isLocalNodeElectedClusterManager()).thenReturn(false);
+ when(clusterState.nodes()).thenReturn(nodes);
+ RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager);
+ AtomicInteger callCount = new AtomicInteger(0);
+ doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt());
+ spyManager.cleanUpStaleFiles();
+ assertEquals(0, callCount.get());
+
+ when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true);
+ when(clusterState.version()).thenReturn(randomLongBetween(SKIP_CLEANUP_STATE_CHANGES + 1, SKIP_CLEANUP_STATE_CHANGES + 10));
+ spyManager.cleanUpStaleFiles();
+ assertEquals(1, callCount.get());
+ }
+
+ public void testRemoteCleanupSkipsIfVersionIncrementLessThanThreshold() {
+ DiscoveryNodes nodes = mock(DiscoveryNodes.class);
+ long version = randomLongBetween(1, SKIP_CLEANUP_STATE_CHANGES);
+ when(clusterApplierService.state()).thenReturn(clusterState);
+ when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true);
+ when(clusterState.nodes()).thenReturn(nodes);
+ when(clusterState.version()).thenReturn(version);
+
+ RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager);
+ AtomicInteger callCount = new AtomicInteger(0);
+ doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt());
+
+ spyManager.cleanUpStaleFiles();
+ assertEquals(0, callCount.get());
+ }
+
+ public void testRemoteCleanupCallsDeleteIfVersionIncrementGreaterThanThreshold() {
+ DiscoveryNodes nodes = mock(DiscoveryNodes.class);
+ long version = randomLongBetween(SKIP_CLEANUP_STATE_CHANGES + 1, SKIP_CLEANUP_STATE_CHANGES + 10);
+ when(clusterApplierService.state()).thenReturn(clusterState);
+ when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true);
+ when(clusterState.nodes()).thenReturn(nodes);
+ when(clusterState.version()).thenReturn(version);
+
+ RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager);
+ AtomicInteger callCount = new AtomicInteger(0);
+ doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt());
+
+ // using spied cleanup manager so that stubbed deleteStaleClusterMetadata is called
+ spyManager.cleanUpStaleFiles();
+ assertEquals(1, callCount.get());
+ }
+
+ public void testRemoteCleanupSchedulesEvenAfterFailure() {
+ remoteClusterStateCleanupManager.start();
+ RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager);
+ AtomicInteger callCount = new AtomicInteger(0);
+ doAnswer(invocationOnMock -> {
+ callCount.incrementAndGet();
+ throw new RuntimeException("Test exception");
+ }).when(spyManager).cleanUpStaleFiles();
+ AsyncStaleFileDeletion task = new AsyncStaleFileDeletion(spyManager);
+ assertTrue(task.isScheduled());
+ task.run();
+ // Task is still scheduled after the failure
+ assertTrue(task.isScheduled());
+ assertEquals(1, callCount.get());
+
+ task.run();
+ // Task is still scheduled after the failure
+ assertTrue(task.isScheduled());
+ assertEquals(2, callCount.get());
+ }
+}
diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java
index 1b242b921c0d7..4a53770c76d88 100644
--- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java
+++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java
@@ -19,6 +19,7 @@
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.cluster.metadata.TemplatesMetadata;
import org.opensearch.cluster.node.DiscoveryNodes;
+import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer;
import org.opensearch.common.blobstore.BlobContainer;
import org.opensearch.common.blobstore.BlobMetadata;
@@ -33,6 +34,7 @@
import org.opensearch.common.network.NetworkModule;
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.ParseField;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.bytes.BytesArray;
@@ -72,9 +74,6 @@
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.Supplier;
@@ -86,6 +85,7 @@
import org.mockito.ArgumentMatchers;
import static java.util.stream.Collectors.toList;
+import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL;
import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA;
import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER;
import static org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS;
@@ -93,12 +93,12 @@
import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION;
import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX;
import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX;
-import static org.opensearch.gateway.remote.RemoteClusterStateService.RETAINED_MANIFESTS;
import static org.opensearch.gateway.remote.RemoteClusterStateService.SETTING_METADATA;
import static org.opensearch.gateway.remote.RemoteClusterStateService.TEMPLATES_METADATA;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
@@ -109,13 +109,12 @@
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
public class RemoteClusterStateServiceTests extends OpenSearchTestCase {
private RemoteClusterStateService remoteClusterStateService;
+ private ClusterService clusterService;
private ClusterSettings clusterSettings;
private Supplier repositoriesServiceSupplier;
private RepositoriesService repositoriesService;
@@ -148,6 +147,8 @@ public void setup() {
.build();
clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ clusterService = mock(ClusterService.class);
+ when(clusterService.getClusterSettings()).thenReturn(clusterSettings);
NamedXContentRegistry xContentRegistry = new NamedXContentRegistry(
Stream.of(
NetworkModule.getNamedXContents().stream(),
@@ -165,7 +166,7 @@ public void setup() {
"test-node-id",
repositoriesServiceSupplier,
settings,
- clusterSettings,
+ clusterService,
() -> 0L,
threadPool,
List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings))
@@ -187,14 +188,14 @@ public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException
public void testFailInitializationWhenRemoteStateDisabled() {
final Settings settings = Settings.builder().build();
- ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
+ when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS));
assertThrows(
AssertionError.class,
() -> new RemoteClusterStateService(
"test-node-id",
repositoriesServiceSupplier,
settings,
- clusterSettings,
+ clusterService,
() -> 0L,
threadPool,
List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings))
@@ -547,7 +548,7 @@ private void verifyWriteIncrementalGlobalMetadataFromOlderCodecSuccess(ClusterMe
);
final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder()
- .codecVersion(2)
+ .codecVersion(3)
.indices(Collections.emptyList())
.clusterTerm(1L)
.stateVersion(1L)
@@ -1071,6 +1072,8 @@ public void testReadGlobalMetadata() throws IOException {
.nodeId("nodeA")
.opensearchVersion(VersionUtils.randomOpenSearchVersion(random()))
.previousClusterUUID("prev-cluster-uuid")
+ .routingTableVersion(1)
+ .indicesRouting(List.of())
.build();
Metadata expactedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build();
@@ -1280,72 +1283,6 @@ public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOEx
assertThat(previousClusterUUID, equalTo("cluster-uuid2"));
}
- public void testDeleteStaleClusterUUIDs() throws IOException {
- final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build();
- ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder()
- .indices(List.of())
- .clusterTerm(1L)
- .stateVersion(1L)
- .stateUUID(randomAlphaOfLength(10))
- .clusterUUID("cluster-uuid1")
- .nodeId("nodeA")
- .opensearchVersion(VersionUtils.randomOpenSearchVersion(random()))
- .previousClusterUUID(ClusterState.UNKNOWN_UUID)
- .committed(true)
- .build();
-
- BlobPath blobPath = new BlobPath().add("random-path");
- when((blobStoreRepository.basePath())).thenReturn(blobPath);
- BlobContainer uuidContainerContainer = mock(BlobContainer.class);
- BlobContainer manifest2Container = mock(BlobContainer.class);
- BlobContainer manifest3Container = mock(BlobContainer.class);
- when(blobStore.blobContainer(any())).then(invocation -> {
- BlobPath blobPath1 = invocation.getArgument(0);
- if (blobPath1.buildAsString().endsWith("cluster-state/")) {
- return uuidContainerContainer;
- } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid2/")) {
- return manifest2Container;
- } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid3/")) {
- return manifest3Container;
- } else {
- throw new IllegalArgumentException("Unexpected blob path " + blobPath1);
- }
- });
- Map blobMetadataMap = Map.of(
- "cluster-uuid1",
- mock(BlobContainer.class),
- "cluster-uuid2",
- mock(BlobContainer.class),
- "cluster-uuid3",
- mock(BlobContainer.class)
- );
- when(uuidContainerContainer.children()).thenReturn(blobMetadataMap);
- when(
- manifest2Container.listBlobsByPrefixInSortedOrder(
- MANIFEST_FILE_PREFIX + DELIMITER,
- Integer.MAX_VALUE,
- BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC
- )
- ).thenReturn(List.of(new PlainBlobMetadata("mainfest2", 1L)));
- when(
- manifest3Container.listBlobsByPrefixInSortedOrder(
- MANIFEST_FILE_PREFIX + DELIMITER,
- Integer.MAX_VALUE,
- BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC
- )
- ).thenReturn(List.of(new PlainBlobMetadata("mainfest3", 1L)));
- remoteClusterStateService.start();
- remoteClusterStateService.deleteStaleClusterUUIDs(clusterState, clusterMetadataManifest);
- try {
- assertBusy(() -> {
- verify(manifest2Container, times(1)).delete();
- verify(manifest3Container, times(1)).delete();
- });
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
public void testRemoteStateStats() throws IOException {
final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build();
mockBlobStoreObjects();
@@ -1358,26 +1295,6 @@ public void testRemoteStateStats() throws IOException {
assertEquals(0, remoteClusterStateService.getStats().getFailedCount());
}
- public void testRemoteStateCleanupFailureStats() throws IOException {
- BlobContainer blobContainer = mock(BlobContainer.class);
- doThrow(IOException.class).when(blobContainer).delete();
- when(blobStore.blobContainer(any())).thenReturn(blobContainer);
- BlobPath blobPath = new BlobPath().add("random-path");
- when((blobStoreRepository.basePath())).thenReturn(blobPath);
- remoteClusterStateService.start();
- remoteClusterStateService.deleteStaleUUIDsClusterMetadata("cluster1", Arrays.asList("cluster-uuid1"));
- try {
- assertBusy(() -> {
- // wait for stats to get updated
- assertTrue(remoteClusterStateService.getStats() != null);
- assertEquals(0, remoteClusterStateService.getStats().getSuccessCount());
- assertEquals(1, remoteClusterStateService.getStats().getCleanupAttemptFailedCount());
- });
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
public void testFileNames() {
final Index index = new Index("test-index", "index-uuid");
final Settings idxSettings = Settings.builder()
@@ -1418,36 +1335,6 @@ private void verifyManifestFileNameWithCodec(int codecVersion) {
assertThat(splittedName[3], is("P"));
}
- public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception {
- BlobContainer blobContainer = mock(BlobContainer.class);
- BlobPath blobPath = new BlobPath().add("random-path");
- when((blobStoreRepository.basePath())).thenReturn(blobPath);
- when(blobStore.blobContainer(any())).thenReturn(blobContainer);
-
- CountDownLatch latch = new CountDownLatch(1);
- AtomicInteger callCount = new AtomicInteger(0);
- doAnswer(invocation -> {
- callCount.incrementAndGet();
- if (latch.await(5000, TimeUnit.SECONDS) == false) {
- throw new Exception("Timed out waiting for delete task queuing to complete");
- }
- return null;
- }).when(blobContainer)
- .listBlobsByPrefixInSortedOrder(
- any(String.class),
- any(int.class),
- any(BlobContainer.BlobNameSortOrder.class),
- any(ActionListener.class)
- );
-
- remoteClusterStateService.start();
- remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS);
- remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS);
-
- latch.countDown();
- assertBusy(() -> assertEquals(1, callCount.get()));
- }
-
public void testIndexMetadataUploadWaitTimeSetting() {
// verify default value
assertEquals(
@@ -1496,6 +1383,33 @@ public void testGlobalMetadataUploadWaitTimeSetting() {
assertEquals(globalMetadataUploadTimeout, remoteClusterStateService.getGlobalMetadataUploadTimeout().seconds());
}
+ public void testRemoteRoutingTableNotInitializedWhenDisabled() {
+ assertFalse(remoteClusterStateService.getRemoteRoutingTableService().isPresent());
+ }
+
+ public void testRemoteRoutingTableInitializedWhenEnabled() {
+ Settings newSettings = Settings.builder()
+ .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository")
+ .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository")
+ .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true)
+ .build();
+ clusterSettings.applySettings(newSettings);
+
+ Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build();
+ FeatureFlags.initializeFeatureFlags(nodeSettings);
+
+ remoteClusterStateService = new RemoteClusterStateService(
+ "test-node-id",
+ repositoriesServiceSupplier,
+ newSettings,
+ clusterService,
+ () -> 0L,
+ threadPool,
+ List.of(new RemoteIndexPathUploader(threadPool, newSettings, repositoriesServiceSupplier, clusterSettings))
+ );
+ assertTrue(remoteClusterStateService.getRemoteRoutingTableService().isPresent());
+ }
+
private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException {
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap());
}
@@ -1557,7 +1471,7 @@ private void mockObjectsForGettingPreviousClusterUUID(
.build();
Map indexMetadataMap1 = Map.of("index-uuid1", indexMetadata1, "index-uuid2", indexMetadata2);
mockBlobContainerForGlobalMetadata(blobContainer1, clusterManifest1, metadata1);
- mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, ClusterMetadataManifest.CODEC_V2);
+ mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, ClusterMetadataManifest.CODEC_V3);
List uploadedIndexMetadataList2 = List.of(
new UploadedIndexMetadata("index1", "index-uuid1", "key1"),
@@ -1589,7 +1503,7 @@ private void mockObjectsForGettingPreviousClusterUUID(
.build();
Map indexMetadataMap2 = Map.of("index-uuid1", indexMetadata3, "index-uuid2", indexMetadata4);
mockBlobContainerForGlobalMetadata(blobContainer2, clusterManifest2, metadata2);
- mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2, ClusterMetadataManifest.CODEC_V2);
+ mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2, ClusterMetadataManifest.CODEC_V3);
// differGlobalMetadata controls which one of IndexMetadata or Metadata object would be different
// when comparing cluster-uuid3 and cluster-uuid1 state.
@@ -1623,7 +1537,7 @@ private void mockObjectsForGettingPreviousClusterUUID(
clusterUUIDCommitted.getOrDefault("cluster-uuid3", true)
);
mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3);
- mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V2);
+ mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V3);
ArrayList mockBlobContainerOrderedList = new ArrayList<>(
List.of(blobContainer1, blobContainer1, blobContainer3, blobContainer3, blobContainer2, blobContainer2)
@@ -1891,7 +1805,7 @@ private static ClusterState.Builder generateClusterStateWithGlobalMetadata() {
);
}
- private static ClusterState.Builder generateClusterStateWithOneIndex() {
+ static ClusterState.Builder generateClusterStateWithOneIndex() {
final Index index = new Index("test-index", "index-uuid");
final Settings idxSettings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
@@ -1921,7 +1835,7 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() {
);
}
- private static DiscoveryNodes nodesWithLocalNodeClusterManager() {
+ static DiscoveryNodes nodesWithLocalNodeClusterManager() {
return DiscoveryNodes.builder().clusterManagerNodeId("cluster-manager-id").localNodeId("cluster-manager-id").build();
}
diff --git a/server/src/test/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeaderTests.java b/server/src/test/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeaderTests.java
new file mode 100644
index 0000000000000..a3f0ac36a40f1
--- /dev/null
+++ b/server/src/test/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeaderTests.java
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote.routingtable;
+
+import org.opensearch.common.io.stream.BytesStreamOutput;
+import org.opensearch.core.common.io.stream.BytesStreamInput;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+
+public class IndexRoutingTableHeaderTests extends OpenSearchTestCase {
+
+ public void testIndexRoutingTableHeader() throws IOException {
+ String indexName = randomAlphaOfLength(randomIntBetween(1, 50));
+ IndexRoutingTableHeader header = new IndexRoutingTableHeader(indexName);
+ try (BytesStreamOutput out = new BytesStreamOutput()) {
+ header.writeTo(out);
+
+ BytesStreamInput in = new BytesStreamInput(out.bytes().toBytesRef().bytes);
+ IndexRoutingTableHeader headerRead = new IndexRoutingTableHeader(in);
+ assertEquals(indexName, headerRead.getIndexName());
+
+ }
+ }
+
+}
diff --git a/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableTests.java b/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableTests.java
new file mode 100644
index 0000000000000..72066d8afb45b
--- /dev/null
+++ b/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableTests.java
@@ -0,0 +1,87 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote.routingtable;
+
+import org.opensearch.Version;
+import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.cluster.metadata.Metadata;
+import org.opensearch.cluster.routing.IndexRoutingTable;
+import org.opensearch.cluster.routing.RoutingTable;
+import org.opensearch.cluster.routing.ShardRoutingState;
+import org.opensearch.common.io.stream.BytesStreamOutput;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class RemoteIndexRoutingTableTests extends OpenSearchTestCase {
+
+ public void testRoutingTableInput() {
+ String indexName = randomAlphaOfLength(randomIntBetween(1, 50));
+ int numberOfShards = randomIntBetween(1, 10);
+ int numberOfReplicas = randomIntBetween(1, 10);
+ Metadata metadata = Metadata.builder()
+ .put(
+ IndexMetadata.builder(indexName)
+ .settings(settings(Version.CURRENT))
+ .numberOfShards(numberOfShards)
+ .numberOfReplicas(numberOfReplicas)
+ )
+ .build();
+
+ RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index(indexName)).build();
+
+ initialRoutingTable.getIndicesRouting().values().forEach(indexShardRoutingTables -> {
+ RemoteIndexRoutingTable indexRouting = new RemoteIndexRoutingTable(indexShardRoutingTables);
+ try (BytesStreamOutput streamOutput = new BytesStreamOutput();) {
+ indexRouting.writeTo(streamOutput);
+ RemoteIndexRoutingTable remoteIndexRoutingTable = new RemoteIndexRoutingTable(
+ streamOutput.bytes().streamInput(),
+ metadata.index(indexName).getIndex()
+ );
+ IndexRoutingTable indexRoutingTable = remoteIndexRoutingTable.getIndexRoutingTable();
+ assertEquals(numberOfShards, indexRoutingTable.getShards().size());
+ assertEquals(metadata.index(indexName).getIndex(), indexRoutingTable.getIndex());
+ assertEquals(
+ numberOfShards * (1 + numberOfReplicas),
+ indexRoutingTable.shardsWithState(ShardRoutingState.UNASSIGNED).size()
+ );
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ public void testRoutingTableInputStreamWithInvalidIndex() {
+ Metadata metadata = Metadata.builder()
+ .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
+ .put(IndexMetadata.builder("invalid-index").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
+ .build();
+
+ RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
+ AtomicInteger assertionError = new AtomicInteger();
+ initialRoutingTable.getIndicesRouting().values().forEach(indexShardRoutingTables -> {
+ RemoteIndexRoutingTable indexRouting = new RemoteIndexRoutingTable(indexShardRoutingTables);
+ try (BytesStreamOutput streamOutput = new BytesStreamOutput()) {
+ indexRouting.writeTo(streamOutput);
+ RemoteIndexRoutingTable remoteIndexRoutingTable = new RemoteIndexRoutingTable(
+ streamOutput.bytes().streamInput(),
+ metadata.index("invalid-index").getIndex()
+ );
+ } catch (AssertionError e) {
+ assertionError.getAndIncrement();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+
+ assertEquals(1, assertionError.get());
+ }
+
+}
diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java
index 2aa310ae959d9..98bcaa3a1a46b 100644
--- a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java
@@ -208,7 +208,7 @@ public void testChangeLocale() throws IOException {
fieldMapping(b -> b.field("type", "date").field("format", "E, d MMM yyyy HH:mm:ss Z").field("locale", "de"))
);
- mapper.parse(source(b -> b.field("field", "Mi, 06 Dez 2000 02:55:00 -0800")));
+ mapper.parse(source(b -> b.field("field", "Mi., 06 Dez. 2000 02:55:00 -0800")));
}
public void testNullValue() throws IOException {
diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java
index bb0776e0ced25..67787e8583930 100644
--- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java
+++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java
@@ -48,6 +48,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE;
@@ -470,6 +471,25 @@ public void testRefreshPersistentFailure() throws Exception {
assertFalse("remote store should not in sync", tuple.v1().isRemoteSegmentStoreInSync());
}
+ public void testRefreshPersistentFailureAndIndexShardClosed() throws Exception {
+ int succeedOnAttempt = 3;
+ int closeShardOnAttempt = 1;
+ CountDownLatch refreshCountLatch = new CountDownLatch(1);
+ CountDownLatch successLatch = new CountDownLatch(10);
+ Tuple tuple = mockIndexShardWithRetryAndScheduleRefresh(
+ succeedOnAttempt,
+ refreshCountLatch,
+ successLatch,
+ true,
+ closeShardOnAttempt
+ );
+ // Give ~2 seconds for some iterations of remote refresh upload
+ Thread.sleep(TimeUnit.SECONDS.toMillis(2));
+ RemoteStoreRefreshListener listener = tuple.v1();
+ assertFalse("remote store should not in sync", listener.isRemoteSegmentStoreInSync());
+ assertFalse(listener.getRetryScheduledStatus());
+ }
+
private void assertNoLagAndTotalUploadsFailed(RemoteSegmentTransferTracker segmentTracker, long totalUploadsFailed) throws Exception {
assertBusy(() -> {
assertEquals(0, segmentTracker.getBytesLag());
@@ -548,6 +568,49 @@ private Tuple mockIn
return mockIndexShardWithRetryAndScheduleRefresh(succeedOnAttempt, refreshCountLatch, successLatch, 1, noOpLatch);
}
+ private Tuple mockIndexShardWithRetryAndScheduleRefresh(
+ int totalAttempt,
+ CountDownLatch refreshCountLatch,
+ CountDownLatch successLatch,
+ int checkpointPublishSucceedOnAttempt,
+ CountDownLatch reachedCheckpointPublishLatch,
+ boolean mockPrimaryTerm,
+ boolean testUploadTimeout
+ ) throws IOException {
+ return mockIndexShardWithRetryAndScheduleRefresh(
+ totalAttempt,
+ refreshCountLatch,
+ successLatch,
+ checkpointPublishSucceedOnAttempt,
+ reachedCheckpointPublishLatch,
+ mockPrimaryTerm,
+ testUploadTimeout,
+ false,
+ 0
+ );
+ }
+
+ private Tuple mockIndexShardWithRetryAndScheduleRefresh(
+ int succeedOnAttempt,
+ CountDownLatch refreshCountLatch,
+ CountDownLatch successLatch,
+ boolean closedShard,
+ int closeShardAfterAttempt
+ ) throws IOException {
+ CountDownLatch noOpLatch = new CountDownLatch(0);
+ return mockIndexShardWithRetryAndScheduleRefresh(
+ succeedOnAttempt,
+ refreshCountLatch,
+ successLatch,
+ 1,
+ noOpLatch,
+ true,
+ false,
+ closedShard,
+ closeShardAfterAttempt
+ );
+ }
+
private Tuple mockIndexShardWithRetryAndScheduleRefresh(
int succeedOnAttempt,
CountDownLatch refreshCountLatch,
@@ -562,7 +625,9 @@ private Tuple mockIn
succeedCheckpointPublishOnAttempt,
reachedCheckpointPublishLatch,
true,
- false
+ false,
+ false,
+ 0
);
}
@@ -573,7 +638,9 @@ private Tuple mockIn
int succeedCheckpointPublishOnAttempt,
CountDownLatch reachedCheckpointPublishLatch,
boolean mockPrimaryTerm,
- boolean testUploadTimeout
+ boolean testUploadTimeout,
+ boolean closeShard,
+ int closeShardAfterAttempt
) throws IOException {
// Create index shard that we will be using to mock different methods in IndexShard for the unit test
indexShard = newStartedShard(
@@ -601,7 +668,6 @@ private Tuple mockIn
IndexShard shard = mock(IndexShard.class);
Store store = mock(Store.class);
when(shard.store()).thenReturn(store);
- when(shard.state()).thenReturn(IndexShardState.STARTED);
when(store.directory()).thenReturn(indexShard.store().directory());
// Mock (RemoteSegmentStoreDirectory) ((FilterDirectory) ((FilterDirectory) indexShard.remoteStore().directory())
@@ -663,6 +729,14 @@ private Tuple mockIn
return indexShard.getLatestReplicationCheckpoint();
})).when(shard).computeReplicationCheckpoint(any());
+ doAnswer((invocationOnMock -> {
+ if (closeShard && counter.get() == closeShardAfterAttempt) {
+ logger.info("Closing shard...");
+ return IndexShardState.CLOSED;
+ }
+ return IndexShardState.STARTED;
+ })).when(shard).state();
+
doAnswer(invocation -> {
if (Objects.nonNull(successLatch)) {
successLatch.countDown();
diff --git a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java
index f89fd3df6e340..cc9096ee41315 100644
--- a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java
+++ b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java
@@ -116,4 +116,15 @@ public void testMaxRemoteReferencedTranslogFiles() {
);
assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders());
}
+
+ public void testDisableMaxRemoteReferencedTranslogFiles() {
+ // Test default value
+ assertEquals(1000, remoteStoreSettings.getMaxRemoteTranslogReaders());
+
+ // Test override with valid value
+ clusterSettings.applySettings(
+ Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "-1").build()
+ );
+ assertEquals(-1, remoteStoreSettings.getMaxRemoteTranslogReaders());
+ }
}
diff --git a/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java b/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java
index c4ba271d27ae9..de7f8977686a7 100644
--- a/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java
+++ b/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java
@@ -19,6 +19,7 @@
import java.net.InetAddress;
import java.net.UnknownHostException;
+import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -28,6 +29,7 @@
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_CRYPTO_SETTINGS_PREFIX;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY;
@@ -148,4 +150,77 @@ public void testNoCryptoMetadata() throws UnknownHostException {
RepositoryMetadata repositoryMetadata = remoteStoreNodeAttribute.getRepositoriesMetadata().repositories().get(0);
assertNull(repositoryMetadata.cryptoMetadata());
}
+
+ public void testEqualsWithRepoSkip() throws UnknownHostException {
+ String repoName = "remote-store-A";
+ String repoTypeSettingKey = String.format(Locale.ROOT, REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, repoName);
+ String repoSettingsKey = String.format(Locale.ROOT, REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, repoName);
+ Map<String, String> attr = Map.of(
+ REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY,
+ repoName,
+ REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY,
+ repoName,
+ REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY,
+ repoName,
+ repoTypeSettingKey,
+ "s3",
+ repoSettingsKey,
+ "abc",
+ repoSettingsKey + "base_path",
+ "xyz"
+ );
+ DiscoveryNode node = new DiscoveryNode(
+ "C",
+ new TransportAddress(InetAddress.getByName("localhost"), 9876),
+ attr,
+ emptySet(),
+ Version.CURRENT
+ );
+
+ RemoteStoreNodeAttribute remoteStoreNodeAttribute = new RemoteStoreNodeAttribute(node);
+
+ String routingTableRepoName = "remote-store-B";
+ String routingTableRepoTypeSettingKey = String.format(
+ Locale.ROOT,
+ REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT,
+ routingTableRepoName
+ );
+ String routingTableRepoSettingsKey = String.format(
+ Locale.ROOT,
+ REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX,
+ routingTableRepoName
+ );
+
+ Map<String, String> attr2 = Map.of(
+ REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY,
+ repoName,
+ REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY,
+ repoName,
+ REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY,
+ repoName,
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY,
+ routingTableRepoName,
+ repoTypeSettingKey,
+ "s3",
+ repoSettingsKey,
+ "abc",
+ repoSettingsKey + "base_path",
+ "xyz",
+ routingTableRepoTypeSettingKey,
+ "s3",
+ routingTableRepoSettingsKey,
+ "xyz"
+ );
+ DiscoveryNode node2 = new DiscoveryNode(
+ "C",
+ new TransportAddress(InetAddress.getByName("localhost"), 9876),
+ attr2,
+ emptySet(),
+ Version.CURRENT
+ );
+ RemoteStoreNodeAttribute remoteStoreNodeAttribute2 = new RemoteStoreNodeAttribute(node2);
+
+ assertFalse(remoteStoreNodeAttribute.equalsWithRepoSkip(remoteStoreNodeAttribute2, List.of()));
+ assertTrue(remoteStoreNodeAttribute.equalsWithRepoSkip(remoteStoreNodeAttribute2, List.of(routingTableRepoName)));
+ }
}
diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java
index c5f36fcc01983..95a8267734a07 100644
--- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java
+++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java
@@ -49,13 +49,13 @@
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.bytes.BytesArray;
+import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.compress.CompressorRegistry;
import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.ToXContentFragment;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
-import org.opensearch.index.translog.BufferedChecksumStreamOutput;
import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat;
import org.opensearch.test.OpenSearchTestCase;
diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java
index 5b39880930984..86de008b5dee5 100644
--- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java
@@ -2559,7 +2559,8 @@ public void start(ClusterState initialState) {
ElectionStrategy.DEFAULT_INSTANCE,
() -> new StatusInfo(HEALTHY, "healthy-info"),
persistedStateRegistry,
- remoteStoreNodeService
+ remoteStoreNodeService,
+ new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)
);
clusterManagerService.setClusterStatePublisher(coordinator);
coordinator.start();
diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java
new file mode 100644
index 0000000000000..d9aee5ebfa941
--- /dev/null
+++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.telemetry;
+
+import org.opensearch.telemetry.metrics.Counter;
+import org.opensearch.telemetry.metrics.tags.Tags;
+
+import java.util.HashMap;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * This is a simple implementation of Counter which is utilized by TestInMemoryMetricsRegistry for
+ * Unit Tests. It initializes an atomic integer to add the values of counter which doesn't have any tags
+ * along with a map to store the values recorded against the tags.
+ * The map and atomic integer can then be used to get the added values.
+ */
+public class TestInMemoryCounter implements Counter {
+
+ private AtomicInteger counterValue = new AtomicInteger(0);
+ private ConcurrentHashMap<HashMap<String, ?>, Double> counterValueForTags = new ConcurrentHashMap<>();
+
+ public Integer getCounterValue() {
+ return this.counterValue.get();
+ }
+
+ public ConcurrentHashMap<HashMap<String, ?>, Double> getCounterValueForTags() {
+ return this.counterValueForTags;
+ }
+
+ @Override
+ public void add(double value) {
+ counterValue.addAndGet((int) value);
+ }
+
+ @Override
+ public synchronized void add(double value, Tags tags) {
+ HashMap<String, ?> hashMap = (HashMap<String, ?>) tags.getTagsMap();
+ if (counterValueForTags.get(hashMap) == null) {
+ counterValueForTags.put(hashMap, value);
+ } else {
+ value = counterValueForTags.get(hashMap) + value;
+ counterValueForTags.put(hashMap, value);
+ }
+ }
+}
diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java
new file mode 100644
index 0000000000000..ff28df2b6529d
--- /dev/null
+++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.telemetry;
+
+import org.opensearch.telemetry.metrics.Histogram;
+import org.opensearch.telemetry.metrics.tags.Tags;
+
+import java.util.HashMap;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * This is a simple implementation of Histogram which is utilized by TestInMemoryMetricsRegistry for
+ * Unit Tests. It initializes an atomic integer to record the value of histogram which doesn't have any tags
+ * along with a map to store the values recorded against the tags.
+ * The map and atomic integer can then be used to get the recorded values.
+ */
+public class TestInMemoryHistogram implements Histogram {
+
+ private AtomicInteger histogramValue = new AtomicInteger(0);
+ private ConcurrentHashMap<HashMap<String, ?>, Double> histogramValueForTags = new ConcurrentHashMap<>();
+
+ public Integer getHistogramValue() {
+ return this.histogramValue.get();
+ }
+
+ public ConcurrentHashMap<HashMap<String, ?>, Double> getHistogramValueForTags() {
+ return this.histogramValueForTags;
+ }
+
+ @Override
+ public void record(double value) {
+ histogramValue.addAndGet((int) value);
+ }
+
+ @Override
+ public synchronized void record(double value, Tags tags) {
+ HashMap<String, ?> hashMap = (HashMap<String, ?>) tags.getTagsMap();
+ histogramValueForTags.put(hashMap, value);
+ }
+}
diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java
new file mode 100644
index 0000000000000..6d395085b12ea
--- /dev/null
+++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java
@@ -0,0 +1,71 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.telemetry;
+
+import org.opensearch.telemetry.metrics.Counter;
+import org.opensearch.telemetry.metrics.Histogram;
+import org.opensearch.telemetry.metrics.MetricsRegistry;
+import org.opensearch.telemetry.metrics.tags.Tags;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Supplier;
+
+/**
+ * This is a simple implementation of MetricsRegistry which can be utilized by Unit Tests.
+ * It just initializes and stores counters/histograms within a map, once created.
+ * The maps can then be used to get the counters/histograms by their names.
+ */
+public class TestInMemoryMetricsRegistry implements MetricsRegistry {
+
+ private ConcurrentHashMap<String, TestInMemoryCounter> counterStore = new ConcurrentHashMap<>();
+ private ConcurrentHashMap<String, TestInMemoryHistogram> histogramStore = new ConcurrentHashMap<>();
+
+ public ConcurrentHashMap<String, TestInMemoryCounter> getCounterStore() {
+ return this.counterStore;
+ }
+
+ public ConcurrentHashMap<String, TestInMemoryHistogram> getHistogramStore() {
+ return this.histogramStore;
+ }
+
+ @Override
+ public Counter createCounter(String name, String description, String unit) {
+ TestInMemoryCounter counter = new TestInMemoryCounter();
+ counterStore.putIfAbsent(name, counter);
+ return counter;
+ }
+
+ @Override
+ public Counter createUpDownCounter(String name, String description, String unit) {
+ /**
+ * ToDo: To be implemented when required.
+ */
+ return null;
+ }
+
+ @Override
+ public Histogram createHistogram(String name, String description, String unit) {
+ TestInMemoryHistogram histogram = new TestInMemoryHistogram();
+ histogramStore.putIfAbsent(name, histogram);
+ return histogram;
+ }
+
+ @Override
+ public Closeable createGauge(String name, String description, String unit, Supplier<Double> valueProvider, Tags tags) {
+ /**
+ * ToDo: To be implemented when required.
+ */
+ return null;
+ }
+
+ @Override
+ public void close() throws IOException {}
+}
diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java
index 3ae737bf63923..1c2270bab1260 100644
--- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java
@@ -1183,7 +1183,8 @@ protected Optional<DisruptableMockTransport> getDisruptableMockTransport(Transpo
getElectionStrategy(),
nodeHealthService,
persistedStateRegistry,
- remoteStoreNodeService
+ remoteStoreNodeService,
+ new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)
);
clusterManagerService.setClusterStatePublisher(coordinator);
final GatewayService gatewayService = new GatewayService(