diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 6281fa0af3e36..5476637b84e92 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -13,15 +13,9 @@ Resolves #[Issue number to be closed when this PR is merged]
 
 ### Check List
-- [ ] New functionality includes testing.
-  - [ ] All tests pass
-- [ ] New functionality has been documented.
-  - [ ] New functionality has javadoc added
-- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md).
-- [ ] Failing checks are inspected and point to the corresponding known issue(s) (See: [Troubleshooting Failing Builds](../blob/main/CONTRIBUTING.md#troubleshooting-failing-builds))
-- [ ] Commits are signed per the DCO using --signoff
-- [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog))
-- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose)
+- [ ] Functionality includes testing.
+- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md), if applicable.
+- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose), if applicable.
 
 By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
 For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/OpenSearch/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml
index 4bff5b7d60d1f..51ae075ffa2c9 100644
--- a/.github/workflows/assemble.yml
+++ b/.github/workflows/assemble.yml
@@ -17,10 +17,23 @@ jobs:
           java-version: ${{ matrix.java }}
           distribution: temurin
       - name: Setup docker (missing on MacOS)
+        id: setup_docker
        if: runner.os == 'macos'
        uses: douglascamata/setup-docker-macos-action@main
+        continue-on-error: true
        with:
          upgrade-qemu: true
+          colima: v0.6.8
      - name: Run Gradle (assemble)
+        if: runner.os == 'macos' && steps.setup_docker.outcome != 'success'
+        run: |
+          # Report success even if previous step failed (Docker on MacOS runner is very unstable)
+          exit 0;
+      - name: Run Gradle (assemble)
+        if: runner.os != 'macos'
+        run: |
+          ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE
+      - name: Run Gradle (assemble)
+        if: runner.os == 'macos' && steps.setup_docker.outcome == 'success'
        run: |
          ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE
diff --git a/.github/workflows/pull-request-checks.yml b/.github/workflows/pull-request-checks.yml
deleted file mode 100644
index eec363572478c..0000000000000
--- a/.github/workflows/pull-request-checks.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: Pull Request Checks
-
-on:
-  pull_request:
-    types:
-      [
-        opened,
-        edited,
-        review_requested,
-        synchronize,
-        reopened,
-        ready_for_review,
-      ]
-
-jobs:
-  verify-description-checklist:
-    name: Verify Description Checklist
-    runs-on: ubuntu-latest
-    steps:
-      - uses: peternied/check-pull-request-description-checklist@v1.1
-        if: github.event.pull_request.user.login != 'dependabot[bot]'
-        with:
-          checklist-items: |
-            New functionality includes testing.
-            All tests pass
-            New functionality has been documented.
-            New functionality has javadoc added
-            Commits are signed per the DCO using --signoff
-            Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog))
diff --git a/CHANGELOG-3.0.md b/CHANGELOG-3.0.md
index 964383078c38d..1cc12f66d52e1 100644
--- a/CHANGELOG-3.0.md
+++ b/CHANGELOG-3.0.md
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ### Dependencies
 
 ### Changed
+- Changed locale provider from COMPAT to CLDR ([#13988](https://github.com/opensearch-project/OpenSearch/pull/13988))
 - Migrate client transports to Apache HttpClient / Core 5.x ([#4459](https://github.com/opensearch-project/OpenSearch/pull/4459))
 - Change http code on create index API with bad input raising NotXContentException from 500 to 400 ([#4773](https://github.com/opensearch-project/OpenSearch/pull/4773))
 - Improve summary error message for invalid setting updates ([#4792](https://github.com/opensearch-project/OpenSearch/pull/4792))
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ce11dabb6e9c..098405f8f1d44 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ## [Unreleased 2.x]
 ### Added
+- Add leader and follower check failure counter metrics ([#12439](https://github.com/opensearch-project/OpenSearch/pull/12439))
 - Add latency metrics for instrumenting critical clusterManager code paths ([#12333](https://github.com/opensearch-project/OpenSearch/pull/12333))
 - Add support for Azure Managed Identity in repository-azure ([#12423](https://github.com/opensearch-project/OpenSearch/issues/12423))
 - Add useCompoundFile index setting ([#13478](https://github.com/opensearch-project/OpenSearch/pull/13478))
@@ -14,12 +15,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Add getMetadataFields to MapperService ([#13819](https://github.com/opensearch-project/OpenSearch/pull/13819))
 - [Remote State] Add async remote state deletion task running on an interval, configurable by a setting ([#13131](https://github.com/opensearch-project/OpenSearch/pull/13131))
 - Allow setting query parameters on requests ([#13776](https://github.com/opensearch-project/OpenSearch/issues/13776))
+- Add remote routing table for remote state publication with experimental feature flag ([#13304](https://github.com/opensearch-project/OpenSearch/pull/13304))
+- [Remote Store] Add support to disable flush based on translog reader count ([#14027](https://github.com/opensearch-project/OpenSearch/pull/14027))
 
 ### Dependencies
 - Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), [#13559](https://github.com/opensearch-project/OpenSearch/pull/13559))
 - Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442))
 - Bump `org.apache.commons:commons-text` from 1.11.0 to 1.12.0 ([#13557](https://github.com/opensearch-project/OpenSearch/pull/13557))
-- Bump `org.hdrhistogram:HdrHistogram` from 2.1.12 to 2.2.1 ([#13556](https://github.com/opensearch-project/OpenSearch/pull/13556))
+- Bump `org.hdrhistogram:HdrHistogram` from 2.1.12 to 2.2.2 ([#13556](https://github.com/opensearch-project/OpenSearch/pull/13556), [#13986](https://github.com/opensearch-project/OpenSearch/pull/13986))
 - Bump `com.gradle.enterprise` from 3.17.2 to 3.17.4 ([#13641](https://github.com/opensearch-project/OpenSearch/pull/13641), [#13753](https://github.com/opensearch-project/OpenSearch/pull/13753))
 - Bump `org.apache.hadoop:hadoop-minicluster` from 3.3.6 to 3.4.0 ([#13642](https://github.com/opensearch-project/OpenSearch/pull/13642))
 - Bump `mockito` from 5.11.0 to 5.12.0 ([#13665](https://github.com/opensearch-project/OpenSearch/pull/13665))
diff --git a/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java
index 2ea8c2d015ecc..d0cb2da9c1dd3 100644
--- a/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java
+++ b/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchTestBasePlugin.java
@@ -110,7 +110,7 @@ public void execute(Task t) {
                 if (BuildParams.getRuntimeJavaVersion() == JavaVersion.VERSION_1_8) {
                     test.systemProperty("java.locale.providers", "SPI,JRE");
                 } else {
-                    test.systemProperty("java.locale.providers", "SPI,COMPAT");
+                    test.systemProperty("java.locale.providers", "SPI,CLDR");
                     if (test.getJavaVersion().compareTo(JavaVersion.VERSION_17) < 0) {
                         test.jvmArgs("--illegal-access=warn");
                     }
diff --git a/buildSrc/version.properties b/buildSrc/version.properties
index 0a36ed5e200f7..3a76cf6e9b7ad 100644
--- a/buildSrc/version.properties
+++ b/buildSrc/version.properties
@@ -23,6 +23,8 @@ guava = 32.1.1-jre
 protobuf = 3.22.3
 jakarta_annotation = 1.3.5
 google_http_client = 1.44.1
+tdigest = 3.3
+hdrhistogram = 2.2.2
 
 # when updating the JNA version, also update the version in buildSrc/build.gradle
 jna = 5.13.0
diff --git a/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java b/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java
index 726c381db09f6..af7138569972a 100644
--- a/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java
+++ b/distribution/tools/launchers/src/main/java/org/opensearch/tools/launchers/SystemJvmOptions.java
@@ -105,13 +105,8 @@ private static String javaLocaleProviders() {
            SPI setting is used to allow loading custom CalendarDataProvider
            in jdk8 it has to be loaded from jre/lib/ext,
            in jdk9+ it is already within ES project and on a classpath
-
-           Due to internationalization enhancements in JDK 9 OpenSearch need to set the provider to COMPAT otherwise time/date
-           parsing will break in an incompatible way for some date patterns and locales.
-           //TODO COMPAT will be deprecated in at some point, see please https://bugs.openjdk.java.net/browse/JDK-8232906
-           See also: documentation in server/org.opensearch.common.time.IsoCalendarDataProvider
         */
-        return "-Djava.locale.providers=SPI,COMPAT";
+        return "-Djava.locale.providers=SPI,CLDR";
     }
 }
diff --git a/gradle/ide.gradle b/gradle/ide.gradle
index 14d6b2982ccd0..e266d9add172d 100644
--- a/gradle/ide.gradle
+++ b/gradle/ide.gradle
@@ -28,7 +28,7 @@ allprojects {
   apply plugin: 'idea'
 
   tasks.named('idea').configure {
-    doFirst { throw new GradleException("Use of the 'idea' task has been deprecated. For details on importing into IntelliJ see CONTRIBUTING.md.") }
+    doFirst { throw new GradleException("Use of the 'idea' task has been deprecated. For details on importing into IntelliJ see DEVELOPER_GUIDE.md.") }
  }
 }
@@ -81,7 +81,7 @@ if (System.getProperty('idea.active') == 'true') {
     }
     runConfigurations {
       defaults(JUnit) {
-        vmParameters = '-ea -Djava.locale.providers=SPI,COMPAT'
+        vmParameters = '-ea -Djava.locale.providers=SPI,CLDR'
         if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_17) {
           vmParameters += ' -Djava.security.manager=allow'
         }
diff --git a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamInput.java
similarity index 96%
rename from server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java
rename to libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamInput.java
index f75f27b7bcb91..41680961b36e9 100644
--- a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java
+++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamInput.java
@@ -30,12 +30,10 @@
  * GitHub history for details.
  */
 
-package org.opensearch.index.translog;
+package org.opensearch.core.common.io.stream;
 
 import org.apache.lucene.store.BufferedChecksum;
 import org.apache.lucene.util.BitUtil;
-import org.opensearch.core.common.io.stream.FilterStreamInput;
-import org.opensearch.core.common.io.stream.StreamInput;
 
 import java.io.EOFException;
 import java.io.IOException;
diff --git a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java
similarity index 96%
rename from server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java
rename to libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java
index 9e96664c79cc5..422f956c0cd47 100644
--- a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java
+++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java
@@ -30,11 +30,10 @@
  * GitHub history for details.
  */
 
-package org.opensearch.index.translog;
+package org.opensearch.core.common.io.stream;
 
 import org.apache.lucene.store.BufferedChecksum;
 import org.opensearch.common.annotation.PublicApi;
-import org.opensearch.core.common.io.stream.StreamOutput;
 
 import java.io.IOException;
 import java.util.zip.CRC32;
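With the checksum stream wrappers moved from the translog package into `libs/core`, any `StreamOutput` consumer can checksum a serialized payload. A minimal sketch, assuming the classes' existing public API (a constructor wrapping a `StreamOutput`, and `getChecksum()`); the wiring below is illustrative, not code from this change:

```java
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput;

public class ChecksumSketch {
    public static void main(String[] args) throws Exception {
        // Wrap any StreamOutput; the CRC32 checksum accumulates as bytes are written.
        try (BytesStreamOutput raw = new BytesStreamOutput();
             BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput(raw)) {
            out.writeString("hello");
            System.out.println("checksum=" + out.getChecksum());
        }
    }
}
```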
diff --git a/release-notes/opensearch.release-notes-1.3.17.md b/release-notes/opensearch.release-notes-1.3.17.md
new file mode 100644
index 0000000000000..5218b9e3be20c
--- /dev/null
+++ b/release-notes/opensearch.release-notes-1.3.17.md
@@ -0,0 +1,6 @@
+## 2024-05-30 Version 1.3.17 Release Notes
+
+### Upgrades
+- OpenJDK Update (April 2024 Patch releases), update to Eclipse Temurin 11.0.23+9 ([#13406](https://github.com/opensearch-project/OpenSearch/pull/13406))
+- Upgrade BouncyCastle dependencies from 1.75 to 1.78.1 resolving [CVE-2024-30172], [CVE-2024-30171] and [CVE-2024-29857]
+- Bump `netty` from 4.1.109.Final to 4.1.110.Final ([#13802](https://github.com/opensearch-project/OpenSearch/pull/13802))
diff --git a/server/build.gradle b/server/build.gradle
index 15301e68fca3d..624e5fe332662 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -96,9 +96,9 @@ dependencies {
   api "joda-time:joda-time:${versions.joda}"
 
   // percentiles aggregation
-  api 'com.tdunning:t-digest:3.3'
-  // precentil ranks aggregation
-  api 'org.hdrhistogram:HdrHistogram:2.2.1'
+  api "com.tdunning:t-digest:${versions.tdigest}"
+  // percentile ranks aggregation
+  api "org.hdrhistogram:HdrHistogram:${versions.hdrhistogram}"
 
   // lucene spatial
   api "org.locationtech.spatial4j:spatial4j:${versions.spatial4j}", optional
diff --git a/server/licenses/HdrHistogram-2.2.1.jar.sha1 b/server/licenses/HdrHistogram-2.2.1.jar.sha1
deleted file mode 100644
index 68225950d4744..0000000000000
--- a/server/licenses/HdrHistogram-2.2.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-0eb1feb351f64176c377772a30174e582c0274d5
\ No newline at end of file
diff --git a/server/licenses/HdrHistogram-2.2.2.jar.sha1 b/server/licenses/HdrHistogram-2.2.2.jar.sha1
new file mode 100644
index 0000000000000..2c895841bce81
--- /dev/null
+++ b/server/licenses/HdrHistogram-2.2.2.jar.sha1
@@ -0,0 +1 @@
+7959933ebcc0f05b2eaa5af0a0c8689fa257b15c
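For context on the bumped dependency: HdrHistogram backs the percentile ranks aggregation because it answers percentile queries over recorded values cheaply and with bounded error. A standalone sketch of the library API (illustrative only, not OpenSearch code):

```java
import org.HdrHistogram.Histogram;

public class HdrSketch {
    public static void main(String[] args) {
        Histogram histogram = new Histogram(3); // track values to 3 significant decimal digits
        for (long latencyMicros : new long[] { 120, 340, 560, 7_800, 12_000 }) {
            histogram.recordValue(latencyMicros);
        }
        // Percentile and percentile-rank style queries over the recorded values
        System.out.println("p99 = " + histogram.getValueAtPercentile(99.0));
        System.out.println("rank of 560 = " + histogram.getPercentileAtOrBelowValue(560));
    }
}
```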
diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java
new file mode 100644
index 0000000000000..e96dedaa3e6a0
--- /dev/null
+++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java
@@ -0,0 +1,154 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote;
+
+import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+import org.opensearch.test.OpenSearchIntegTestCase;
+import org.junit.Before;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS;
+import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES;
+import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING;
+import static org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING;
+
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
+public class RemoteClusterStateCleanupManagerIT extends RemoteStoreBaseIntegTestCase {
+
+    private static final String INDEX_NAME = "test-index";
+
+    @Before
+    public void setup() {
+        asyncUploadMockFsRepo = false;
+    }
+
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) {
+        return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build();
+    }
+
+    private Map<String, Long> initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) {
+        prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount);
+        Map<String, Long> indexStats = indexData(1, false, INDEX_NAME);
+        assertEquals(shardCount * (replicaCount + 1), getNumShards(INDEX_NAME).totalNumShards);
+        ensureGreen(INDEX_NAME);
+        return indexStats;
+    }
+
+    public void testRemoteCleanupTaskUpdated() {
+        int shardCount = randomIntBetween(1, 2);
+        int replicaCount = 1;
+        int dataNodeCount = shardCount * (replicaCount + 1);
+        int clusterManagerNodeCount = 1;
+
+        initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
+        RemoteClusterStateCleanupManager remoteClusterStateCleanupManager = internalCluster().getClusterManagerNodeInstance(
+            RemoteClusterStateCleanupManager.class
+        );
+
+        assertEquals(CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval());
+        assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled());
+
+        // now disable cleanup
+        client().admin()
+            .cluster()
+            .prepareUpdateSettings()
+            .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), -1))
+            .get();
+
+        assertEquals(-1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMillis());
+        assertFalse(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled());
+
+        // now set the cleanup interval to 1 min
+        client().admin()
+            .cluster()
+            .prepareUpdateSettings()
+            .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "1m"))
+            .get();
+        assertEquals(1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMinutes());
+    }
+
+    public void testRemoteCleanupDeleteStale() throws Exception {
+        int shardCount = randomIntBetween(1, 2);
+        int replicaCount = 1;
+        int dataNodeCount = shardCount * (replicaCount + 1);
+        int clusterManagerNodeCount = 1;
+
+        initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
+
+        // Update the cluster state 21 times to ensure cleanup has run; this uploads 42 manifest files
+        // to the repository. If fewer manifest files than that remain, cleanup has run.
+        updateClusterStateNTimes(RETAINED_MANIFESTS + SKIP_CLEANUP_STATE_CHANGES + 1);
+
+        RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class);
+        BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REPOSITORY_NAME);
+        BlobPath baseMetadataPath = repository.basePath()
+            .add(
+                Base64.getUrlEncoder()
+                    .withoutPadding()
+                    .encodeToString(getClusterState().getClusterName().value().getBytes(StandardCharsets.UTF_8))
+            )
+            .add("cluster-state")
+            .add(getClusterState().metadata().clusterUUID());
+        BlobPath manifestContainerPath = baseMetadataPath.add("manifest");
+
+        // set cleanup interval to 100 ms to make the test faster
+        ClusterUpdateSettingsResponse response = client().admin()
+            .cluster()
+            .prepareUpdateSettings()
+            .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "100ms"))
+            .get();
+
+        assertTrue(response.isAcknowledged());
+
+        assertBusy(() -> {
+            int manifestFiles = repository.blobStore().blobContainer(manifestContainerPath).listBlobsByPrefix("manifest").size();
+            logger.info("current number of manifest files: {}", manifestFiles);
+            // We can't guarantee that the repository holds exactly RETAINED_MANIFESTS manifests, since queued tasks other
+            // than the replica count change can upload new manifest files. Instead, check that the number of manifests lies
+            // between RETAINED_MANIFESTS and RETAINED_MANIFESTS + 2 * SKIP_CLEANUP_STATE_CHANGES (each cluster state update
+            // uploads 2 manifests).
+            assertTrue(
+                "Current number of manifest files: " + manifestFiles,
+                manifestFiles >= RETAINED_MANIFESTS && manifestFiles < RETAINED_MANIFESTS + 2 * SKIP_CLEANUP_STATE_CHANGES
+            );
+        }, 500, TimeUnit.MILLISECONDS);
+
+        // disable the cleanup to avoid a race condition during shutdown
+        response = client().admin()
+            .cluster()
+            .prepareUpdateSettings()
+            .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "-1"))
+            .get();
+
+        assertTrue(response.isAcknowledged());
+    }
+
+    private void updateClusterStateNTimes(int n) {
+        int newReplicaCount = randomIntBetween(0, 3);
+        for (int i = n; i > 0; i--) {
+            ClusterUpdateSettingsResponse response = client().admin()
+                .cluster()
+                .prepareUpdateSettings()
+                .setPersistentSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), i, TimeUnit.SECONDS))
+                .get();
+            assertTrue(response.isAcknowledged());
+        }
+    }
+}
diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java
index 42120aa32eb47..ab2f0f0080566 100644
--- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java
@@ -10,7 +10,6 @@
 
 import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest;
 import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
-import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.blobstore.BlobPath;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.discovery.DiscoveryStats;
@@ -27,7 +26,6 @@
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
-import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
 import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA;
 import static org.opensearch.gateway.remote.RemoteClusterStateService.CUSTOM_METADATA;
 import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER;
@@ -51,16 +49,6 @@ protected Settings nodeSettings(int nodeOrdinal) {
         return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build();
     }
 
-    private void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) {
-        internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes);
-        internalCluster().startDataOnlyNodes(numDataOnlyNodes);
-        for (String index : indices.split(",")) {
-            createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount));
-            ensureYellowAndNoInitializingShards(index);
-            ensureGreen(index);
-        }
-    }
-
     private Map<String, Long> initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) {
         prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount);
         Map<String, Long> indexStats = indexData(1, false, INDEX_NAME);
@@ -69,49 +57,6 @@ private Map<String, Long> initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) {
         return indexStats;
     }
 
-    public void testFullClusterRestoreStaleDelete() throws Exception {
-        int shardCount = randomIntBetween(1, 2);
-        int replicaCount = 1;
-        int dataNodeCount = shardCount * (replicaCount + 1);
-        int clusterManagerNodeCount = 1;
-
-        initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
-        setReplicaCount(0);
-        setReplicaCount(2);
-        setReplicaCount(0);
-        setReplicaCount(1);
-        setReplicaCount(0);
-        setReplicaCount(1);
-        setReplicaCount(0);
-        setReplicaCount(2);
-        setReplicaCount(0);
-
-        RemoteClusterStateService remoteClusterStateService = internalCluster().getClusterManagerNodeInstance(
-            RemoteClusterStateService.class
-        );
-
-        RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class);
-
-        BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REPOSITORY_NAME);
-        BlobPath baseMetadataPath = repository.basePath()
-            .add(
-                Base64.getUrlEncoder()
-                    .withoutPadding()
-                    .encodeToString(getClusterState().getClusterName().value().getBytes(StandardCharsets.UTF_8))
-            )
-            .add("cluster-state")
-            .add(getClusterState().metadata().clusterUUID());
-
-        assertEquals(10, repository.blobStore().blobContainer(baseMetadataPath.add("manifest")).listBlobsByPrefix("manifest").size());
-
-        Map<String, IndexMetadata> indexMetadataMap = remoteClusterStateService.getLatestClusterState(
-            cluster().getClusterName(),
-            getClusterState().metadata().clusterUUID()
-        ).getMetadata().getIndices();
-        assertEquals(0, indexMetadataMap.values().stream().findFirst().get().getNumberOfReplicas());
-        assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards());
-    }
-
     public void testRemoteStateStats() {
         int shardCount = randomIntBetween(1, 2);
         int replicaCount = 1;
@@ -241,12 +186,4 @@ private void validateNodesStatsResponse(NodesStatsResponse nodesStatsResponse) {
         assertNotNull(nodesStatsResponse.getNodes().get(0));
         assertNotNull(nodesStatsResponse.getNodes().get(0).getDiscoveryStats());
     }
-
-    private void setReplicaCount(int replicaCount) {
-        client().admin()
-            .indices()
-            .prepareUpdateSettings(INDEX_NAME)
-            .setSettings(Settings.builder().put(SETTING_NUMBER_OF_REPLICAS, replicaCount))
-            .get();
-    }
 }
diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java
index 740aee69f7d80..64efcee6ef1b5 100644
--- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java
+++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java
@@ -350,4 +350,14 @@ protected void restore(boolean restoreAllShards, String... indices) {
             PlainActionFuture.newFuture()
         );
     }
+
+    protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) {
+        internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes);
+        internalCluster().startDataOnlyNodes(numDataOnlyNodes);
+        for (String index : indices.split(",")) {
+            createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount));
+            ensureYellowAndNoInitializingShards(index);
+            ensureGreen(index);
+        }
+    }
 }
diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
index 7721b18a4fe6b..96d6338e5913b 100644
--- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java
@@ -852,7 +852,9 @@ public void testFlushOnTooManyRemoteTranslogFiles() throws Exception {
 
         ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest();
         updateSettingsRequest.persistentSettings(
-            Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "100")
+            Settings.builder()
+                .put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "100")
+                .put(CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.getKey(), "0ms")
         );
         assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet());
 
@@ -883,5 +885,27 @@ public void testFlushOnTooManyRemoteTranslogFiles() throws Exception {
                 assertEquals(totalFiles, 1L);
             }
         }, 30, TimeUnit.SECONDS);
+
+        // Disabling max translog readers
+        assertAcked(
+            internalCluster().client()
+                .admin()
+                .cluster()
+                .prepareUpdateSettings()
+                .setPersistentSettings(Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "-1"))
+                .get()
+        );
+
+        // Indexing 500 more docs
+        for (int i = 0; i < 500; i++) {
+            indexBulk(INDEX_NAME, 1);
+        }
+
+        // No flush is triggered since max_translog_readers is set to -1,
+        // so the total translog file count is incremented by 500
+        try (Stream<Path> files = Files.list(translogLocation)) {
+            long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count();
+            assertEquals(totalFiles, 501L);
+        }
     }
 }
diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java
index a58db51780826..01ad06757640c 100644
--- a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java
@@ -1914,14 +1914,8 @@ public void testRangeQueryWithTimeZone() throws Exception {
      * Test range with a custom locale, e.g. "de" in this case. Documents here mention the day of week
      * as "Mi" for "Mittwoch (Wednesday)" and "Do" for "Donnerstag (Thursday)" and the month in the query
      * as "Dez" for "Dezember (December)".
-     * Note: this test currently needs the JVM arg `-Djava.locale.providers=SPI,COMPAT` to be set.
-     * When running with gradle this is done implicitly through the BuildPlugin, but when running from
-     * an IDE this might need to be set manually in the run configuration. See also CONTRIBUTING.md section
-     * on "Configuring IDEs And Running Tests".
      */
     public void testRangeQueryWithLocaleMapping() throws Exception {
-        assert ("SPI,COMPAT".equals(System.getProperty("java.locale.providers"))) : "`-Djava.locale.providers=SPI,COMPAT` needs to be set";
-
         assertAcked(
             prepareCreate("test").setMapping(
                 jsonBuilder().startObject()
@@ -1938,17 +1932,21 @@ public void testRangeQueryWithLocaleMapping() throws Exception {
 
         indexRandom(
             true,
-            client().prepareIndex("test").setId("1").setSource("date_field", "Mi, 06 Dez 2000 02:55:00 -0800"),
-            client().prepareIndex("test").setId("2").setSource("date_field", "Do, 07 Dez 2000 02:55:00 -0800")
+            client().prepareIndex("test").setId("1").setSource("date_field", "Mi., 06 Dez. 2000 02:55:00 -0800"),
+            client().prepareIndex("test").setId("2").setSource("date_field", "Do., 07 Dez. 2000 02:55:00 -0800")
         );
 
         SearchResponse searchResponse = client().prepareSearch("test")
-            .setQuery(QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Do, 07 Dez 2000 00:00:00 -0800"))
+            .setQuery(
+                QueryBuilders.rangeQuery("date_field").gte("Di., 05 Dez. 2000 02:55:00 -0800").lte("Do., 07 Dez. 2000 00:00:00 -0800")
+            )
             .get();
         assertHitCount(searchResponse, 1L);
 
         searchResponse = client().prepareSearch("test")
-            .setQuery(QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Fr, 08 Dez 2000 00:00:00 -0800"))
+            .setQuery(
+                QueryBuilders.rangeQuery("date_field").gte("Di., 05 Dez. 2000 02:55:00 -0800").lte("Fr., 08 Dez. 2000 00:00:00 -0800")
+            )
             .get();
         assertHitCount(searchResponse, 2L);
     }
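The dotted test dates above follow from the CLDR switch: CLDR's German abbreviations carry a trailing period ("Mi.", "Dez.") where the legacy COMPAT data used "Mi"/"Dez". A small standalone check, assuming a JDK 9+ runtime where CLDR is the default locale data:

```java
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Locale;

public class CldrDateCheck {
    public static void main(String[] args) {
        // Under CLDR locale data, EEE -> "Mi." and MMM -> "Dez." for Locale.GERMAN,
        // so the dotted dates parse while the old undotted ones would not.
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("EEE, dd MMM yyyy", Locale.GERMAN);
        System.out.println(LocalDate.parse("Mi., 06 Dez. 2000", formatter)); // 2000-12-06, a Wednesday
    }
}
```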
2000 00:00:00 -0800") + ) .get(); assertHitCount(searchResponse, 2L); } diff --git a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java index d48f82a388245..a98349a4af5cd 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java @@ -8,6 +8,7 @@ package org.opensearch.cluster; +import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.Histogram; import org.opensearch.telemetry.metrics.MetricsRegistry; import org.opensearch.telemetry.metrics.tags.Tags; @@ -23,6 +24,7 @@ public final class ClusterManagerMetrics { private static final String LATENCY_METRIC_UNIT_MS = "ms"; + private static final String COUNTER_METRICS_UNIT = "1"; public final Histogram clusterStateAppliersHistogram; public final Histogram clusterStateListenersHistogram; @@ -30,6 +32,9 @@ public final class ClusterManagerMetrics { public final Histogram clusterStateComputeHistogram; public final Histogram clusterStatePublishHistogram; + public final Counter leaderCheckFailureCounter; + public final Counter followerChecksFailureCounter; + public ClusterManagerMetrics(MetricsRegistry metricsRegistry) { clusterStateAppliersHistogram = metricsRegistry.createHistogram( "cluster.state.appliers.latency", @@ -56,6 +61,16 @@ public ClusterManagerMetrics(MetricsRegistry metricsRegistry) { "Histogram for recording time taken to publish a new cluster state", LATENCY_METRIC_UNIT_MS ); + followerChecksFailureCounter = metricsRegistry.createCounter( + "followers.checker.failure.count", + "Counter for number of failed follower checks", + COUNTER_METRICS_UNIT + ); + leaderCheckFailureCounter = metricsRegistry.createCounter( + "leader.checker.failure.count", + "Counter for number of failed leader checks", + COUNTER_METRICS_UNIT + ); } public void recordLatency(Histogram histogram, Double value) { @@ -69,4 +84,16 @@ public void recordLatency(Histogram histogram, Double value, Optional tags } histogram.record(value, tags.get()); } + + public void incrementCounter(Counter counter, Double value) { + incrementCounter(counter, value, Optional.empty()); + } + + public void incrementCounter(Counter counter, Double value, Optional tags) { + if (Objects.isNull(tags) || tags.isEmpty()) { + counter.add(value); + return; + } + counter.add(value, tags.get()); + } } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 3d74feddfa261..f53e6837a67f5 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -36,6 +36,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskConfig; @@ -207,7 +208,8 @@ public Coordinator( ElectionStrategy electionStrategy, NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, - RemoteStoreNodeService remoteStoreNodeService + RemoteStoreNodeService remoteStoreNodeService, + ClusterManagerMetrics clusterManagerMetrics ) { this.settings = settings; this.transportService = 
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java
index 3d74feddfa261..f53e6837a67f5 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java
@@ -36,6 +36,7 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.opensearch.cluster.ClusterChangedEvent;
+import org.opensearch.cluster.ClusterManagerMetrics;
 import org.opensearch.cluster.ClusterName;
 import org.opensearch.cluster.ClusterState;
 import org.opensearch.cluster.ClusterStateTaskConfig;
@@ -207,7 +208,8 @@ public Coordinator(
         ElectionStrategy electionStrategy,
         NodeHealthService nodeHealthService,
         PersistedStateRegistry persistedStateRegistry,
-        RemoteStoreNodeService remoteStoreNodeService
+        RemoteStoreNodeService remoteStoreNodeService,
+        ClusterManagerMetrics clusterManagerMetrics
     ) {
         this.settings = settings;
         this.transportService = transportService;
@@ -261,14 +263,22 @@ public Coordinator(
             this::handlePublishRequest,
             this::handleApplyCommit
         );
-        this.leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, this::onLeaderFailure, nodeHealthService);
+        this.leaderChecker = new LeaderChecker(
+            settings,
+            clusterSettings,
+            transportService,
+            this::onLeaderFailure,
+            nodeHealthService,
+            clusterManagerMetrics
+        );
         this.followersChecker = new FollowersChecker(
             settings,
             clusterSettings,
             transportService,
             this::onFollowerCheckRequest,
             this::removeNode,
-            nodeHealthService
+            nodeHealthService,
+            clusterManagerMetrics
         );
         this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger);
         this.clusterApplier = clusterApplier;
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java
index 70bb0515bb022..2ec0dabd91786 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java
@@ -35,6 +35,7 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.opensearch.cluster.ClusterManagerMetrics;
 import org.opensearch.cluster.coordination.Coordinator.Mode;
 import org.opensearch.cluster.node.DiscoveryNode;
 import org.opensearch.cluster.node.DiscoveryNodes;
@@ -127,6 +128,7 @@ public class FollowersChecker {
     private final TransportService transportService;
     private final NodeHealthService nodeHealthService;
     private volatile FastResponseState fastResponseState;
+    private ClusterManagerMetrics clusterManagerMetrics;
 
     public FollowersChecker(
         Settings settings,
@@ -134,7 +136,8 @@ public FollowersChecker(
         TransportService transportService,
         Consumer<FollowerCheckRequest> handleRequestAndUpdateState,
         BiConsumer<DiscoveryNode, String> onNodeFailure,
-        NodeHealthService nodeHealthService
+        NodeHealthService nodeHealthService,
+        ClusterManagerMetrics clusterManagerMetrics
     ) {
         this.settings = settings;
         this.transportService = transportService;
@@ -161,6 +164,7 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) {
                 handleDisconnectedNode(node);
             }
         });
+        this.clusterManagerMetrics = clusterManagerMetrics;
     }
 
     private void setFollowerCheckTimeout(TimeValue followerCheckTimeout) {
@@ -413,6 +417,7 @@ public String executor() {
         }
 
         void failNode(String reason) {
+            clusterManagerMetrics.incrementCounter(clusterManagerMetrics.followerChecksFailureCounter, 1.0);
             transportService.getThreadPool().generic().execute(new Runnable() {
                 @Override
                 public void run() {
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java
index 5475470b81b93..f77a7ffc8ce8e 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java
@@ -511,11 +511,27 @@ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNode, Metadata metadata) {
         assert existingNodes.isEmpty() == false;
 
         CompatibilityMode remoteStoreCompatibilityMode = REMOTE_STORE_COMPATIBILITY_MODE_SETTING.get(metadata.settings());
-        if (STRICT.equals(remoteStoreCompatibilityMode)) {
-            DiscoveryNode existingNode = existingNodes.get(0);
+
+        List<String> reposToSkip = new ArrayList<>(1);
+        Optional<DiscoveryNode> remoteRoutingTableNode = existingNodes.stream()
+            .filter(
+                node -> node.getAttributes().get(RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY) != null
+            )
+            .findFirst();
+        // If none of the existing nodes have routing table repo, then we skip this repo check if present in joining node.
+        // This ensures a new node with remote routing table repo is able to join the cluster.
+        if (remoteRoutingTableNode.isEmpty()) {
+            String joiningNodeRepoName = joiningNode.getAttributes()
+                .get(RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY);
+            if (joiningNodeRepoName != null) {
+                reposToSkip.add(joiningNodeRepoName);
+            }
+        }
+
+        if (STRICT.equals(remoteStoreCompatibilityMode)) {
+            DiscoveryNode existingNode = remoteRoutingTableNode.orElseGet(() -> existingNodes.get(0));
             if (joiningNode.isRemoteStoreNode()) {
-                ensureRemoteStoreNodesCompatibility(joiningNode, existingNode);
+                ensureRemoteStoreNodesCompatibility(joiningNode, existingNode, reposToSkip);
             } else {
                 if (existingNode.isRemoteStoreNode()) {
                     throw new IllegalStateException(
                         "a remote store node [" + joiningNode
@@ -537,19 +553,25 @@ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNode, Metadata metadata) {
                 throw new IllegalStateException(reason);
             }
             if (joiningNode.isRemoteStoreNode()) {
-                Optional<DiscoveryNode> remoteDN = existingNodes.stream().filter(DiscoveryNode::isRemoteStoreNode).findFirst();
-                remoteDN.ifPresent(discoveryNode -> ensureRemoteStoreNodesCompatibility(joiningNode, discoveryNode));
+                Optional<DiscoveryNode> remoteDN = remoteRoutingTableNode.isPresent()
+                    ? remoteRoutingTableNode
+                    : existingNodes.stream().filter(DiscoveryNode::isRemoteStoreNode).findFirst();
+                remoteDN.ifPresent(discoveryNode -> ensureRemoteStoreNodesCompatibility(joiningNode, discoveryNode, reposToSkip));
             }
         }
     }
 
-    private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNode existingNode) {
+    private static void ensureRemoteStoreNodesCompatibility(
+        DiscoveryNode joiningNode,
+        DiscoveryNode existingNode,
+        List<String> reposToSkip
+    ) {
         if (joiningNode.isRemoteStoreNode()) {
             if (existingNode.isRemoteStoreNode()) {
                 RemoteStoreNodeAttribute joiningRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(joiningNode);
                 RemoteStoreNodeAttribute existingRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(existingNode);
-                if (existingRemoteStoreNodeAttribute.equals(joiningRemoteStoreNodeAttribute) == false) {
+                if (existingRemoteStoreNodeAttribute.equalsWithRepoSkip(joiningRemoteStoreNodeAttribute, reposToSkip) == false) {
                     throw new IllegalStateException(
                         "a remote store node [" + joiningNode
diff --git a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java
index 8d4373b865f62..4fd2c0eb13073 100644
--- a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java
+++ b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java
@@ -36,6 +36,7 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.opensearch.OpenSearchException;
+import org.opensearch.cluster.ClusterManagerMetrics;
 import org.opensearch.cluster.node.DiscoveryNode;
 import org.opensearch.cluster.node.DiscoveryNodes;
 import org.opensearch.common.Nullable;
@@ -119,17 +120,17 @@ public class LeaderChecker {
 
     private final TransportService transportService;
     private final Consumer<Exception> onLeaderFailure;
     private final NodeHealthService nodeHealthService;
-
     private AtomicReference<CheckScheduler> currentChecker = new AtomicReference<>();
-
     private volatile DiscoveryNodes discoveryNodes;
+    private final ClusterManagerMetrics clusterManagerMetrics;
 
     LeaderChecker(
         final Settings settings,
         final ClusterSettings clusterSettings,
         final TransportService transportService,
         final Consumer<Exception> onLeaderFailure,
-        NodeHealthService nodeHealthService
+        NodeHealthService nodeHealthService,
+        final ClusterManagerMetrics clusterManagerMetrics
     ) {
         this.settings = settings;
         leaderCheckInterval = LEADER_CHECK_INTERVAL_SETTING.get(settings);
@@ -138,6 +139,7 @@ public class LeaderChecker {
         this.transportService = transportService;
         this.onLeaderFailure = onLeaderFailure;
         this.nodeHealthService = nodeHealthService;
+        this.clusterManagerMetrics = clusterManagerMetrics;
         clusterSettings.addSettingsUpdateConsumer(LEADER_CHECK_TIMEOUT_SETTING, this::setLeaderCheckTimeout);
 
         transportService.registerRequestHandler(
@@ -293,7 +295,6 @@ public void handleResponse(Empty response) {
                     logger.debug("closed check scheduler received a response, doing nothing");
                     return;
                 }
-                failureCountSinceLastSuccess.set(0);
                 scheduleNextWakeUp(); // logs trace message indicating success
             }
 
@@ -304,7 +305,6 @@ public void handleException(TransportException exp) {
                     logger.debug("closed check scheduler received a response, doing nothing");
                     return;
                 }
-
                 if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) {
                     logger.debug(new ParameterizedMessage("leader [{}] disconnected during check", leader), exp);
                     leaderFailed(new ConnectTransportException(leader, "disconnected during check", exp));
@@ -355,6 +355,7 @@ public String executor() {
 
         void leaderFailed(Exception e) {
             if (isClosed.compareAndSet(false, true)) {
+                clusterManagerMetrics.incrementCounter(clusterManagerMetrics.leaderCheckFailureCounter, 1.0);
                 transportService.getThreadPool().generic().execute(new Runnable() {
                     @Override
                     public void run() {
diff --git a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java
index 9b52bdd1b16c5..4b3dc7964a87b 100644
--- a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java
+++ b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java
@@ -51,8 +51,10 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.List;
+import java.util.stream.Collectors;
 
 import static org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING;
 
@@ -164,6 +166,40 @@ public boolean equalsIgnoreGenerations(@Nullable RepositoriesMetadata other) {
         return true;
     }
 
+    /**
+     * Checks if this instance and the given instance share the same repositories, with an option to skip the check for a list of repos.
+     * This supports excluding specific repositories from the equality check.
+     * @param other other repositories metadata
+     * @param reposToSkip list of repos to skip check for equality
+     * @return {@code true} iff both instances contain the same repositories apart from differences in generations, not including repos provided in reposToSkip.
+     */
+    public boolean equalsIgnoreGenerationsWithRepoSkip(@Nullable RepositoriesMetadata other, List<String> reposToSkip) {
+        if (other == null) {
+            return false;
+        }
+        List<RepositoryMetadata> currentRepositories = repositories.stream()
+            .filter(repo -> !reposToSkip.contains(repo.name()))
+            .collect(Collectors.toList());
+        List<RepositoryMetadata> otherRepositories = other.repositories.stream()
+            .filter(repo -> !reposToSkip.contains(repo.name()))
+            .collect(Collectors.toList());
+
+        if (otherRepositories.size() != currentRepositories.size()) {
+            return false;
+        }
+        // Sort repos by name for ordered comparison
+        Comparator<RepositoryMetadata> compareByName = (o1, o2) -> o1.name().compareTo(o2.name());
+        currentRepositories.sort(compareByName);
+        otherRepositories.sort(compareByName);
+
+        for (int i = 0; i < currentRepositories.size(); i++) {
+            if (currentRepositories.get(i).equalsIgnoreGenerations(otherRepositories.get(i)) == false) {
+                return false;
+            }
+        }
+        return true;
+    }
+
     @Override
     public int hashCode() {
         return repositories.hashCode();
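To illustrate the new comparison: two metadata sets that differ only in a skipped repository compare equal. The repository names and the `fs` type below are made up for the sketch:

```java
import java.util.List;

import org.opensearch.cluster.metadata.RepositoriesMetadata;
import org.opensearch.cluster.metadata.RepositoryMetadata;
import org.opensearch.common.settings.Settings;

class RepoSkipSketch {
    static boolean demo() {
        RepositoriesMetadata withRoutingRepo = new RepositoriesMetadata(
            List.of(
                new RepositoryMetadata("segment-repo", "fs", Settings.EMPTY),
                new RepositoryMetadata("routing-table-repo", "fs", Settings.EMPTY)
            )
        );
        RepositoriesMetadata withoutRoutingRepo = new RepositoriesMetadata(
            List.of(new RepositoryMetadata("segment-repo", "fs", Settings.EMPTY))
        );
        // true: the routing table repo is excluded from the comparison
        return withRoutingRepo.equalsIgnoreGenerationsWithRepoSkip(withoutRoutingRepo, List.of("routing-table-repo"));
    }
}
```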
diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java
new file mode 100644
index 0000000000000..ba2208e17df1f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java
@@ -0,0 +1,67 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.cluster.routing.remote;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.io.IOUtils;
+import org.opensearch.node.Node;
+import org.opensearch.node.remotestore.RemoteStoreNodeAttribute;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.Repository;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled;
+
+/**
+ * A Service which provides APIs to upload and download routing table from remote store.
+ *
+ * @opensearch.internal
+ */
+public class RemoteRoutingTableService extends AbstractLifecycleComponent {
+
+    private static final Logger logger = LogManager.getLogger(RemoteRoutingTableService.class);
+    private final Settings settings;
+    private final Supplier<RepositoriesService> repositoriesService;
+    private BlobStoreRepository blobStoreRepository;
+
+    public RemoteRoutingTableService(Supplier<RepositoriesService> repositoriesService, Settings settings) {
+        assert isRemoteRoutingTableEnabled(settings) : "Remote routing table is not enabled";
+        this.repositoriesService = repositoriesService;
+        this.settings = settings;
+    }
+
+    @Override
+    protected void doClose() throws IOException {
+        if (blobStoreRepository != null) {
+            IOUtils.close(blobStoreRepository);
+        }
+    }
+
+    @Override
+    protected void doStart() {
+        assert isRemoteRoutingTableEnabled(settings) == true : "Remote routing table is not enabled";
+        final String remoteStoreRepo = settings.get(
+            Node.NODE_ATTRIBUTES.getKey() + RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY
+        );
+        assert remoteStoreRepo != null : "Remote routing table repository is not configured";
+        final Repository repository = repositoriesService.get().repository(remoteStoreRepo);
+        assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository";
+        blobStoreRepository = (BlobStoreRepository) repository;
+    }
+
+    @Override
+    protected void doStop() {}
+
+}
diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java b/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java
new file mode 100644
index 0000000000000..9fe016e783f20
--- /dev/null
+++ b/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java
@@ -0,0 +1,10 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/** Package containing class to perform operations on remote routing table */
+package org.opensearch.cluster.routing.remote;
diff --git a/server/src/main/java/org/opensearch/common/remote/AbstractRemoteWritableBlobEntity.java b/server/src/main/java/org/opensearch/common/remote/AbstractRemoteWritableBlobEntity.java
new file mode 100644
index 0000000000000..632b2b70d61df
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/AbstractRemoteWritableBlobEntity.java
@@ -0,0 +1,91 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.core.compress.Compressor;
+import org.opensearch.core.xcontent.NamedXContentRegistry;
+import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata;
+
+import static org.opensearch.gateway.remote.RemoteClusterStateUtils.PATH_DELIMITER;
+
+/**
+ * An extension of {@link RemoteWriteableEntity} class which caters to the use case of writing to and reading from a blob storage
+ *
+ * @param <T> The class type which can be uploaded to or downloaded from a blob storage.
+ */
+public abstract class AbstractRemoteWritableBlobEntity<T> implements RemoteWriteableEntity<T> {
+
+    protected String blobFileName;
+
+    protected String blobName;
+    private final String clusterUUID;
+    private final Compressor compressor;
+    private final NamedXContentRegistry namedXContentRegistry;
+    private String[] pathTokens;
+
+    public AbstractRemoteWritableBlobEntity(
+        final String clusterUUID,
+        final Compressor compressor,
+        final NamedXContentRegistry namedXContentRegistry
+    ) {
+        this.clusterUUID = clusterUUID;
+        this.compressor = compressor;
+        this.namedXContentRegistry = namedXContentRegistry;
+    }
+
+    public abstract BlobPathParameters getBlobPathParameters();
+
+    public String getFullBlobName() {
+        return blobName;
+    }
+
+    public String getBlobFileName() {
+        if (blobFileName == null) {
+            String[] pathTokens = getBlobPathTokens();
+            if (pathTokens == null || pathTokens.length < 1) {
+                return null;
+            }
+            blobFileName = pathTokens[pathTokens.length - 1];
+        }
+        return blobFileName;
+    }
+
+    public String[] getBlobPathTokens() {
+        if (pathTokens != null) {
+            return pathTokens;
+        }
+        if (blobName == null) {
+            return null;
+        }
+        pathTokens = blobName.split(PATH_DELIMITER);
+        return pathTokens;
+    }
+
+    public abstract String generateBlobFileName();
+
+    public String clusterUUID() {
+        return clusterUUID;
+    }
+
+    public abstract UploadedMetadata getUploadedMetadata();
+
+    public void setFullBlobName(BlobPath blobPath) {
+        this.blobName = blobPath.buildAsString() + blobFileName;
+    }
+
+    public NamedXContentRegistry getNamedXContentRegistry() {
+        return namedXContentRegistry;
+    }
+
+    protected Compressor getCompressor() {
+        return compressor;
+    }
+
+}
diff --git a/server/src/main/java/org/opensearch/common/remote/BlobPathParameters.java b/server/src/main/java/org/opensearch/common/remote/BlobPathParameters.java
new file mode 100644
index 0000000000000..58c73a804b66a
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/BlobPathParameters.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import java.util.List;
+
+/**
+ * Parameters which can be used to construct a blob path
+ *
+ */
+public class BlobPathParameters {
+
+    private final List<String> pathTokens;
+    private final String filePrefix;
+
+    public BlobPathParameters(final List<String> pathTokens, final String filePrefix) {
+        this.pathTokens = pathTokens;
+        this.filePrefix = filePrefix;
+    }
+
+    public List<String> getPathTokens() {
+        return pathTokens;
+    }
+
+    public String getFilePrefix() {
+        return filePrefix;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWritableEntityStore.java b/server/src/main/java/org/opensearch/common/remote/RemoteWritableEntityStore.java
new file mode 100644
index 0000000000000..ccf7cafff1730
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/RemoteWritableEntityStore.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import org.opensearch.core.action.ActionListener;
+
+import java.io.IOException;
+
+/**
+ * An interface to read/write an object from/to a remote storage. This interface is agnostic of the remote storage type.
+ *
+ * @param <T> The object type which can be uploaded to or downloaded from remote storage.
+ * @param <U> The wrapper entity which provides methods for serializing/deserializing entity T.
+ */
+public interface RemoteWritableEntityStore<T, U extends RemoteWriteableEntity<T>> {
+
+    public void writeAsync(U entity, ActionListener<Void> listener);
+
+    public T read(U entity) throws IOException;
+
+    public void readAsync(U entity, ActionListener<T> listener);
+}
diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntity.java b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntity.java
new file mode 100644
index 0000000000000..778c24dce2e27
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntity.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.remote;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * An interface which defines the serialization/deserialization methods for objects to be uploaded to or downloaded from remote store.
+ * This interface is agnostic of the remote storage type.
+ *
+ * @param <T> The object type which can be uploaded to or downloaded from remote storage.
+ */
+public interface RemoteWriteableEntity<T> {
+    /**
+     * @return An InputStream created by serializing the entity T
+     * @throws IOException Exception encountered while serialization
+     */
+    public InputStream serialize() throws IOException;
+
+    /**
+     * @param inputStream The InputStream which is used to read the serialized entity
+     * @return The entity T after deserialization
+     * @throws IOException Exception encountered while deserialization
+     */
+    public T deserialize(InputStream inputStream) throws IOException;
+
+}
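A toy implementation of the interface above, just to show the round-trip contract; the class itself is hypothetical:

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.opensearch.common.remote.RemoteWriteableEntity;

class StringEntity implements RemoteWriteableEntity<String> {
    private final String value;

    StringEntity(String value) {
        this.value = value;
    }

    @Override
    public InputStream serialize() throws IOException {
        // Serialize: expose the value as a UTF-8 byte stream
        return new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8));
    }

    @Override
    public String deserialize(InputStream inputStream) throws IOException {
        // Deserialize: read the stream back into a String
        return new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
    }
}
```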
+ */ +/** + * Common remote store package + */ +package org.opensearch.common.remote; diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 7814518af471b..297fc98764d07 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -104,6 +104,7 @@ import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.PersistedClusterStateService; import org.opensearch.gateway.ShardsBatchGatewayAllocator; +import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpTransportSettings; import org.opensearch.index.IndexModule; @@ -711,6 +712,7 @@ public void apply(Settings value, Settings current, Settings previous) { SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_LEVEL, // Remote cluster state settings + RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java index 7a364de1c5dc6..238df1bd90113 100644 --- a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java @@ -36,6 +36,7 @@ protected FeatureFlagSettings( FeatureFlags.DATETIME_FORMATTER_CACHING_SETTING, FeatureFlags.TIERED_REMOTE_INDEX_SETTING, FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING, - FeatureFlags.PLUGGABLE_CACHE_SETTING + FeatureFlags.PLUGGABLE_CACHE_SETTING, + FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL_SETTING ); } diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java index 62cfbd861d4d9..82f43921d2d28 100644 --- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java +++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java @@ -67,6 +67,11 @@ public class FeatureFlags { */ public static final String PLUGGABLE_CACHE = "opensearch.experimental.feature.pluggable.caching.enabled"; + /** + * Gates the functionality of remote routing table. 
+ */ + public static final String REMOTE_PUBLICATION_EXPERIMENTAL = "opensearch.experimental.feature.remote_store.publication.enabled"; + public static final Setting REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING = Setting.boolSetting( REMOTE_STORE_MIGRATION_EXPERIMENTAL, false, @@ -89,6 +94,12 @@ public class FeatureFlags { public static final Setting PLUGGABLE_CACHE_SETTING = Setting.boolSetting(PLUGGABLE_CACHE, false, Property.NodeScope); + public static final Setting REMOTE_PUBLICATION_EXPERIMENTAL_SETTING = Setting.boolSetting( + REMOTE_PUBLICATION_EXPERIMENTAL, + false, + Property.NodeScope + ); + private static final List> ALL_FEATURE_FLAG_SETTINGS = List.of( REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING, EXTENSIONS_SETTING, @@ -96,7 +107,8 @@ public class FeatureFlags { TELEMETRY_SETTING, DATETIME_FORMATTER_CACHING_SETTING, TIERED_REMOTE_INDEX_SETTING, - PLUGGABLE_CACHE_SETTING + PLUGGABLE_CACHE_SETTING, + REMOTE_PUBLICATION_EXPERIMENTAL_SETTING ); /** * Should store the settings from opensearch.yml. diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java index 288371aa240a0..538dea5b2e60b 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java @@ -34,6 +34,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.Coordinator; import org.opensearch.cluster.coordination.ElectionStrategy; @@ -133,7 +134,8 @@ public DiscoveryModule( RerouteService rerouteService, NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, - RemoteStoreNodeService remoteStoreNodeService + RemoteStoreNodeService remoteStoreNodeService, + ClusterManagerMetrics clusterManagerMetrics ) { final Collection> joinValidators = new ArrayList<>(); final Map> hostProviders = new HashMap<>(); @@ -211,7 +213,8 @@ public DiscoveryModule( electionStrategy, nodeHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + clusterManagerMetrics ); } else { throw new IllegalArgumentException("Unknown discovery type [" + discoveryType + "]"); diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 0242e94cc4da1..4337d539ef425 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -43,7 +43,7 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { public static final int CODEC_V0 = 0; // Older codec version, where we haven't introduced codec versions for manifest. public static final int CODEC_V1 = 1; // In Codec V1 we have introduced global-metadata and codec version in Manifest file. public static final int CODEC_V2 = 2; // In Codec V2, there are seperate metadata files rather than a single global metadata file. - public static final int CODEC_V3 = 3; // In Codec V3, we introduce index routing-metadata, diff and other attributes as part of manifest required for state publication + public static final int CODEC_V3 = 3; // In Codec V3, we introduce index routing-metadata in manifest file. 
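Reviewer note: manifests written before codec V3 do not carry the new routing fields, so readers must gate on the codec version before touching them. A minimal sketch of that gating, using only getters introduced in this change; the helper method itself is hypothetical and not part of the PR:

```java
// Hypothetical helper, not part of this PR: read the routing table version
// defensively. The V2 read path in this change initializes the field to -1,
// so older manifests report that sentinel rather than a real version.
static long routingTableVersionOrDefault(ClusterMetadataManifest manifest) {
    if (manifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V3) {
        return manifest.getRoutingTableVersion();
    }
    return -1;
}
```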
private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term"); private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version"); @@ -61,6 +61,8 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { private static final ParseField UPLOADED_SETTINGS_METADATA = new ParseField("uploaded_settings_metadata"); private static final ParseField UPLOADED_TEMPLATES_METADATA = new ParseField("uploaded_templates_metadata"); private static final ParseField UPLOADED_CUSTOM_METADATA = new ParseField("uploaded_custom_metadata"); + private static final ParseField ROUTING_TABLE_VERSION_FIELD = new ParseField("routing_table_version"); + private static final ParseField INDICES_ROUTING_FIELD = new ParseField("indices_routing"); private static final ParseField METADATA_VERSION = new ParseField("metadata_version"); private static final ParseField UPLOADED_TRANSIENT_SETTINGS_METADATA = new ParseField("uploaded_transient_settings_metadata"); private static final ParseField UPLOADED_DISCOVERY_NODES_METADATA = new ParseField("uploaded_discovery_nodes_metadata"); @@ -98,6 +100,8 @@ private static ClusterMetadataManifest.Builder manifestV2Builder(Object[] fields private static ClusterMetadataManifest.Builder manifestV3Builder(Object[] fields) { return manifestV2Builder(fields) + .routingTableVersion(routingTableVersion(fields)) + .indicesRouting(indicesRouting(fields)) .discoveryNodesMetadata(discoveryNodesMetadata(fields)) .clusterBlocksMetadata(clusterBlocksMetadata(fields)) .diffManifest(diffManifest(fields)) @@ -172,33 +176,41 @@ private static Map customMetadata(Object[] fi return customs.stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())); } + private static long routingTableVersion(Object[] fields) { + return (long) fields[15]; + } + + private static List indicesRouting(Object[] fields) { + return (List) fields[16]; + } + private static UploadedMetadataAttribute discoveryNodesMetadata(Object[] fields) { - return (UploadedMetadataAttribute) fields[15]; + return (UploadedMetadataAttribute) fields[17]; } private static UploadedMetadataAttribute clusterBlocksMetadata(Object[] fields) { - return (UploadedMetadataAttribute) fields[16]; + return (UploadedMetadataAttribute) fields[18]; } private static long metadataVersion(Object[] fields) { - return (long) fields[17]; + return (long) fields[19]; } private static UploadedMetadataAttribute transientSettingsMetadata(Object[] fields) { - return (UploadedMetadataAttribute) fields[18]; + return (UploadedMetadataAttribute) fields[20]; } private static UploadedMetadataAttribute hashesOfConsistentSettings(Object[] fields) { - return (UploadedMetadataAttribute) fields[19]; + return (UploadedMetadataAttribute) fields[21]; } private static Map clusterStateCustomMetadata(Object[] fields) { - List customs = (List) fields[20]; + List customs = (List) fields[22]; return customs.stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())); } private static ClusterStateDiffManifest diffManifest(Object[] fields) { - return (ClusterStateDiffManifest) fields[21]; + return (ClusterStateDiffManifest) fields[23]; } private static final ConstructingObjectParser PARSER_V0 = new ConstructingObjectParser<>( @@ -221,7 +233,7 @@ private static ClusterStateDiffManifest diffManifest(Object[] fields) { fields -> manifestV3Builder(fields).build() ); - private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V2; + private static 
final ConstructingObjectParser CURRENT_PARSER = PARSER_V3; static { declareParser(PARSER_V0, CODEC_V0); @@ -273,6 +285,12 @@ private static void declareParser(ConstructingObjectParser= CODEC_V3) { + parser.declareLong(ConstructingObjectParser.constructorArg(), ROUTING_TABLE_VERSION_FIELD); + parser.declareObjectArray( + ConstructingObjectParser.constructorArg(), + (p, c) -> UploadedIndexMetadata.fromXContent(p), + INDICES_ROUTING_FIELD + ); parser.declareNamedObject( ConstructingObjectParser.optionalConstructorArg(), UploadedMetadataAttribute.PARSER, @@ -323,6 +341,8 @@ private static void declareParser(ConstructingObjectParser indicesRouting; private final long metadataVersion; private final UploadedMetadataAttribute uploadedTransientSettingsMetadata; private final UploadedMetadataAttribute uploadedDiscoveryNodesMetadata; @@ -434,6 +454,14 @@ public boolean hasMetadataAttributesFiles() { || !uploadedCustomMetadataMap.isEmpty(); } + public long getRoutingTableVersion() { + return routingTableVersion; + } + + public List getIndicesRouting() { + return indicesRouting; + } + public ClusterMetadataManifest( long clusterTerm, long version, @@ -451,6 +479,8 @@ public ClusterMetadataManifest( UploadedMetadataAttribute uploadedSettingsMetadata, UploadedMetadataAttribute uploadedTemplatesMetadata, Map uploadedCustomMetadataMap, + long routingTableVersion, + List indicesRouting, long metadataVersion, UploadedMetadataAttribute discoveryNodesMetadata, UploadedMetadataAttribute clusterBlocksMetadata, @@ -471,6 +501,8 @@ public ClusterMetadataManifest( this.indices = Collections.unmodifiableList(indices); this.previousClusterUUID = previousClusterUUID; this.clusterUUIDCommitted = clusterUUIDCommitted; + this.routingTableVersion = routingTableVersion; + this.indicesRouting = Collections.unmodifiableList(indicesRouting); this.uploadedCoordinationMetadata = uploadedCoordinationMetadata; this.uploadedSettingsMetadata = uploadedSettingsMetadata; this.uploadedTemplatesMetadata = uploadedTemplatesMetadata; @@ -509,6 +541,8 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { ); this.globalMetadataFileName = null; // ToDo: change the version check and initialize these + this.routingTableVersion = in.readLong(); + this.indicesRouting = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.metadataVersion = in.readLong(); this.uploadedDiscoveryNodesMetadata = new UploadedMetadataAttribute(in); this.uploadedClusterBlocksMetadata = new UploadedMetadataAttribute(in); @@ -530,6 +564,8 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.uploadedSettingsMetadata = null; this.uploadedTemplatesMetadata = null; this.uploadedCustomMetadataMap = null; + this.routingTableVersion = -1; + this.indicesRouting = null; this.uploadedDiscoveryNodesMetadata = null; this.uploadedClusterBlocksMetadata = null; this.diffManifest = null; @@ -595,6 +631,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(GLOBAL_METADATA_FIELD.getPreferredName(), getGlobalMetadataFileName()); } if (onOrAfterCodecVersion(CODEC_V3)) { + builder.field(ROUTING_TABLE_VERSION_FIELD.getPreferredName(), getRoutingTableVersion()); + builder.startArray(INDICES_ROUTING_FIELD.getPreferredName()); + { + for (UploadedIndexMetadata uploadedIndexMetadata : indicesRouting) { + builder.startObject(); + uploadedIndexMetadata.toXContent(builder, params); + builder.endObject(); + } + } + builder.endArray(); if (getDiscoveryNodesMetadata() != null) { 
builder.startObject(UPLOADED_DISCOVERY_NODES_METADATA.getPreferredName()); getDiscoveryNodesMetadata().toXContent(builder, params); @@ -648,6 +694,8 @@ public void writeTo(StreamOutput out) throws IOException { uploadedSettingsMetadata.writeTo(out); uploadedTemplatesMetadata.writeTo(out); out.writeMap(uploadedCustomMetadataMap, StreamOutput::writeString, (o, v) -> v.writeTo(o)); + out.writeLong(routingTableVersion); + out.writeCollection(indicesRouting); out.writeLong(metadataVersion); uploadedDiscoveryNodesMetadata.writeTo(out); uploadedClusterBlocksMetadata.writeTo(out); @@ -682,6 +730,8 @@ public boolean equals(Object o) { && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted) && Objects.equals(globalMetadataFileName, that.globalMetadataFileName) && Objects.equals(codecVersion, that.codecVersion) + && Objects.equals(routingTableVersion, that.routingTableVersion) + && Objects.equals(indicesRouting, that.indicesRouting) && Objects.equals(uploadedCoordinationMetadata, that.uploadedCoordinationMetadata) && Objects.equals(uploadedSettingsMetadata, that.uploadedSettingsMetadata) && Objects.equals(uploadedTemplatesMetadata, that.uploadedTemplatesMetadata) @@ -710,6 +760,8 @@ public int hashCode() { committed, previousClusterUUID, clusterUUIDCommitted, + routingTableVersion, + indicesRouting, uploadedCoordinationMetadata, uploadedSettingsMetadata, uploadedTemplatesMetadata, @@ -740,6 +792,10 @@ public static ClusterMetadataManifest fromXContentV1(XContentParser parser) thro return PARSER_V1.parse(parser, null); } + public static ClusterMetadataManifest fromXContentV2(XContentParser parser) throws IOException { + return PARSER_V2.parse(parser, null); + } + public static ClusterMetadataManifest fromXContent(XContentParser parser) throws IOException { return CURRENT_PARSER.parse(parser, null); } @@ -767,6 +823,8 @@ public static class Builder { private String previousClusterUUID; private boolean committed; private boolean clusterUUIDCommitted; + private long routingTableVersion; + private List indicesRouting; private long metadataVersion; private UploadedMetadataAttribute discoveryNodesMetadata; private UploadedMetadataAttribute clusterBlocksMetadata; @@ -780,6 +838,16 @@ public Builder indices(List indices) { return this; } + public Builder routingTableVersion(long routingTableVersion) { + this.routingTableVersion = routingTableVersion; + return this; + } + + public Builder indicesRouting(List indicesRouting) { + this.indicesRouting = indicesRouting; + return this; + } + public Builder codecVersion(int codecVersion) { this.codecVersion = codecVersion; return this; @@ -854,6 +922,10 @@ public List getIndices() { return indices; } + public List getIndicesRouting() { + return indicesRouting; + } + public Builder previousClusterUUID(String previousClusterUUID) { this.previousClusterUUID = previousClusterUUID; return this; @@ -903,6 +975,7 @@ public Builder() { indices = new ArrayList<>(); customMetadataMap = new HashMap<>(); clusterStateCustomMetadataMap = new HashMap<>(); + indicesRouting = new ArrayList<>(); } public Builder(ClusterMetadataManifest manifest) { @@ -922,6 +995,8 @@ public Builder(ClusterMetadataManifest manifest) { this.indices = new ArrayList<>(manifest.indices); this.previousClusterUUID = manifest.previousClusterUUID; this.clusterUUIDCommitted = manifest.clusterUUIDCommitted; + this.routingTableVersion = manifest.routingTableVersion; + this.indicesRouting = new ArrayList<>(manifest.indicesRouting); this.diffManifest = manifest.diffManifest; 
this.hashesOfConsistentSettings = manifest.uploadedHashesOfConsistentSettings; this.clusterStateCustomMetadataMap = manifest.uploadedClusterStateCustomMap; @@ -945,6 +1020,8 @@ public ClusterMetadataManifest build() { settingsMetadata, templatesMetadata, customMetadataMap, + routingTableVersion, + indicesRouting, metadataVersion, discoveryNodesMetadata, clusterBlocksMetadata, @@ -1051,11 +1128,9 @@ public String getIndexUUID() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - return builder.startObject() - .field(INDEX_NAME_FIELD.getPreferredName(), getIndexName()) + return builder.field(INDEX_NAME_FIELD.getPreferredName(), getIndexName()) .field(INDEX_UUID_FIELD.getPreferredName(), getIndexUUID()) - .field(UPLOADED_FILENAME_FIELD.getPreferredName(), getUploadedFilePath()) - .endObject(); + .field(UPLOADED_FILENAME_FIELD.getPreferredName(), getUploadedFilePath()); } @Override diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java new file mode 100644 index 0000000000000..2fca239b10efd --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java @@ -0,0 +1,413 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.logging.log4j.util.Strings; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.service.ClusterApplierService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.AbstractAsyncTask; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.threadpool.ThreadPool; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_FORMAT; +import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_FORMAT; +import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_PATH_TOKEN; + +/** + * A Manager which provides APIs to clean up stale cluster state files and runs an async stale cleanup task + * + * @opensearch.internal + */ +public class RemoteClusterStateCleanupManager implements Closeable { + + public static final int RETAINED_MANIFESTS = 10; + public static final int SKIP_CLEANUP_STATE_CHANGES 
= 10;
+    public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT = TimeValue.timeValueMinutes(5);
+    public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM = TimeValue.MINUS_ONE;
+
+    /**
+     * Setting to specify the interval at which the stale file cleanup job runs.
+     * The minimum value -1 indicates that the stale file cleanup job should be disabled.
+     */
+    public static final Setting<TimeValue> REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING = Setting.timeSetting(
+        "cluster.remote_store.state.cleanup_interval",
+        CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT,
+        CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM,
+        Setting.Property.NodeScope,
+        Setting.Property.Dynamic
+    );
+    private static final Logger logger = LogManager.getLogger(RemoteClusterStateCleanupManager.class);
+    private final RemoteClusterStateService remoteClusterStateService;
+    private final RemotePersistenceStats remoteStateStats;
+    private BlobStoreTransferService blobStoreTransferService;
+    private TimeValue staleFileCleanupInterval;
+    private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false);
+    private volatile AsyncStaleFileDeletion staleFileDeletionTask;
+    private long lastCleanupAttemptStateVersion;
+    private final ThreadPool threadpool;
+    private final ClusterApplierService clusterApplierService;
+
+    public RemoteClusterStateCleanupManager(RemoteClusterStateService remoteClusterStateService, ClusterService clusterService) {
+        this.remoteClusterStateService = remoteClusterStateService;
+        this.remoteStateStats = remoteClusterStateService.getStats();
+        ClusterSettings clusterSettings = clusterService.getClusterSettings();
+        this.clusterApplierService = clusterService.getClusterApplierService();
+        this.staleFileCleanupInterval = clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING);
+        this.threadpool = remoteClusterStateService.getThreadpool();
+        // initialize with 0, a cleanup will be done when this node is elected master node and version is incremented more than threshold
+        this.lastCleanupAttemptStateVersion = 0;
+        clusterSettings.addSettingsUpdateConsumer(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, this::updateCleanupInterval);
+    }
+
+    void start() {
+        staleFileDeletionTask = new AsyncStaleFileDeletion(this);
+    }
+
+    @Override
+    public void close() throws IOException {
+        if (staleFileDeletionTask != null) {
+            staleFileDeletionTask.close();
+        }
+    }
+
+    private BlobStoreTransferService getBlobStoreTransferService() {
+        if (blobStoreTransferService == null) {
+            blobStoreTransferService = new BlobStoreTransferService(remoteClusterStateService.getBlobStore(), threadpool);
+        }
+        return blobStoreTransferService;
+    }
+
+    private void updateCleanupInterval(TimeValue updatedInterval) {
+        this.staleFileCleanupInterval = updatedInterval;
+        logger.info("updated remote state cleanup interval to {}", updatedInterval);
+        // After updating the interval, reschedule the running task so that it picks up the updated interval
+        if (staleFileDeletionTask != null && !staleFileDeletionTask.getInterval().equals(updatedInterval)) {
+            staleFileDeletionTask.setInterval(updatedInterval);
+        }
+    }
+
+    // visible for testing
+    void cleanUpStaleFiles() {
+        ClusterState currentAppliedState = clusterApplierService.state();
+        if (currentAppliedState.nodes().isLocalNodeElectedClusterManager()) {
+            long cleanUpAttemptStateVersion = currentAppliedState.version();
+            assert Strings.isNotEmpty(currentAppliedState.getClusterName().value()) : "cluster name is not set";
+            assert
Strings.isNotEmpty(currentAppliedState.metadata().clusterUUID()) : "cluster uuid is not set"; + if (cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion > SKIP_CLEANUP_STATE_CHANGES) { + logger.info( + "Cleaning up stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates", + currentAppliedState.getClusterName().value(), + currentAppliedState.metadata().clusterUUID(), + cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion + ); + this.deleteStaleClusterMetadata( + currentAppliedState.getClusterName().value(), + currentAppliedState.metadata().clusterUUID(), + RETAINED_MANIFESTS + ); + lastCleanupAttemptStateVersion = cleanUpAttemptStateVersion; + } else { + logger.debug( + "Skipping cleanup of stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates, which is less than threshold {}", + currentAppliedState.getClusterName().value(), + currentAppliedState.metadata().clusterUUID(), + cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion, + SKIP_CLEANUP_STATE_CHANGES + ); + } + } else { + logger.debug("Skipping cleanup task as local node is not elected Cluster Manager"); + } + } + + private void addStaleGlobalMetadataPath(String fileName, Set filesToKeep, Set staleGlobalMetadataPaths) { + if (!filesToKeep.contains(fileName)) { + String[] splitPath = fileName.split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + splitPath[splitPath.length - 1] + ) + ); + } + } + + // visible for testing + void deleteClusterMetadata( + String clusterName, + String clusterUUID, + List activeManifestBlobMetadata, + List staleManifestBlobMetadata + ) { + try { + Set filesToKeep = new HashSet<>(); + Set staleManifestPaths = new HashSet<>(); + Set staleIndexMetadataPaths = new HashSet<>(); + Set staleGlobalMetadataPaths = new HashSet<>(); + activeManifestBlobMetadata.forEach(blobMetadata -> { + ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.fetchRemoteClusterMetadataManifest( + clusterName, + clusterUUID, + blobMetadata.name() + ); + clusterMetadataManifest.getIndices() + .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); + if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) { + filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); + } else if (clusterMetadataManifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V2) { + filesToKeep.add(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()); + filesToKeep.add(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()); + filesToKeep.add(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()); + clusterMetadataManifest.getCustomMetadataMap() + .values() + .forEach(attribute -> filesToKeep.add(attribute.getUploadedFilename())); + } + }); + staleManifestBlobMetadata.forEach(blobMetadata -> { + ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.fetchRemoteClusterMetadataManifest( + clusterName, + clusterUUID, + blobMetadata.name() + ); + staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); + if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) { + addStaleGlobalMetadataPath(clusterMetadataManifest.getGlobalMetadataFileName(), filesToKeep, staleGlobalMetadataPaths); + } else if (clusterMetadataManifest.getCodecVersion() 
>= ClusterMetadataManifest.CODEC_V2) { + addStaleGlobalMetadataPath( + clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename(), + filesToKeep, + staleGlobalMetadataPaths + ); + addStaleGlobalMetadataPath( + clusterMetadataManifest.getSettingsMetadata().getUploadedFilename(), + filesToKeep, + staleGlobalMetadataPaths + ); + addStaleGlobalMetadataPath( + clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename(), + filesToKeep, + staleGlobalMetadataPaths + ); + clusterMetadataManifest.getCustomMetadataMap() + .values() + .forEach( + attribute -> addStaleGlobalMetadataPath(attribute.getUploadedFilename(), filesToKeep, staleGlobalMetadataPaths) + ); + } + + clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { + if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { + staleIndexMetadataPaths.add( + new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() + + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) + ); + } + }); + }); + + if (staleManifestPaths.isEmpty()) { + logger.debug("No stale Remote Cluster Metadata files found"); + return; + } + + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); + } catch (IllegalStateException e) { + logger.error("Error while fetching Remote Cluster Metadata manifests", e); + } catch (IOException e) { + logger.error("Error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); + } catch (Exception e) { + logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); + } + } + + /** + * Deletes older than last {@code versionsToRetain} manifests. 
Also cleans up unreferenced IndexMetadata associated with older manifests.
+     *
+     * @param clusterName name of the cluster
+     * @param clusterUUID uuid of cluster state to refer to in remote
+     * @param manifestsToRetain number of latest manifest files to keep in remote
+     */
+    // package private for testing
+    void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) {
+        if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) {
+            logger.info("Delete stale cluster metadata task is already in progress.");
+            return;
+        }
+        try {
+            getBlobStoreTransferService().listAllInSortedOrderAsync(
+                ThreadPool.Names.REMOTE_PURGE,
+                remoteClusterStateService.getManifestFolderPath(clusterName, clusterUUID),
+                MANIFEST_FILE_PREFIX,
+                Integer.MAX_VALUE,
+                new ActionListener<>() {
+                    @Override
+                    public void onResponse(List<BlobMetadata> blobMetadata) {
+                        if (blobMetadata.size() > manifestsToRetain) {
+                            deleteClusterMetadata(
+                                clusterName,
+                                clusterUUID,
+                                blobMetadata.subList(0, manifestsToRetain),
+                                blobMetadata.subList(manifestsToRetain, blobMetadata.size())
+                            );
+                        }
+                        deleteStaleMetadataRunning.set(false);
+                    }
+
+                    @Override
+                    public void onFailure(Exception e) {
+                        logger.error(
+                            new ParameterizedMessage(
+                                "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}",
+                                clusterUUID
+                            )
+                        );
+                        deleteStaleMetadataRunning.set(false);
+                    }
+                }
+            );
+        } catch (Exception e) {
+            deleteStaleMetadataRunning.set(false);
+            throw e;
+        }
+    }
+
+    /**
+     * Purges all remote cluster state against provided cluster UUIDs
+     *
+     * @param clusterName name of the cluster
+     * @param clusterUUIDs cluster UUIDs for which the remote state needs to be purged
+     */
+    void deleteStaleUUIDsClusterMetadata(String clusterName, List<String> clusterUUIDs) {
+        clusterUUIDs.forEach(
+            clusterUUID -> getBlobStoreTransferService().deleteAsync(
+                ThreadPool.Names.REMOTE_PURGE,
+                remoteClusterStateService.getCusterMetadataBasePath(clusterName, clusterUUID),
+                new ActionListener<>() {
+                    @Override
+                    public void onResponse(Void unused) {
+                        logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID);
+                    }
+
+                    @Override
+                    public void onFailure(Exception e) {
+                        logger.error(
+                            new ParameterizedMessage(
+                                "Exception occurred while deleting all remote cluster metadata for cluster UUID {}",
+                                clusterUUID
+                            ),
+                            e
+                        );
+                        remoteStateStats.cleanUpAttemptFailed();
+                    }
+                }
+            )
+        );
+    }
+
+    // package private for testing
+    void deleteStalePaths(String clusterName, String clusterUUID, List<String> stalePaths) throws IOException {
+        logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths));
+        getBlobStoreTransferService().deleteBlobs(
+            remoteClusterStateService.getCusterMetadataBasePath(clusterName, clusterUUID),
+            stalePaths
+        );
+    }
+
+    /**
+     * Purges cluster state for all stale cluster UUIDs, retaining data for the current and previous cluster UUIDs
+     * @param clusterState current state of the cluster
+     * @param committedManifest last committed ClusterMetadataManifest
+     */
+    public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) {
+        threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> {
+            String clusterName = clusterState.getClusterName().value();
+            logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName);
+            Set<String> allClustersUUIDsInRemote;
+            try {
+                allClustersUUIDsInRemote = new HashSet<>(
+                    remoteClusterStateService.getAllClusterUUIDs(clusterState.getClusterName().value())
+                );
+            } catch (IOException e) {
+
logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName)); + return; + } + // Retain last 2 cluster uuids data + allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID()); + allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID()); + deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); + }); + } + + public TimeValue getStaleFileCleanupInterval() { + return this.staleFileCleanupInterval; + } + + AsyncStaleFileDeletion getStaleFileDeletionTask() { // for testing + return this.staleFileDeletionTask; + } + + RemotePersistenceStats getStats() { + return this.remoteStateStats; + } + + static final class AsyncStaleFileDeletion extends AbstractAsyncTask { + private final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; + + AsyncStaleFileDeletion(RemoteClusterStateCleanupManager remoteClusterStateCleanupManager) { + super( + logger, + remoteClusterStateCleanupManager.threadpool, + remoteClusterStateCleanupManager.getStaleFileCleanupInterval(), + true + ); + this.remoteClusterStateCleanupManager = remoteClusterStateCleanupManager; + rescheduleIfNecessary(); + } + + @Override + protected boolean mustReschedule() { + return true; + } + + @Override + protected void runInternal() { + remoteClusterStateCleanupManager.cleanUpStaleFiles(); + } + + @Override + protected String getThreadPool() { + return ThreadPool.Names.REMOTE_PURGE; + } + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 0f862d1b68820..d0593dcd51475 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -18,11 +18,14 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.TemplatesMetadata; +import org.opensearch.cluster.routing.remote.RemoteRoutingTableService; +import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedRunnable; import org.opensearch.common.Nullable; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; @@ -59,7 +62,6 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.LongSupplier; @@ -68,6 +70,7 @@ import static java.util.Objects.requireNonNull; import static org.opensearch.gateway.PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; /** @@ -81,8 +84,6 @@ public class RemoteClusterStateService implements Closeable { public static final String METADATA_MANIFEST_NAME_FORMAT = "%s"; - public static final int RETAINED_MANIFESTS = 10; - public static final String DELIMITER = "__"; 
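Since the cleanup cadence is now governed by the dynamic setting `cluster.remote_store.state.cleanup_interval` (registered in ClusterSettings above), operators can retune it at runtime instead of relying on the removed fixed constants. A hedged usage sketch, assuming an `org.opensearch.client.Client` instance is available; this is illustrative wiring, not code from the PR:

```java
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
import org.opensearch.client.Client;
import org.opensearch.common.settings.Settings;

class CleanupIntervalTuningSketch {
    // Illustrative only: set the stale-file cleanup interval, e.g. "15m".
    // Per the setting's javadoc, "-1" disables the cleanup task instead.
    static void setCleanupInterval(Client client, String interval) {
        ClusterUpdateSettingsRequest request = new ClusterUpdateSettingsRequest();
        request.persistentSettings(
            Settings.builder().put("cluster.remote_store.state.cleanup_interval", interval)
        );
        client.admin().cluster().updateSettings(request).actionGet();
    }
}
```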
public static final String CUSTOM_DELIMITER = "--"; @@ -202,20 +203,20 @@ public class RemoteClusterStateService implements Closeable { private final List indexMetadataUploadListeners; private BlobStoreRepository blobStoreRepository; private BlobStoreTransferService blobStoreTransferService; + private Optional remoteRoutingTableService; private volatile TimeValue slowWriteLoggingThreshold; private volatile TimeValue indexMetadataUploadTimeout; private volatile TimeValue globalMetadataUploadTimeout; private volatile TimeValue metadataManifestUploadTimeout; - - private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); + private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; private final RemotePersistenceStats remoteStateStats; private final String CLUSTER_STATE_UPLOAD_TIME_LOG_STRING = "writing cluster state for version [{}] took [{}ms]"; private final String METADATA_UPDATE_LOG_STRING = "wrote metadata for [{}] indices and skipped [{}] unchanged " + "indices, coordination metadata updated : [{}], settings metadata updated : [{}], templates metadata " + "updated : [{}], custom metadata updated : [{}]"; public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; - public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V2; + public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V3; public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 2; // ToXContent Params with gateway mode. @@ -232,7 +233,7 @@ public RemoteClusterStateService( String nodeId, Supplier repositoriesService, Settings settings, - ClusterSettings clusterSettings, + ClusterService clusterService, LongSupplier relativeTimeNanosSupplier, ThreadPool threadPool, List indexMetadataUploadListeners @@ -243,6 +244,7 @@ public RemoteClusterStateService( this.settings = settings; this.relativeTimeNanosSupplier = relativeTimeNanosSupplier; this.threadpool = threadPool; + ClusterSettings clusterSettings = clusterService.getClusterSettings(); this.slowWriteLoggingThreshold = clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD); this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); @@ -252,14 +254,11 @@ public RemoteClusterStateService( clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout); this.remoteStateStats = new RemotePersistenceStats(); + this.remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(this, clusterService); this.indexMetadataUploadListeners = indexMetadataUploadListeners; - } - - private BlobStoreTransferService getBlobStoreTransferService() { - if (blobStoreTransferService == null) { - blobStoreTransferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), threadpool); - } - return blobStoreTransferService; + this.remoteRoutingTableService = isRemoteRoutingTableEnabled(settings) + ? 
Optional.of(new RemoteRoutingTableService(repositoriesService, settings)) + : Optional.empty(); } /** @@ -417,7 +416,6 @@ public ClusterMetadataManifest writeIncrementalMetadata( : previousManifest.getCustomMetadataMap(), false ); - deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); remoteStateStats.stateSucceeded(); @@ -721,6 +719,10 @@ private CheckedRunnable getAsyncMetadataWriteAction( ); } + public RemoteClusterStateCleanupManager getCleanupManager() { + return remoteClusterStateCleanupManager; + } + @Nullable public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) throws IOException { @@ -740,15 +742,22 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat previousManifest.getCustomMetadataMap(), true ); - deleteStaleClusterUUIDs(clusterState, committedManifest); + if (!previousManifest.isClusterUUIDCommitted() && committedManifest.isClusterUUIDCommitted()) { + remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifest); + } + return committedManifest; } @Override public void close() throws IOException { + remoteClusterStateCleanupManager.close(); if (blobStoreRepository != null) { IOUtils.close(blobStoreRepository); } + if (this.remoteRoutingTableService.isPresent()) { + this.remoteRoutingTableService.get().close(); + } } public void start() { @@ -760,6 +769,8 @@ public void start() { final Repository repository = repositoriesService.get().repository(remoteStoreRepo); assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository"; blobStoreRepository = (BlobStoreRepository) repository; + remoteClusterStateCleanupManager.start(); + this.remoteRoutingTableService.ifPresent(RemoteRoutingTableService::start); } private ClusterMetadataManifest uploadManifest( @@ -795,7 +806,10 @@ private ClusterMetadataManifest uploadManifest( uploadedCoordinationMetadata, uploadedSettingsMetadata, uploadedTemplatesMetadata, - uploadedCustomMetadataMap + uploadedCustomMetadataMap, + clusterState.routingTable().version(), + // TODO: Add actual list of changed indices routing with index routing upload flow. 
+ new ArrayList<>() ); writeMetadataManifest(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifest, manifestFileName); return manifest; @@ -850,6 +864,14 @@ private void writeMetadataManifest(String clusterName, String clusterUUID, Clust ); } + ThreadPool getThreadpool() { + return threadpool; + } + + BlobStore getBlobStore() { + return blobStoreRepository.blobStore(); + } + private BlobContainer indexMetadataContainer(String clusterName, String clusterUUID, String indexUUID) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX return blobStoreRepository.blobStore() @@ -867,7 +889,7 @@ private BlobContainer manifestContainer(String clusterName, String clusterUUID) return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID)); } - private BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) { + BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) { return blobStoreRepository.basePath().add(encodeString(clusterName)).add(CLUSTER_STATE_PATH_TOKEN).add(clusterUUID); } @@ -933,6 +955,11 @@ public TimeValue getMetadataManifestUploadTimeout() { return this.metadataManifestUploadTimeout; } + // Package private for unit test + Optional getRemoteRoutingTableService() { + return this.remoteRoutingTableService; + } + static String getManifestFileName(long term, long version, boolean committed, int codecVersion) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____ return String.join( @@ -982,7 +1009,7 @@ private static String metadataAttributeFileName(String componentPrefix, Long met ); } - private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { + BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { return getCusterMetadataBasePath(clusterName, clusterUUID).add(MANIFEST_PATH_TOKEN); } @@ -1235,7 +1262,7 @@ public String getLastKnownUUIDFromRemote(String clusterName) { } } - private Set getAllClusterUUIDs(String clusterName) throws IOException { + Set getAllClusterUUIDs(String clusterName) throws IOException { Map clusterUUIDMetadata = clusterUUIDContainer(clusterName).children(); if (clusterUUIDMetadata == null) { return Collections.emptySet(); @@ -1426,7 +1453,7 @@ private Optional getLatestManifestFileName(String clusterName, String cl * @param clusterName name of the cluster * @return ClusterMetadataManifest */ - private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) + ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) throws IllegalStateException { try { return getClusterMetadataManifestBlobStoreFormat(filename).read( @@ -1486,234 +1513,6 @@ public RemoteStateTransferException(String errorDesc, Throwable cause) { } } - /** - * Purges all remote cluster state against provided cluster UUIDs - * - * @param clusterName name of the cluster - * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged - */ - void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { - clusterUUIDs.forEach(clusterUUID -> { - getBlobStoreTransferService().deleteAsync( - ThreadPool.Names.REMOTE_PURGE, - getCusterMetadataBasePath(clusterName, clusterUUID), - new ActionListener<>() { - @Override - public void onResponse(Void unused) { - logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID); - } - - 
@Override - public void onFailure(Exception e) { - logger.error( - new ParameterizedMessage( - "Exception occurred while deleting all remote cluster metadata for cluster UUID {}", - clusterUUID - ), - e - ); - remoteStateStats.cleanUpAttemptFailed(); - } - } - ); - }); - } - - /** - * Deletes older than last {@code versionsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests - * - * @param clusterName name of the cluster - * @param clusterUUID uuid of cluster state to refer to in remote - * @param manifestsToRetain no of latest manifest files to keep in remote - */ - // package private for testing - void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { - if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { - logger.info("Delete stale cluster metadata task is already in progress."); - return; - } - try { - getBlobStoreTransferService().listAllInSortedOrderAsync( - ThreadPool.Names.REMOTE_PURGE, - getManifestFolderPath(clusterName, clusterUUID), - "manifest", - Integer.MAX_VALUE, - new ActionListener<>() { - @Override - public void onResponse(List blobMetadata) { - if (blobMetadata.size() > manifestsToRetain) { - deleteClusterMetadata( - clusterName, - clusterUUID, - blobMetadata.subList(0, manifestsToRetain - 1), - blobMetadata.subList(manifestsToRetain - 1, blobMetadata.size()) - ); - } - deleteStaleMetadataRunning.set(false); - } - - @Override - public void onFailure(Exception e) { - logger.error( - new ParameterizedMessage( - "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}", - clusterUUID - ) - ); - deleteStaleMetadataRunning.set(false); - } - } - ); - } catch (Exception e) { - deleteStaleMetadataRunning.set(false); - throw e; - } - } - - private void deleteClusterMetadata( - String clusterName, - String clusterUUID, - List activeManifestBlobMetadata, - List staleManifestBlobMetadata - ) { - try { - Set filesToKeep = new HashSet<>(); - Set staleManifestPaths = new HashSet<>(); - Set staleIndexMetadataPaths = new HashSet<>(); - Set staleGlobalMetadataPaths = new HashSet<>(); - activeManifestBlobMetadata.forEach(blobMetadata -> { - ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( - clusterName, - clusterUUID, - blobMetadata.name() - ); - clusterMetadataManifest.getIndices() - .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); - if (clusterMetadataManifest.getGlobalMetadataFileName() != null) { - filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); - } else { - filesToKeep.add(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()); - filesToKeep.add(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()); - filesToKeep.add(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()); - clusterMetadataManifest.getCustomMetadataMap() - .forEach((key, value) -> { filesToKeep.add(value.getUploadedFilename()); }); - } - }); - staleManifestBlobMetadata.forEach(blobMetadata -> { - ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( - clusterName, - clusterUUID, - blobMetadata.name() - ); - staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); - if (clusterMetadataManifest.getGlobalMetadataFileName() != null) { - if (filesToKeep.contains(clusterMetadataManifest.getGlobalMetadataFileName()) == false) { - String[] globalMetadataSplitPath = 
clusterMetadataManifest.getGlobalMetadataFileName().split("/"); - staleGlobalMetadataPaths.add( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( - globalMetadataSplitPath[globalMetadataSplitPath.length - 1] - ) - ); - } - } else { - if (filesToKeep.contains(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()) == false) { - String[] coordinationMetadataSplitPath = clusterMetadataManifest.getCoordinationMetadata() - .getUploadedFilename() - .split("/"); - staleGlobalMetadataPaths.add( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( - coordinationMetadataSplitPath[coordinationMetadataSplitPath.length - 1] - ) - ); - } - if (filesToKeep.contains(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()) == false) { - String[] templatesMetadataSplitPath = clusterMetadataManifest.getTemplatesMetadata() - .getUploadedFilename() - .split("/"); - staleGlobalMetadataPaths.add( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( - templatesMetadataSplitPath[templatesMetadataSplitPath.length - 1] - ) - ); - } - if (filesToKeep.contains(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()) == false) { - String[] settingsMetadataSplitPath = clusterMetadataManifest.getSettingsMetadata().getUploadedFilename().split("/"); - staleGlobalMetadataPaths.add( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( - settingsMetadataSplitPath[settingsMetadataSplitPath.length - 1] - ) - ); - } - clusterMetadataManifest.getCustomMetadataMap().forEach((key, value) -> { - if (filesToKeep.contains(value.getUploadedFilename()) == false) { - String[] customMetadataSplitPath = value.getUploadedFilename().split("/"); - staleGlobalMetadataPaths.add( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( - customMetadataSplitPath[customMetadataSplitPath.length - 1] - ) - ); - } - }); - } - - clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { - if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { - staleIndexMetadataPaths.add( - new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() - + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) - ); - } - }); - }); - - if (staleManifestPaths.isEmpty()) { - logger.debug("No stale Remote Cluster Metadata files found"); - return; - } - - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); - } catch (IllegalStateException e) { - logger.error("Error while fetching Remote Cluster Metadata manifests", e); - } catch (IOException e) { - logger.error("Error while deleting stale Remote Cluster Metadata files", e); - remoteStateStats.cleanUpAttemptFailed(); - } catch (Exception e) { - logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); - remoteStateStats.cleanUpAttemptFailed(); - } - } - - private void deleteStalePaths(String clusterName, String clusterUUID, List stalePaths) throws IOException { - logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths)); - 
getBlobStoreTransferService().deleteBlobs(getCusterMetadataBasePath(clusterName, clusterUUID), stalePaths); - } - - /** - * Purges all remote cluster state against provided cluster UUIDs - * - * @param clusterState current state of the cluster - * @param committedManifest last committed ClusterMetadataManifest - */ - public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) { - threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> { - String clusterName = clusterState.getClusterName().value(); - logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); - Set allClustersUUIDsInRemote; - try { - allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value())); - } catch (IOException e) { - logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName)); - return; - } - // Retain last 2 cluster uuids data - allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID()); - allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID()); - deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); - }); - } - public RemotePersistenceStats getStats() { return remoteStateStats; } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java new file mode 100644 index 0000000000000..500d1af0211e8 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import java.nio.charset.StandardCharsets; +import java.util.Base64; + +/** + * Utility class for Remote Cluster State + */ +public class RemoteClusterStateUtils { + public static final String PATH_DELIMITER = "/"; + + public static String encodeString(String content) { + return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateBlobStore.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateBlobStore.java new file mode 100644 index 0000000000000..1aeecc4e70382 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateBlobStore.java @@ -0,0 +1,107 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.gateway.remote.model; + +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.remote.AbstractRemoteWritableBlobEntity; +import org.opensearch.common.remote.RemoteWritableEntityStore; +import org.opensearch.common.remote.RemoteWriteableEntity; +import org.opensearch.core.action.ActionListener; +import org.opensearch.gateway.remote.RemoteClusterStateUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.ExecutorService; + +/** + * Abstract class for a blob type storage + * + * @param The entity which can be uploaded to / downloaded from blob store + * @param The concrete class implementing {@link RemoteWriteableEntity} which is used as a wrapper for T entity. + */ +public class RemoteClusterStateBlobStore> implements RemoteWritableEntityStore { + + private final BlobStoreTransferService transferService; + private final BlobStoreRepository blobStoreRepository; + private final String clusterName; + private final ExecutorService executorService; + + public RemoteClusterStateBlobStore( + final BlobStoreTransferService blobStoreTransferService, + final BlobStoreRepository blobStoreRepository, + final String clusterName, + final ThreadPool threadPool, + final String executor + ) { + this.transferService = blobStoreTransferService; + this.blobStoreRepository = blobStoreRepository; + this.clusterName = clusterName; + this.executorService = threadPool.executor(executor); + } + + @Override + public void writeAsync(final U entity, final ActionListener listener) { + try { + try (InputStream inputStream = entity.serialize()) { + BlobPath blobPath = getBlobPathForUpload(entity); + entity.setFullBlobName(blobPath); + // TODO uncomment below logic after merging PR https://github.com/opensearch-project/OpenSearch/pull/13836 + // transferService.uploadBlob(inputStream, getBlobPathForUpload(entity), entity.getBlobFileName(), WritePriority.URGENT, + // listener); + } + } catch (Exception e) { + listener.onFailure(e); + } + } + + public T read(final U entity) throws IOException { + // TODO Add timing logs and tracing + assert entity.getFullBlobName() != null; + return entity.deserialize(transferService.downloadBlob(getBlobPathForDownload(entity), entity.getBlobFileName())); + } + + @Override + public void readAsync(final U entity, final ActionListener listener) { + executorService.execute(() -> { + try { + listener.onResponse(read(entity)); + } catch (Exception e) { + listener.onFailure(e); + } + }); + } + + private BlobPath getBlobPathForUpload(final AbstractRemoteWritableBlobEntity obj) { + BlobPath blobPath = blobStoreRepository.basePath() + .add(RemoteClusterStateUtils.encodeString(clusterName)) + .add("cluster-state") + .add(obj.clusterUUID()); + for (String token : obj.getBlobPathParameters().getPathTokens()) { + blobPath = blobPath.add(token); + } + return blobPath; + } + + private BlobPath getBlobPathForDownload(final AbstractRemoteWritableBlobEntity obj) { + String[] pathTokens = obj.getBlobPathTokens(); + BlobPath blobPath = new BlobPath(); + if (pathTokens == null || pathTokens.length < 1) { + return blobPath; + } + // Iterate till second last path token to get the blob folder + for (int i = 0; i < pathTokens.length - 1; i++) { + blobPath = blobPath.add(pathTokens[i]); + } + return blobPath; + } + +} diff --git 
diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/package-info.java b/server/src/main/java/org/opensearch/gateway/remote/model/package-info.java new file mode 100644 index 0000000000000..c0d13d15cc885 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/model/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Package containing models for remote cluster state + */ +package org.opensearch.gateway.remote.model; diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java new file mode 100644 index 0000000000000..5baea6adba0c7 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.routingtable; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFormatTooNewException; +import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.store.OutputStreamDataOutput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; + +import java.io.EOFException; +import java.io.IOException; + +/** + * The stored header information for the individual index routing table + */ +public class IndexRoutingTableHeader implements Writeable { + + public static final String INDEX_ROUTING_HEADER_CODEC = "index_routing_header_codec"; + public static final int INITIAL_VERSION = 1; + public static final int CURRENT_VERSION = INITIAL_VERSION; + private final String indexName; + + public IndexRoutingTableHeader(String indexName) { + this.indexName = indexName; + } + + /** + * Reads the contents of the stream into the corresponding {@link IndexRoutingTableHeader} + * + * @param in streamInput + * @throws IOException exception thrown on failing to read from stream. + */ + public IndexRoutingTableHeader(StreamInput in) throws IOException { + try { + readHeaderVersion(in); + indexName = in.readString(); + } catch (EOFException e) { + throw new IOException("index routing header truncated", e); + } + } + + private void readHeaderVersion(final StreamInput in) throws IOException { + try { + CodecUtil.checkHeader(new InputStreamDataInput(in), INDEX_ROUTING_HEADER_CODEC, INITIAL_VERSION, CURRENT_VERSION); + } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) { + throw new IOException("index routing table header corrupted", e); + } + } + + /** + * Write the IndexRoutingTable to the given stream. + * + * @param out stream to write + * @throws IOException exception thrown on failing to write to stream.
+ */ + public void writeTo(StreamOutput out) throws IOException { + try { + CodecUtil.writeHeader(new OutputStreamDataOutput(out), INDEX_ROUTING_HEADER_CODEC, CURRENT_VERSION); + out.writeString(indexName); + out.flush(); + } catch (IOException e) { + throw new IOException("Failed to write IndexRoutingTable header", e); + } + } + + public String getIndexName() { + return indexName; + } + +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTable.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTable.java new file mode 100644 index 0000000000000..17c55190da07f --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTable.java @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.routingtable; + +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; +import org.opensearch.core.common.io.stream.InputStreamStreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.index.Index; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +/** + * Remote store object for IndexRoutingTable + */ +public class RemoteIndexRoutingTable implements Writeable { + + private final IndexRoutingTable indexRoutingTable; + + public RemoteIndexRoutingTable(IndexRoutingTable indexRoutingTable) { + this.indexRoutingTable = indexRoutingTable; + } + + /** + * Reads data from inputStream and creates RemoteIndexRoutingTable object with the {@link IndexRoutingTable} + * @param inputStream input stream with index routing data + * @param index index for the current routing data + * @throws IOException exception thrown on failing to read from stream. + */ + public RemoteIndexRoutingTable(InputStream inputStream, Index index) throws IOException { + try { + try (BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(new InputStreamStreamInput(inputStream), "assertion")) { + // Read the Table Header first and confirm the index + IndexRoutingTableHeader indexRoutingTableHeader = new IndexRoutingTableHeader(in); + assert indexRoutingTableHeader.getIndexName().equals(index.getName()); + + int numberOfShardRouting = in.readVInt(); + IndexRoutingTable.Builder indicesRoutingTable = IndexRoutingTable.builder(index); + for (int idx = 0; idx < numberOfShardRouting; idx++) { + IndexShardRoutingTable indexShardRoutingTable = IndexShardRoutingTable.Builder.readFrom(in); + indicesRoutingTable.addIndexShard(indexShardRoutingTable); + } + verifyCheckSum(in); + indexRoutingTable = indicesRoutingTable.build(); + } + } catch (EOFException e) { + throw new IOException("Indices Routing table is corrupted", e); + } + } + + public IndexRoutingTable getIndexRoutingTable() { + return indexRoutingTable; + } + + /** + * Writes {@link IndexRoutingTable} to the given stream + * @param streamOutput output stream to write + * @throws IOException exception thrown on failing to write to stream. 
+ */ + @Override + public void writeTo(StreamOutput streamOutput) throws IOException { + try { + BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput(streamOutput); + IndexRoutingTableHeader indexRoutingTableHeader = new IndexRoutingTableHeader(indexRoutingTable.getIndex().getName()); + indexRoutingTableHeader.writeTo(out); + out.writeVInt(indexRoutingTable.shards().size()); + for (IndexShardRoutingTable next : indexRoutingTable) { + IndexShardRoutingTable.Builder.writeTo(next, out); + } + out.writeLong(out.getChecksum()); + out.flush(); + } catch (IOException e) { + throw new IOException("Failed to write IndexRoutingTable to stream", e); + } + } + + private void verifyCheckSum(BufferedChecksumStreamInput in) throws IOException { + long expectedChecksum = in.getChecksum(); + long readChecksum = in.readLong(); + if (readChecksum != expectedChecksum) { + throw new IOException( + "checksum verification failed - expected: 0x" + + Long.toHexString(expectedChecksum) + + ", got: 0x" + + Long.toHexString(readChecksum) + ); + } + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/package-info.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/package-info.java new file mode 100644 index 0000000000000..a6cb2251a5dd7 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Package containing class to perform operations on remote routing table. + */ +package org.opensearch.gateway.remote.routingtable; diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 20afd7b2f3568..77556f8391473 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -170,6 +170,13 @@ protected boolean performAfterRefreshWithPermit(boolean didRefresh) { * @return true if sync is needed */ private boolean shouldSync(boolean didRefresh, boolean skipPrimaryTermCheck) { + // Ignore syncing segments if the underlying shard is closed + // This also makes sure that retries are not scheduled for shards + // with failed syncSegments invocation after they are closed + if (shardClosed()) { + logger.info("Shard is already closed. Not attempting sync to remote store"); + return false; + } boolean shouldSync = didRefresh // If the readers change, didRefresh is always true. // The third condition exists for uploading the zero state segments where the refresh has not changed the reader // reference, but it is important to upload the zero state segments so that the restore does not break. 
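Note the framing RemoteIndexRoutingTable enforces above: a codec header, then the shard entries, then a checksum trailer that verifyCheckSum compares against the running checksum of everything read so far. The same write/verify handshake, reduced to plain JDK streams with java.util.zip.CRC32 standing in for OpenSearch's buffered-checksum stream wrappers (all names here are ours):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;
import java.util.zip.CheckedOutputStream;

public class ChecksumFramingDemo {
    static byte[] write(String indexName, int shardCount) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        CheckedOutputStream checked = new CheckedOutputStream(bytes, new CRC32());
        DataOutputStream out = new DataOutputStream(checked);
        out.writeUTF(indexName); // header
        out.writeInt(shardCount); // payload (stands in for the serialized shard routing entries)
        out.flush();
        // Trailer: checksum of everything written so far, appended outside the checksummed region.
        new DataOutputStream(bytes).writeLong(checked.getChecksum().getValue());
        return bytes.toByteArray();
    }

    static void read(byte[] data) throws IOException {
        CheckedInputStream checked = new CheckedInputStream(new ByteArrayInputStream(data), new CRC32());
        DataInputStream in = new DataInputStream(checked);
        String indexName = in.readUTF();
        int shardCount = in.readInt();
        long expected = checked.getChecksum().getValue(); // capture before consuming the trailer
        long stored = in.readLong();
        if (stored != expected) {
            throw new IOException("checksum verification failed - expected: 0x" + Long.toHexString(expected) + ", got: 0x" + Long.toHexString(stored));
        }
        System.out.println("verified " + indexName + " (" + shardCount + " shard entries)");
    }

    public static void main(String[] args) throws IOException {
        read(write("test-index", 3));
    }
}

Any truncation or bit flip between header and trailer changes the computed checksum, so corruption is caught before a half-read routing table can be handed to the cluster state.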
@@ -607,6 +614,15 @@ public void onFailure(String file) { }; } + /** + * Checks if the underlying IndexShard instance is closed + * + * @return true if it is closed, false otherwise + */ + private boolean shardClosed() { + return indexShard.state() == IndexShardState.CLOSED; + } + @Override protected Logger getLogger() { return logger; diff --git a/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java b/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java index d6fa2a2e53de3..37af1dcbeab8b 100644 --- a/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java +++ b/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java @@ -32,6 +32,7 @@ package org.opensearch.index.translog; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput; import org.opensearch.core.common.io.stream.ByteBufferStreamInput; import org.opensearch.index.seqno.SequenceNumbers; diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 67549c86b7dd2..f29b6fba6537f 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -706,6 +706,10 @@ int availablePermits() { */ @Override protected boolean shouldFlush() { - return readers.size() >= translogTransferManager.getMaxRemoteTranslogReadersSettings(); + int maxRemoteTlogReaders = translogTransferManager.getMaxRemoteTranslogReadersSettings(); + if (maxRemoteTlogReaders == -1) { + return false; + } + return readers.size() >= maxRemoteTlogReaders; } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index 842e9c77d2350..87e0c21b8203c 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -48,6 +48,8 @@ import org.opensearch.core.common.Strings; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.index.shard.ShardId; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java index 7b5be9505f27a..66a9fe08d06b5 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java @@ -40,6 +40,8 @@ import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.BytesRef; import org.opensearch.common.io.Channels; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.InputStreamStreamInput; import org.opensearch.core.common.io.stream.OutputStreamStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java b/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java index 
89718156cbbe8..521472f4d64a0 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java @@ -32,6 +32,7 @@ package org.opensearch.index.translog; import org.opensearch.common.io.Channels; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput; import org.opensearch.index.seqno.SequenceNumbers; import java.io.EOFException; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java index 86f7567f3333d..b0c7d51c3e43b 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java @@ -50,6 +50,7 @@ import org.opensearch.core.Assertions; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamInput; import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.seqno.SequenceNumbers; diff --git a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java index 074186f64a75d..8cb482c8d8681 100644 --- a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java +++ b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java @@ -25,6 +25,7 @@ */ @PublicApi(since = "2.14.0") public class RemoteStoreSettings { + private static final int MIN_CLUSTER_REMOTE_MAX_TRANSLOG_READERS = 100; /** * Used to specify the default translog buffer interval for remote store backed indexes. 
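The next hunk and the RemoteFsTranslog.shouldFlush change earlier in this patch work as a pair: -1 becomes a legal sentinel meaning "never flush based on reader count", while every other value must still clear the old floor, now held in MIN_CLUSTER_REMOTE_MAX_TRANSLOG_READERS. A compilable sketch of that validate-then-short-circuit pattern (plain Java, hypothetical class name):

public final class TranslogReaderLimit {
    static final int DISABLED = -1; // sentinel: reader-count-based flushing is off
    static final int MIN_READERS = 100; // floor for every non-sentinel value

    // Mirrors the setting validator below: reject anything under 100 except the -1 sentinel.
    static int validate(int value) {
        if (value != DISABLED && value < MIN_READERS) {
            throw new IllegalArgumentException("Cannot set value lower than " + MIN_READERS);
        }
        return value;
    }

    // Mirrors RemoteFsTranslog.shouldFlush(): the sentinel short-circuits the comparison.
    static boolean shouldFlush(int readerCount, int maxReaders) {
        if (maxReaders == DISABLED) {
            return false;
        }
        return readerCount >= maxReaders;
    }

    public static void main(String[] args) {
        System.out.println(shouldFlush(500, validate(-1))); // false: trigger disabled
        System.out.println(shouldFlush(500, validate(300))); // true: 500 >= 300
    }
}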
@@ -112,7 +113,12 @@ public class RemoteStoreSettings { public static final Setting<Integer> CLUSTER_REMOTE_MAX_TRANSLOG_READERS = Setting.intSetting( "cluster.remote_store.translog.max_readers", 1000, - 100, + -1, + v -> { + if (v != -1 && v < MIN_CLUSTER_REMOTE_MAX_TRANSLOG_READERS) { + throw new IllegalArgumentException("Cannot set value lower than " + MIN_CLUSTER_REMOTE_MAX_TRANSLOG_READERS); + } + }, Property.Dynamic, Property.NodeScope ); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 76109ba10624a..cb1f2caa082fc 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -138,6 +138,7 @@ import org.opensearch.gateway.MetaStateService; import org.opensearch.gateway.PersistedClusterStateService; import org.opensearch.gateway.ShardsBatchGatewayAllocator; +import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpServerTransport; import org.opensearch.identity.IdentityService; @@ -752,6 +753,7 @@ protected Node( threadPool::relativeTimeInMillis ); final RemoteClusterStateService remoteClusterStateService; + final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; final RemoteIndexPathUploader remoteIndexPathUploader; if (isRemoteStoreClusterStateEnabled(settings)) { remoteIndexPathUploader = new RemoteIndexPathUploader( @@ -764,14 +766,16 @@ protected Node( nodeEnvironment.nodeId(), repositoriesServiceReference::get, settings, - clusterService.getClusterSettings(), + clusterService, threadPool::preciseRelativeTimeInNanos, threadPool, List.of(remoteIndexPathUploader) ); + remoteClusterStateCleanupManager = remoteClusterStateService.getCleanupManager(); } else { remoteClusterStateService = null; remoteIndexPathUploader = null; + remoteClusterStateCleanupManager = null; } // collect engine factory providers from plugins @@ -1195,7 +1199,8 @@ protected Node( rerouteService, fsHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + clusterManagerMetrics ); final SearchPipelineService searchPipelineService = new SearchPipelineService( clusterService, @@ -1376,6 +1381,7 @@ protected Node( b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(RemoteIndexPathUploader.class).toProvider(() -> remoteIndexPathUploader); + b.bind(RemoteClusterStateCleanupManager.class).toProvider(() -> remoteClusterStateCleanupManager); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java index b10ec0d99c3d5..a0f745a4270c4 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java @@ -13,6 +13,7 @@ import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import
org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.Node; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -28,6 +29,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; + /** * This is an abstraction for validating and storing information specific to remote backed storage nodes. * @@ -46,6 +49,8 @@ public class RemoteStoreNodeAttribute { + "." + CryptoMetadata.SETTINGS_KEY; public static final String REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX = "remote_store.repository.%s.settings."; + public static final String REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY = "remote_store.routing_table.repository"; + private final RepositoriesMetadata repositoriesMetadata; public static List<String> SUPPORTED_DATA_REPO_NAME_ATTRIBUTES = List.of( @@ -157,6 +162,10 @@ private Set<String> getValidatedRepositoryNames(DiscoveryNode node) { } else if (node.getAttributes().containsKey(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)) { repositoryNames.add(validateAttributeNonNull(node, REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)); } + if (node.getAttributes().containsKey(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)) { + repositoryNames.add(validateAttributeNonNull(node, REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)); + } + return repositoryNames; } @@ -187,6 +196,15 @@ public static boolean isRemoteStoreClusterStateEnabled(Settings settings) { && isRemoteClusterStateAttributePresent(settings); } + private static boolean isRemoteRoutingTableAttributePresent(Settings settings) { + return settings.getByPrefix(Node.NODE_ATTRIBUTES.getKey() + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY) + .isEmpty() == false; + } + + public static boolean isRemoteRoutingTableEnabled(Settings settings) { + return FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) && isRemoteRoutingTableAttributePresent(settings); + } + public RepositoriesMetadata getRepositoriesMetadata() { return this.repositoriesMetadata; } @@ -231,6 +249,21 @@ public int hashCode() { return hashCode; } + /** + * Checks if two instances are equal, with an option to skip the check for a list of repos. + * + * @param o other instance + * @param reposToSkip list of repos to skip check for equality + * @return {@code true} iff both instances are equal, not including the repositories in both instances if they are part of reposToSkip.
+ */ + public boolean equalsWithRepoSkip(Object o, List<String> reposToSkip) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + RemoteStoreNodeAttribute that = (RemoteStoreNodeAttribute) o; + return this.getRepositoriesMetadata().equalsIgnoreGenerationsWithRepoSkip(that.getRepositoriesMetadata(), reposToSkip); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java index a106706c00732..d0bc41b459cc3 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java @@ -33,6 +33,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.coordination.Coordinator.Mode; import org.opensearch.cluster.coordination.FollowersChecker.FollowerCheckRequest; @@ -47,6 +48,9 @@ import org.opensearch.core.transport.TransportResponse.Empty; import org.opensearch.monitor.NodeHealthService; import org.opensearch.monitor.StatusInfo; +import org.opensearch.telemetry.TestInMemoryMetricsRegistry; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction; @@ -131,6 +135,8 @@ protected void onSendRequest(long requestId, String action, TransportRequest req transportService.start(); transportService.acceptIncomingRequests(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final FollowersChecker followersChecker = new FollowersChecker( settings, clusterSettings, @@ -139,7 +145,8 @@ protected void onSendRequest(long requestId, String action, TransportRequest req (node, reason) -> { assert false : node; }, - () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info") + () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), + new ClusterManagerMetrics(metricsRegistry) ); followersChecker.setCurrentNodes(discoveryNodesHolder[0]); @@ -193,35 +200,43 @@ protected void onSendRequest(long requestId, String action, TransportRequest req followersChecker.clearCurrentNodes(); deterministicTaskQueue.runAllTasks(); assertThat(checkedNodes, empty()); + assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatDoesNotRespond() { final Settings settings = randomSettings(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( settings, () -> null, "followers check retry count exceeded", (FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis() + FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) * FOLLOWER_CHECK_TIMEOUT_SETTING.get(settings).millis(), - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatRejectsCheck() {
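// The metrics assertions in this file all follow the pattern just shown in
// testFailsNodeThatDoesNotRespond: construct a TestInMemoryMetricsRegistry, hand it to the
// checker under test (wrapped in ClusterManagerMetrics where the constructor requires it),
// drive the failure scenario, then read the published counter back. Sketch, mirroring the
// assertions in this file:
//   TestInMemoryMetricsRegistry registry = new TestInMemoryMetricsRegistry();
//   // ... run the failing-node scenario with `registry` ...
//   assertEquals(Integer.valueOf(2),
//       registry.getCounterStore().get("followers.checker.failure.count").getCounterValue());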
final Settings settings = randomSettings(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( settings, () -> { throw new OpenSearchException("simulated exception"); }, "followers check retry count exceeded", (FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis(), - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailureCounterResetsOnSuccess() { final Settings settings = randomSettings(); final int retryCount = FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings); final int maxRecoveries = randomIntBetween(3, 10); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); // passes just enough checks to keep it alive, up to maxRecoveries, and then fails completely testBehaviourOfFailingNode(settings, new Supplier() { @@ -241,18 +256,23 @@ public Empty get() { "followers check retry count exceeded", (FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) * (maxRecoveries + 1) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings) .millis(), - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatIsDisconnected() { + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( Settings.EMPTY, () -> { throw new ConnectTransportException(null, "simulated exception"); }, "disconnected", 0, - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatDisconnects() { @@ -297,6 +317,7 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean nodeFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); final FollowersChecker followersChecker = new FollowersChecker( settings, @@ -307,7 +328,8 @@ public String toString() { assertTrue(nodeFailed.compareAndSet(false, true)); assertThat(reason, equalTo("disconnected")); }, - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + new ClusterManagerMetrics(metricsRegistry) ); DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).add(otherNode).localNodeId(localNode.getId()).build(); @@ -318,16 +340,20 @@ public String toString() { deterministicTaskQueue.runAllRunnableTasks(); assertTrue(nodeFailed.get()); assertThat(followersChecker.getFaultyNodes(), contains(otherNode)); + assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatIsUnhealthy() { + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( randomSettings(), () -> { throw new NodeHealthCheckFailureException("non writable exception"); }, "health check failed", 0, - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + 
assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } private void testBehaviourOfFailingNode( @@ -335,7 +361,8 @@ private void testBehaviourOfFailingNode( Supplier responder, String failureReason, long expectedFailureTime, - NodeHealthService nodeHealthService + NodeHealthService nodeHealthService, + MetricsRegistry metricsRegistry ) { final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT); final DiscoveryNode otherNode = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT); @@ -386,7 +413,6 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean nodeFailed = new AtomicBoolean(); - final FollowersChecker followersChecker = new FollowersChecker( settings, clusterSettings, @@ -396,7 +422,8 @@ public String toString() { assertTrue(nodeFailed.compareAndSet(false, true)); assertThat(reason, equalTo(failureReason)); }, - nodeHealthService + nodeHealthService, + new ClusterManagerMetrics(metricsRegistry) ); DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).add(otherNode).localNodeId(localNode.getId()).build(); @@ -501,7 +528,11 @@ protected void onSendRequest(long requestId, String action, TransportRequest req if (exception != null) { throw exception; } - }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(UNHEALTHY, "unhealthy-info")); + }, + (node, reason) -> { assert false : node; }, + () -> new StatusInfo(UNHEALTHY, "unhealthy-info"), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); final long leaderTerm = randomLongBetween(2, Long.MAX_VALUE); final long followerTerm = randomLongBetween(1, leaderTerm - 1); @@ -574,7 +605,11 @@ protected void onSendRequest(long requestId, String action, TransportRequest req if (exception != null) { throw exception; } - }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(HEALTHY, "healthy-info")); + }, + (node, reason) -> { assert false : node; }, + () -> new StatusInfo(HEALTHY, "healthy-info"), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); { // Does not call into the coordinator in the normal case @@ -721,7 +756,11 @@ public void testPreferClusterManagerNodes() { ); final FollowersChecker followersChecker = new FollowersChecker(Settings.EMPTY, clusterSettings, transportService, fcr -> { assert false : fcr; - }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(HEALTHY, "healthy-info")); + }, + (node, reason) -> { assert false : node; }, + () -> new StatusInfo(HEALTHY, "healthy-info"), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); followersChecker.setCurrentNodes(discoveryNodes); List followerTargets = Stream.of(capturingTransport.getCapturedRequestsAndClear()) .map(cr -> cr.node) diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 3e343e95f6c4b..9cb1bd0b57132 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -72,6 +72,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import 
static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -944,6 +945,145 @@ public void testNodeJoinInMixedMode() { JoinTaskExecutor.ensureNodesCompatibility(joiningNode2, currentNodes, metadata); } + public void testRemoteRoutingTableRepoAbsentNodeJoin() { + + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO)); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testRemoteRoutingTableNodeJoinRepoPresentInJoiningNode() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); + attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + DiscoveryNode joiningNode = newDiscoveryNode(attr); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testRemoteRoutingTableNodeJoinRepoPresentInExistingNode() { + Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); + attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + attr, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO)); + assertThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) + ); + } + + public void testRemoteRoutingTableNodeJoinRepoPresentInBothNode() { + Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); + attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + attr, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) +
.nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(attr); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testRemoteRoutingTableNodeJoinNodeWithRemoteAndRoutingRepoDifference() { + Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); + attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + attr, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final DiscoveryNode existingNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode2).add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(attr); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testRemoteRoutingTableNodeJoinNodeWithRemoteAndRoutingRepoDifferenceMixedMode() { + Map<String, String> attr = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); + attr.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + attr, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final DiscoveryNode existingNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final Settings settings = Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + .build(); + final Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Metadata metadata = Metadata.builder().persistentSettings(settings).build(); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode2).add(existingNode).localNodeId(existingNode.getId()).build()) + .metadata(metadata) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(attr); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories) throws Exception { @@ -985,6 +1125,7 @@ private DiscoveryNode newDiscoveryNode(Map<String, String> attributes) { private static final String TRANSLOG_REPO = "translog-repo"; private static final String CLUSTER_STATE_REPO = "cluster-state-repo"; private static final String COMMON_REPO = "remote-repo"; + private static final String ROUTING_TABLE_REPO = "routing-table-repo"; private Map<String, String> remoteStoreNodeAttributes(String segmentRepoName, String translogRepoName) { return remoteStoreNodeAttributes(segmentRepoName, translogRepoName, CLUSTER_STATE_REPO); @@ -1049,6 +1190,28 @@ private Map<String, String> remoteStateNodeAttributes(String clusterStateRepo) { }; } + private Map<String, String>
remoteRoutingTableAttributes(String repoName) { + String routingTableRepositoryTypeAttributeKey = String.format( + Locale.getDefault(), + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + repoName + ); + String routingTableRepositorySettingsAttributeKeyPrefix = String.format( + Locale.getDefault(), + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + repoName + ); + + return new HashMap<>() { + { + put(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, repoName); + putIfAbsent(routingTableRepositoryTypeAttributeKey, "s3"); + putIfAbsent(routingTableRepositorySettingsAttributeKeyPrefix + "bucket", "state_bucket"); + putIfAbsent(routingTableRepositorySettingsAttributeKeyPrefix + "base_path", "/state/path"); + } + }; + } + private void validateAttributes(Map<String, String> remoteStoreNodeAttributes, ClusterState currentState, DiscoveryNode existingNode) { DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes); Exception e = assertThrows( diff --git a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java index fe65058333116..decdeddfa37a1 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java @@ -34,6 +34,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.coordination.LeaderChecker.LeaderCheckRequest; import org.opensearch.cluster.node.DiscoveryNode; @@ -44,6 +45,7 @@ import org.opensearch.core.transport.TransportResponse; import org.opensearch.core.transport.TransportResponse.Empty; import org.opensearch.monitor.StatusInfo; +import org.opensearch.telemetry.TestInMemoryMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction; @@ -175,11 +177,13 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean leaderFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> { assertThat(e.getMessage(), matchesRegex("node \\[.*\\] failed \\[[1-9][0-9]*\\] consecutive checks")); assertTrue(leaderFailed.compareAndSet(false, true)); - }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info")); + }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics); logger.info("--> creating first checker"); leaderChecker.updateLeader(leader1); @@ -229,6 +233,7 @@ public String toString() { ); } leaderChecker.updateLeader(null); + assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } enum Response { @@ -293,10 +298,13 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean leaderFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); + final LeaderChecker leaderChecker = new LeaderChecker(settings,
clusterSettings, transportService, e -> { assertThat(e.getMessage(), anyOf(endsWith("disconnected"), endsWith("disconnected during check"))); assertTrue(leaderFailed.compareAndSet(false, true)); - }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info")); + }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics); leaderChecker.updateLeader(leader); { @@ -351,6 +359,7 @@ public String toString() { deterministicTaskQueue.runAllRunnableTasks(); assertTrue(leaderFailed.get()); } + assertEquals(Integer.valueOf(3), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } public void testFollowerFailsImmediatelyOnHealthCheckFailure() { @@ -407,10 +416,12 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean leaderFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> { assertThat(e.getMessage(), endsWith("failed health checks")); assertTrue(leaderFailed.compareAndSet(false, true)); - }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info")); + }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics); leaderChecker.updateLeader(leader); @@ -430,6 +441,8 @@ public String toString() { assertTrue(leaderFailed.get()); } + + assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } public void testLeaderBehaviour() { @@ -453,12 +466,15 @@ public void testLeaderBehaviour() { transportService.start(); transportService.acceptIncomingRequests(); + final TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); final LeaderChecker leaderChecker = new LeaderChecker( settings, clusterSettings, transportService, e -> fail("shouldn't be checking anything"), - () -> nodeHealthServiceStatus.get() + () -> nodeHealthServiceStatus.get(), + clusterManagerMetrics ); final DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() @@ -523,6 +539,7 @@ public void testLeaderBehaviour() { equalTo("rejecting leader check from [" + otherNode + "] sent to a node that is no longer the cluster-manager") ); } + assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } private class CapturingTransportResponseHandler implements TransportResponseHandler { diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java index 10d5dceb74f55..f84f0326f4a9d 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java @@ -273,7 +273,8 @@ protected void onSendRequest( ElectionStrategy.DEFAULT_INSTANCE, nodeHealthService, persistedStateRegistry, - Mockito.mock(RemoteStoreNodeService.class) + Mockito.mock(RemoteStoreNodeService.class), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); transportService.start(); transportService.acceptIncomingRequests(); diff --git 
a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java new file mode 100644 index 0000000000000..9a9cbfa153259 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java @@ -0,0 +1,77 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.remote; + +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.repositories.FilterRepository; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.RepositoryMissingException; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.After; +import org.junit.Before; + +import java.util.function.Supplier; + +import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class RemoteRoutingTableServiceTests extends OpenSearchTestCase { + + private RemoteRoutingTableService remoteRoutingTableService; + private Supplier<RepositoriesService> repositoriesServiceSupplier; + private RepositoriesService repositoriesService; + private BlobStoreRepository blobStoreRepository; + + @Before + public void setup() { + repositoriesServiceSupplier = mock(Supplier.class); + repositoriesService = mock(RepositoriesService.class); + when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService); + + Settings settings = Settings.builder() + .put("node.attr."
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") + .build(); + + blobStoreRepository = mock(BlobStoreRepository.class); + when(repositoriesService.repository("routing_repository")).thenReturn(blobStoreRepository); + + Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + + remoteRoutingTableService = new RemoteRoutingTableService(repositoriesServiceSupplier, settings); + } + + @After + public void teardown() throws Exception { + super.tearDown(); + remoteRoutingTableService.close(); + } + + public void testFailInitializationWhenRemoteRoutingDisabled() { + final Settings settings = Settings.builder().build(); + assertThrows(AssertionError.class, () -> new RemoteRoutingTableService(repositoriesServiceSupplier, settings)); + } + + public void testFailStartWhenRepositoryNotSet() { + doThrow(new RepositoryMissingException("repository missing")).when(repositoriesService).repository("routing_repository"); + assertThrows(RepositoryMissingException.class, () -> remoteRoutingTableService.start()); + } + + public void testFailStartWhenNotBlobRepository() { + final FilterRepository filterRepository = mock(FilterRepository.class); + when(repositoriesService.repository("routing_repository")).thenReturn(filterRepository); + assertThrows(AssertionError.class, () -> remoteRoutingTableService.start()); + } + +} diff --git a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java index b33ebf8333b36..5539b3237c2bf 100644 --- a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java +++ b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java @@ -32,6 +32,7 @@ package org.opensearch.discovery; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.Coordinator; import org.opensearch.cluster.coordination.PersistedStateRegistry; @@ -48,6 +49,7 @@ import org.opensearch.gateway.GatewayMetaState; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.plugins.DiscoveryPlugin; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.MockTransportService; @@ -128,7 +130,8 @@ private DiscoveryModule newModule(Settings settings, List plugi mock(RerouteService.class), null, new PersistedStateRegistry(), - remoteStoreNodeService + remoteStoreNodeService, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); } diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 3ba98c44f8d3e..418e6d8de6adb 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -462,9 +462,7 @@ public void testDataOnlyNodePersistence() throws Exception { }); when(transportService.getThreadPool()).thenReturn(threadPool); ClusterService clusterService = mock(ClusterService.class); - when(clusterService.getClusterSettings()).thenReturn( - new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) - ); + 
when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); final PersistedClusterStateService persistedClusterStateService = new PersistedClusterStateService( nodeEnvironment, xContentRegistry(), @@ -487,7 +485,7 @@ public void testDataOnlyNodePersistence() throws Exception { nodeEnvironment.nodeId(), repositoriesServiceSupplier, settings, - clusterSettings, + clusterService, () -> 0L, threadPool, List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) diff --git a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java index 0b3cd49140939..d1f559eb75f85 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java @@ -99,7 +99,7 @@ public void testClusterMetadataManifestXContent() throws IOException { Version.CURRENT, "test-node-id", false, - ClusterMetadataManifest.CODEC_V2, + ClusterMetadataManifest.CODEC_V3, null, Collections.singletonList(uploadedIndexMetadata), "prev-cluster-uuid", @@ -123,7 +123,9 @@ public void testClusterMetadataManifestXContent() throws IOException { "custom--weighted_routing_netadata-file" ) ) - ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())) + ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())), + 1L, + randomUploadedIndexMetadataList() ); final XContentBuilder builder = JsonXContent.contentBuilder(); builder.startObject(); @@ -169,7 +171,9 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { "custom--weighted_routing_netadata-file" ) ) - ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())) + ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())), + 1L, + randomUploadedIndexMetadataList() ); { // Mutate Cluster Term EqualsHashCodeTestUtils.checkEqualsAndHashCode( @@ -309,6 +313,106 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { } } + public void testClusterMetadataManifestXContentV2() throws IOException { + UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); + UploadedMetadataAttribute uploadedMetadataAttribute = new UploadedMetadataAttribute("attribute_name", "testing_attribute"); + ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( + 1L, + 1L, + "test-cluster-uuid", + "test-state-uuid", + Version.CURRENT, + "test-node-id", + false, + ClusterMetadataManifest.CODEC_V2, + null, + Collections.singletonList(uploadedIndexMetadata), + "prev-cluster-uuid", + true, + uploadedMetadataAttribute, + uploadedMetadataAttribute, + uploadedMetadataAttribute, + Collections.unmodifiableList( + Arrays.asList( + new UploadedMetadataAttribute( + RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + RepositoriesMetadata.TYPE, + "custom--repositories-file" + ), + new UploadedMetadataAttribute( + RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + IndexGraveyard.TYPE, + "custom--index_graveyard-file" + ), + new UploadedMetadataAttribute( + RemoteClusterStateService.CUSTOM_METADATA + 
RemoteClusterStateService.CUSTOM_DELIMITER + + WeightedRoutingMetadata.TYPE, + "custom--weighted_routing_netadata-file" + ) + ) + ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())), + 0, + new ArrayList<>() + ); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContentV2(parser); + assertEquals(originalManifest, fromXContentManifest); + } + } + + public void testClusterMetadataManifestXContentV3() throws IOException { + UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); + UploadedMetadataAttribute uploadedMetadataAttribute = new UploadedMetadataAttribute("attribute_name", "testing_attribute"); + ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( + 1L, + 1L, + "test-cluster-uuid", + "test-state-uuid", + Version.CURRENT, + "test-node-id", + false, + ClusterMetadataManifest.CODEC_V3, + null, + Collections.singletonList(uploadedIndexMetadata), + "prev-cluster-uuid", + true, + uploadedMetadataAttribute, + uploadedMetadataAttribute, + uploadedMetadataAttribute, + Collections.unmodifiableList( + Arrays.asList( + new UploadedMetadataAttribute( + RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + RepositoriesMetadata.TYPE, + "custom--repositories-file" + ), + new UploadedMetadataAttribute( + RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + IndexGraveyard.TYPE, + "custom--index_graveyard-file" + ), + new UploadedMetadataAttribute( + RemoteClusterStateService.CUSTOM_METADATA + RemoteClusterStateService.CUSTOM_DELIMITER + + WeightedRoutingMetadata.TYPE, + "custom--weighted_routing_netadata-file" + ) + ) + ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())), + 1L, + Collections.singletonList(uploadedIndexMetadata) + ); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContent(parser); + assertEquals(originalManifest, fromXContentManifest); + } + } + private List randomUploadedIndexMetadataList() { final int size = randomIntBetween(1, 10); final List uploadedIndexMetadataList = new ArrayList<>(size); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java new file mode 100644 index 0000000000000..24fd1b164a4ff --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java @@ -0,0 +1,446 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.service.ClusterApplierService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.support.PlainBlobMetadata; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.concurrent.AbstractAsyncTask; +import org.opensearch.core.action.ActionListener; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.VersionUtils; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; + +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V1; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.AsyncStaleFileDeletion; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES; +import static org.opensearch.gateway.remote.RemoteClusterStateService.CLUSTER_STATE_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA; +import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateService.GLOBAL_METADATA_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteClusterStateService.SETTING_METADATA; +import static org.opensearch.gateway.remote.RemoteClusterStateService.TEMPLATES_METADATA; +import static org.opensearch.gateway.remote.RemoteClusterStateService.encodeString; +import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.generateClusterStateWithOneIndex; +import 
static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.nodesWithLocalNodeClusterManager; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class RemoteClusterStateCleanupManagerTests extends OpenSearchTestCase { + private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; + private Supplier<RepositoriesService> repositoriesServiceSupplier; + private RepositoriesService repositoriesService; + private BlobStoreRepository blobStoreRepository; + private BlobStore blobStore; + private ClusterSettings clusterSettings; + private ClusterApplierService clusterApplierService; + private ClusterState clusterState; + private Metadata metadata; + private RemoteClusterStateService remoteClusterStateService; + private final ThreadPool threadPool = new TestThreadPool(getClass().getName()); + + @Before + public void setup() { + repositoriesServiceSupplier = mock(Supplier.class); + repositoriesService = mock(RepositoriesService.class); + when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService); + + String stateRepoTypeAttributeKey = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + "remote_store_repository" + ); + String stateRepoSettingsAttributeKeyPrefix = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + "remote_store_repository" + ); + + Settings settings = Settings.builder() + .put("node.attr."
+ REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") + .put(stateRepoTypeAttributeKey, FsRepository.TYPE) + .put(stateRepoSettingsAttributeKeyPrefix + "location", "randomRepoPath") + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .build(); + + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + clusterApplierService = mock(ClusterApplierService.class); + clusterState = mock(ClusterState.class); + metadata = mock(Metadata.class); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); + when(clusterState.getClusterName()).thenReturn(new ClusterName("test")); + when(metadata.clusterUUID()).thenReturn("testUUID"); + when(clusterState.metadata()).thenReturn(metadata); + when(clusterApplierService.state()).thenReturn(clusterState); + when(clusterService.getClusterApplierService()).thenReturn(clusterApplierService); + + blobStoreRepository = mock(BlobStoreRepository.class); + blobStore = mock(BlobStore.class); + when(blobStoreRepository.blobStore()).thenReturn(blobStore); + when(repositoriesService.repository("remote_store_repository")).thenReturn(blobStoreRepository); + + remoteClusterStateService = mock(RemoteClusterStateService.class); + when(remoteClusterStateService.getStats()).thenReturn(new RemotePersistenceStats()); + when(remoteClusterStateService.getThreadpool()).thenReturn(threadPool); + when(remoteClusterStateService.getBlobStore()).thenReturn(blobStore); + remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(remoteClusterStateService, clusterService); + } + + @After + public void teardown() throws Exception { + super.tearDown(); + remoteClusterStateCleanupManager.close(); + threadPool.shutdown(); + } + + public void testDeleteClusterMetadata() throws IOException { + String clusterUUID = "clusterUUID"; + String clusterName = "test-cluster"; + List<BlobMetadata> inactiveBlobs = Arrays.asList( + new PlainBlobMetadata("manifest1.dat", 1L), + new PlainBlobMetadata("manifest2.dat", 1L), + new PlainBlobMetadata("manifest3.dat", 1L) + ); + List<BlobMetadata> activeBlobs = Arrays.asList( + new PlainBlobMetadata("manifest4.dat", 1L), + new PlainBlobMetadata("manifest5.dat", 1L) + ); + UploadedIndexMetadata index1Metadata = new UploadedIndexMetadata("index1", "indexUUID1", "index_metadata1"); + UploadedIndexMetadata index2Metadata = new UploadedIndexMetadata("index2", "indexUUID2", "index_metadata2"); + UploadedIndexMetadata index1UpdatedMetadata = new UploadedIndexMetadata("index1", "indexUUID1", "index_metadata1_updated"); + UploadedMetadataAttribute coordinationMetadata = new UploadedMetadataAttribute(COORDINATION_METADATA, "coordination_metadata"); + UploadedMetadataAttribute templateMetadata = new UploadedMetadataAttribute(TEMPLATES_METADATA, "template_metadata"); + UploadedMetadataAttribute settingMetadata = new UploadedMetadataAttribute(SETTING_METADATA, "settings_metadata"); + UploadedMetadataAttribute coordinationMetadataUpdated = new UploadedMetadataAttribute( + COORDINATION_METADATA, + "coordination_metadata_updated" + ); + UploadedMetadataAttribute templateMetadataUpdated = new UploadedMetadataAttribute(TEMPLATES_METADATA, "template_metadata_updated"); + UploadedMetadataAttribute settingMetadataUpdated = new UploadedMetadataAttribute(SETTING_METADATA, "settings_metadata_updated"); + ClusterMetadataManifest manifest1 = ClusterMetadataManifest.builder() + .indices(List.of(index1Metadata)) +
.globalMetadataFileName("global_metadata") + .clusterTerm(1L) + .stateVersion(1L) + .codecVersion(CODEC_V1) + .stateUUID(randomAlphaOfLength(10)) + .clusterUUID(clusterUUID) + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID(ClusterState.UNKNOWN_UUID) + .committed(true) + .build(); + ClusterMetadataManifest manifest2 = ClusterMetadataManifest.builder(manifest1) + .indices(List.of(index1Metadata, index2Metadata)) + .codecVersion(CODEC_V2) + .globalMetadataFileName(null) + .coordinationMetadata(coordinationMetadata) + .templatesMetadata(templateMetadata) + .settingMetadata(settingMetadata) + .build(); + ClusterMetadataManifest manifest3 = ClusterMetadataManifest.builder(manifest2) + .indices(List.of(index1UpdatedMetadata, index2Metadata)) + .settingMetadata(settingMetadataUpdated) + .build(); + + // the active manifests reference index1Updated, index2, settingsUpdated, coordinationUpdated, templates and templatesUpdated + ClusterMetadataManifest manifest4 = ClusterMetadataManifest.builder(manifest3) + .coordinationMetadata(coordinationMetadataUpdated) + .build(); + ClusterMetadataManifest manifest5 = ClusterMetadataManifest.builder(manifest4).templatesMetadata(templateMetadataUpdated).build(); + + when(remoteClusterStateService.fetchRemoteClusterMetadataManifest(eq(clusterName), eq(clusterUUID), any())).thenReturn( + manifest4, + manifest5, + manifest1, + manifest2, + manifest3 + ); + BlobContainer container = mock(BlobContainer.class); + when(blobStore.blobContainer(any())).thenReturn(container); + doNothing().when(container).deleteBlobsIgnoringIfNotExists(any()); + + remoteClusterStateCleanupManager.deleteClusterMetadata(clusterName, clusterUUID, activeBlobs, inactiveBlobs); + verify(container).deleteBlobsIgnoringIfNotExists( + List.of( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + coordinationMetadata.getUploadedFilename() + ".dat", + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + settingMetadata.getUploadedFilename() + ".dat", + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + "global_metadata.dat" + ) + ); + verify(container).deleteBlobsIgnoringIfNotExists( + List.of( + new BlobPath().add(INDEX_PATH_TOKEN).add(index1Metadata.getIndexUUID()).buildAsString() + + index1Metadata.getUploadedFilePath() + + ".dat" + ) + ); + Set<String> staleManifest = new HashSet<>(); + inactiveBlobs.forEach(blob -> staleManifest.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blob.name())); + verify(container).deleteBlobsIgnoringIfNotExists(new ArrayList<>(staleManifest)); + } + + public void testDeleteStaleClusterUUIDs() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID(randomAlphaOfLength(10)) + .clusterUUID("cluster-uuid1") + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID(ClusterState.UNKNOWN_UUID) + .committed(true) + .build(); + + BlobPath blobPath = new BlobPath().add("random-path"); + BlobContainer uuidContainerContainer = mock(BlobContainer.class); + BlobContainer manifest2Container = mock(BlobContainer.class); + BlobContainer manifest3Container = mock(BlobContainer.class); + when(blobStore.blobContainer(any())).then(invocation -> { + BlobPath blobPath1 =
invocation.getArgument(0); + if (blobPath1.buildAsString().endsWith("cluster-state/")) { + return uuidContainerContainer; + } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid2/")) { + return manifest2Container; + } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid3/")) { + return manifest3Container; + } else { + throw new IllegalArgumentException("Unexpected blob path " + blobPath1); + } + }); + when( + manifest2Container.listBlobsByPrefixInSortedOrder( + MANIFEST_FILE_PREFIX + DELIMITER, + Integer.MAX_VALUE, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(List.of(new PlainBlobMetadata("manifest2", 1L))); + when( + manifest3Container.listBlobsByPrefixInSortedOrder( + MANIFEST_FILE_PREFIX + DELIMITER, + Integer.MAX_VALUE, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(List.of(new PlainBlobMetadata("manifest3", 1L))); + Set<String> uuids = new HashSet<>(Arrays.asList("cluster-uuid1", "cluster-uuid2", "cluster-uuid3")); + when(remoteClusterStateService.getAllClusterUUIDs(any())).thenReturn(uuids); + when(remoteClusterStateService.getCusterMetadataBasePath(any(), any())).then( + invocationOnMock -> blobPath.add(encodeString(invocationOnMock.getArgument(0))) + .add(CLUSTER_STATE_PATH_TOKEN) + .add((String) invocationOnMock.getArgument(1)) + ); + remoteClusterStateCleanupManager.start(); + remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, clusterMetadataManifest); + try { + assertBusy(() -> { + verify(manifest2Container, times(1)).delete(); + verify(manifest3Container, times(1)).delete(); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public void testRemoteStateCleanupFailureStats() throws IOException { + BlobContainer blobContainer = mock(BlobContainer.class); + doThrow(IOException.class).when(blobContainer).delete(); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + remoteClusterStateCleanupManager.start(); + remoteClusterStateCleanupManager.deleteStaleUUIDsClusterMetadata("cluster1", List.of("cluster-uuid1")); + try { + assertBusy(() -> { + // wait for stats to get updated + assertNotNull(remoteClusterStateCleanupManager.getStats()); + assertEquals(0, remoteClusterStateCleanupManager.getStats().getSuccessCount()); + assertEquals(1, remoteClusterStateCleanupManager.getStats().getCleanupAttemptFailedCount()); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception { + BlobContainer blobContainer = mock(BlobContainer.class); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + + CountDownLatch latch = new CountDownLatch(1); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> { + callCount.incrementAndGet(); + if (latch.await(5000, TimeUnit.MILLISECONDS) == false) { + throw new Exception("Timed out waiting for delete task queuing to complete"); + } + return null; + }).when(blobContainer) + .listBlobsByPrefixInSortedOrder( + any(String.class), + any(int.class), + any(BlobContainer.BlobNameSortOrder.class), + any(ActionListener.class) + ); + + remoteClusterStateCleanupManager.start(); + remoteClusterStateCleanupManager.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + remoteClusterStateCleanupManager.deleteStaleClusterMetadata("cluster-name", "cluster-uuid",
RETAINED_MANIFESTS); + + latch.countDown(); + assertBusy(() -> assertEquals(1, callCount.get())); + } + + public void testRemoteClusterStateCleanupSetting() { + remoteClusterStateCleanupManager.start(); + // verify default value + assertEquals(CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, remoteClusterStateCleanupManager.getStaleFileCleanupInterval()); + + // verify update interval + int cleanupInterval = randomIntBetween(1, 10); + Settings newSettings = Settings.builder().put("cluster.remote_store.state.cleanup_interval", cleanupInterval + "s").build(); + clusterSettings.applySettings(newSettings); + assertEquals(cleanupInterval, remoteClusterStateCleanupManager.getStaleFileCleanupInterval().seconds()); + } + + public void testRemoteCleanupTaskScheduled() { + AbstractAsyncTask cleanupTask = remoteClusterStateCleanupManager.getStaleFileDeletionTask(); + assertNull(cleanupTask); + // now the task should be initialized + remoteClusterStateCleanupManager.start(); + assertNotNull(remoteClusterStateCleanupManager.getStaleFileDeletionTask()); + assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().mustReschedule()); + assertEquals( + clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING), + remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval() + ); + assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled()); + assertFalse(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isClosed()); + } + + public void testRemoteCleanupSkipsOnOnlyElectedClusterManager() { + DiscoveryNodes nodes = mock(DiscoveryNodes.class); + when(nodes.isLocalNodeElectedClusterManager()).thenReturn(false); + when(clusterState.nodes()).thenReturn(nodes); + RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt()); + spyManager.cleanUpStaleFiles(); + assertEquals(0, callCount.get()); + + when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true); + when(clusterState.version()).thenReturn(randomLongBetween(11, 20)); + spyManager.cleanUpStaleFiles(); + assertEquals(1, callCount.get()); + } + + public void testRemoteCleanupSkipsIfVersionIncrementLessThanThreshold() { + DiscoveryNodes nodes = mock(DiscoveryNodes.class); + long version = randomLongBetween(1, SKIP_CLEANUP_STATE_CHANGES); + when(clusterApplierService.state()).thenReturn(clusterState); + when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true); + when(clusterState.nodes()).thenReturn(nodes); + when(clusterState.version()).thenReturn(version); + + RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt()); + + spyManager.cleanUpStaleFiles(); + assertEquals(0, callCount.get()); + } + + public void testRemoteCleanupCallsDeleteIfVersionIncrementGreaterThanThreshold() { + DiscoveryNodes nodes = mock(DiscoveryNodes.class); + long version = randomLongBetween(SKIP_CLEANUP_STATE_CHANGES + 1, SKIP_CLEANUP_STATE_CHANGES + 10); + when(clusterApplierService.state()).thenReturn(clusterState); + when(nodes.isLocalNodeElectedClusterManager()).thenReturn(true); + when(clusterState.nodes()).thenReturn(nodes); + when(clusterState.version()).thenReturn(version); + +
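+ // The version increment here exceeds SKIP_CLEANUP_STATE_CHANGES, so cleanup is expected to invoke the stubbed deleteStaleClusterMetadata exactly once.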
RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> callCount.incrementAndGet()).when(spyManager).deleteStaleClusterMetadata(any(), any(), anyInt()); + + // using spied cleanup manager so that stubbed deleteStaleClusterMetadata is called + spyManager.cleanUpStaleFiles(); + assertEquals(1, callCount.get()); + } + + public void testRemoteCleanupSchedulesEvenAfterFailure() { + remoteClusterStateCleanupManager.start(); + RemoteClusterStateCleanupManager spyManager = spy(remoteClusterStateCleanupManager); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocationOnMock -> { + callCount.incrementAndGet(); + throw new RuntimeException("Test exception"); + }).when(spyManager).cleanUpStaleFiles(); + AsyncStaleFileDeletion task = new AsyncStaleFileDeletion(spyManager); + assertTrue(task.isScheduled()); + task.run(); + // Task is still scheduled after the failure + assertTrue(task.isScheduled()); + assertEquals(1, callCount.get()); + + task.run(); + // Task is still scheduled after the failure + assertTrue(task.isScheduled()); + assertEquals(2, callCount.get()); + } +} diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 1b242b921c0d7..4a53770c76d88 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -19,6 +19,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.TemplatesMetadata; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; @@ -33,6 +34,7 @@ import org.opensearch.common.network.NetworkModule; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.ParseField; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; @@ -72,9 +74,6 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.Function; import java.util.function.Supplier; @@ -86,6 +85,7 @@ import org.mockito.ArgumentMatchers; import static java.util.stream.Collectors.toList; +import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.RemoteClusterStateService.COORDINATION_METADATA; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; import static org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS; @@ -93,12 +93,12 @@ import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; -import static 
org.opensearch.gateway.remote.RemoteClusterStateService.RETAINED_MANIFESTS; import static org.opensearch.gateway.remote.RemoteClusterStateService.SETTING_METADATA; import static org.opensearch.gateway.remote.RemoteClusterStateService.TEMPLATES_METADATA; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; @@ -109,13 +109,12 @@ import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; public class RemoteClusterStateServiceTests extends OpenSearchTestCase { private RemoteClusterStateService remoteClusterStateService; + private ClusterService clusterService; private ClusterSettings clusterSettings; private Supplier repositoriesServiceSupplier; private RepositoriesService repositoriesService; @@ -148,6 +147,8 @@ public void setup() { .build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); NamedXContentRegistry xContentRegistry = new NamedXContentRegistry( Stream.of( NetworkModule.getNamedXContents().stream(), @@ -165,7 +166,7 @@ public void setup() { "test-node-id", repositoriesServiceSupplier, settings, - clusterSettings, + clusterService, () -> 0L, threadPool, List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) @@ -187,14 +188,14 @@ public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException public void testFailInitializationWhenRemoteStateDisabled() { final Settings settings = Settings.builder().build(); - ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); assertThrows( AssertionError.class, () -> new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, settings, - clusterSettings, + clusterService, () -> 0L, threadPool, List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) @@ -547,7 +548,7 @@ private void verifyWriteIncrementalGlobalMetadataFromOlderCodecSuccess(ClusterMe ); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() - .codecVersion(2) + .codecVersion(3) .indices(Collections.emptyList()) .clusterTerm(1L) .stateVersion(1L) @@ -1071,6 +1072,8 @@ public void testReadGlobalMetadata() throws IOException { .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1) + .indicesRouting(List.of()) .build(); Metadata expactedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", 
true).build()).build(); @@ -1280,72 +1283,6 @@ public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOEx assertThat(previousClusterUUID, equalTo("cluster-uuid2")); } - public void testDeleteStaleClusterUUIDs() throws IOException { - final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); - ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() - .indices(List.of()) - .clusterTerm(1L) - .stateVersion(1L) - .stateUUID(randomAlphaOfLength(10)) - .clusterUUID("cluster-uuid1") - .nodeId("nodeA") - .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) - .previousClusterUUID(ClusterState.UNKNOWN_UUID) - .committed(true) - .build(); - - BlobPath blobPath = new BlobPath().add("random-path"); - when((blobStoreRepository.basePath())).thenReturn(blobPath); - BlobContainer uuidContainerContainer = mock(BlobContainer.class); - BlobContainer manifest2Container = mock(BlobContainer.class); - BlobContainer manifest3Container = mock(BlobContainer.class); - when(blobStore.blobContainer(any())).then(invocation -> { - BlobPath blobPath1 = invocation.getArgument(0); - if (blobPath1.buildAsString().endsWith("cluster-state/")) { - return uuidContainerContainer; - } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid2/")) { - return manifest2Container; - } else if (blobPath1.buildAsString().contains("cluster-state/cluster-uuid3/")) { - return manifest3Container; - } else { - throw new IllegalArgumentException("Unexpected blob path " + blobPath1); - } - }); - Map blobMetadataMap = Map.of( - "cluster-uuid1", - mock(BlobContainer.class), - "cluster-uuid2", - mock(BlobContainer.class), - "cluster-uuid3", - mock(BlobContainer.class) - ); - when(uuidContainerContainer.children()).thenReturn(blobMetadataMap); - when( - manifest2Container.listBlobsByPrefixInSortedOrder( - MANIFEST_FILE_PREFIX + DELIMITER, - Integer.MAX_VALUE, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenReturn(List.of(new PlainBlobMetadata("mainfest2", 1L))); - when( - manifest3Container.listBlobsByPrefixInSortedOrder( - MANIFEST_FILE_PREFIX + DELIMITER, - Integer.MAX_VALUE, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ) - ).thenReturn(List.of(new PlainBlobMetadata("mainfest3", 1L))); - remoteClusterStateService.start(); - remoteClusterStateService.deleteStaleClusterUUIDs(clusterState, clusterMetadataManifest); - try { - assertBusy(() -> { - verify(manifest2Container, times(1)).delete(); - verify(manifest3Container, times(1)).delete(); - }); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - public void testRemoteStateStats() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); @@ -1358,26 +1295,6 @@ public void testRemoteStateStats() throws IOException { assertEquals(0, remoteClusterStateService.getStats().getFailedCount()); } - public void testRemoteStateCleanupFailureStats() throws IOException { - BlobContainer blobContainer = mock(BlobContainer.class); - doThrow(IOException.class).when(blobContainer).delete(); - when(blobStore.blobContainer(any())).thenReturn(blobContainer); - BlobPath blobPath = new BlobPath().add("random-path"); - when((blobStoreRepository.basePath())).thenReturn(blobPath); - remoteClusterStateService.start(); - remoteClusterStateService.deleteStaleUUIDsClusterMetadata("cluster1", Arrays.asList("cluster-uuid1")); - try { - assertBusy(() -> 
{ - // wait for stats to get updated - assertTrue(remoteClusterStateService.getStats() != null); - assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); - assertEquals(1, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); - }); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - public void testFileNames() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() @@ -1418,36 +1335,6 @@ private void verifyManifestFileNameWithCodec(int codecVersion) { assertThat(splittedName[3], is("P")); } - public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception { - BlobContainer blobContainer = mock(BlobContainer.class); - BlobPath blobPath = new BlobPath().add("random-path"); - when((blobStoreRepository.basePath())).thenReturn(blobPath); - when(blobStore.blobContainer(any())).thenReturn(blobContainer); - - CountDownLatch latch = new CountDownLatch(1); - AtomicInteger callCount = new AtomicInteger(0); - doAnswer(invocation -> { - callCount.incrementAndGet(); - if (latch.await(5000, TimeUnit.SECONDS) == false) { - throw new Exception("Timed out waiting for delete task queuing to complete"); - } - return null; - }).when(blobContainer) - .listBlobsByPrefixInSortedOrder( - any(String.class), - any(int.class), - any(BlobContainer.BlobNameSortOrder.class), - any(ActionListener.class) - ); - - remoteClusterStateService.start(); - remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); - remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); - - latch.countDown(); - assertBusy(() -> assertEquals(1, callCount.get())); - } - public void testIndexMetadataUploadWaitTimeSetting() { // verify default value assertEquals( @@ -1496,6 +1383,33 @@ public void testGlobalMetadataUploadWaitTimeSetting() { assertEquals(globalMetadataUploadTimeout, remoteClusterStateService.getGlobalMetadataUploadTimeout().seconds()); } + public void testRemoteRoutingTableNotInitializedWhenDisabled() { + assertFalse(remoteClusterStateService.getRemoteRoutingTableService().isPresent()); + } + + public void testRemoteRoutingTableInitializedWhenEnabled() { + Settings newSettings = Settings.builder() + .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") + .put("node.attr." 
+ REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .build(); + clusterSettings.applySettings(newSettings); + + Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + + remoteClusterStateService = new RemoteClusterStateService( + "test-node-id", + repositoriesServiceSupplier, + newSettings, + clusterService, + () -> 0L, + threadPool, + List.of(new RemoteIndexPathUploader(threadPool, newSettings, repositoriesServiceSupplier, clusterSettings)) + ); + assertTrue(remoteClusterStateService.getRemoteRoutingTableService().isPresent()); + } + private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers) throws IOException { mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap()); } @@ -1557,7 +1471,7 @@ private void mockObjectsForGettingPreviousClusterUUID( .build(); Map<String, IndexMetadata> indexMetadataMap1 = Map.of("index-uuid1", indexMetadata1, "index-uuid2", indexMetadata2); mockBlobContainerForGlobalMetadata(blobContainer1, clusterManifest1, metadata1); - mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, ClusterMetadataManifest.CODEC_V2); + mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, ClusterMetadataManifest.CODEC_V3); List<UploadedIndexMetadata> uploadedIndexMetadataList2 = List.of( new UploadedIndexMetadata("index1", "index-uuid1", "key1"), @@ -1589,7 +1503,7 @@ private void mockObjectsForGettingPreviousClusterUUID( .build(); Map<String, IndexMetadata> indexMetadataMap2 = Map.of("index-uuid1", indexMetadata3, "index-uuid2", indexMetadata4); mockBlobContainerForGlobalMetadata(blobContainer2, clusterManifest2, metadata2); - mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2, ClusterMetadataManifest.CODEC_V2); + mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2, ClusterMetadataManifest.CODEC_V3); // differGlobalMetadata controls which one of IndexMetadata or Metadata object would be different // when comparing cluster-uuid3 and cluster-uuid1 state.
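Note: the CODEC_V2 -> CODEC_V3 bumps in these mocks track the two fields added to the manifest in this change (routing table version and indices routing). For reference, a minimal sketch of the serialize/parse round trip these manifest tests rely on, reusing the pattern from ClusterMetadataManifestTests above; the originalManifest instance is illustrative and would be built the same way as in those tests:

    // serialize the manifest to JSON
    final XContentBuilder builder = JsonXContent.contentBuilder();
    builder.startObject();
    originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS);
    builder.endObject();
    // parsing with the latest codec should yield an equal manifest
    try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) {
        assertEquals(originalManifest, ClusterMetadataManifest.fromXContent(parser));
    }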
@@ -1623,7 +1537,7 @@ private void mockObjectsForGettingPreviousClusterUUID( clusterUUIDCommitted.getOrDefault("cluster-uuid3", true) ); mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3); - mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V2); + mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V3); ArrayList mockBlobContainerOrderedList = new ArrayList<>( List.of(blobContainer1, blobContainer1, blobContainer3, blobContainer3, blobContainer2, blobContainer2) @@ -1891,7 +1805,7 @@ private static ClusterState.Builder generateClusterStateWithGlobalMetadata() { ); } - private static ClusterState.Builder generateClusterStateWithOneIndex() { + static ClusterState.Builder generateClusterStateWithOneIndex() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) @@ -1921,7 +1835,7 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { ); } - private static DiscoveryNodes nodesWithLocalNodeClusterManager() { + static DiscoveryNodes nodesWithLocalNodeClusterManager() { return DiscoveryNodes.builder().clusterManagerNodeId("cluster-manager-id").localNodeId("cluster-manager-id").build(); } diff --git a/server/src/test/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeaderTests.java b/server/src/test/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeaderTests.java new file mode 100644 index 0000000000000..a3f0ac36a40f1 --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeaderTests.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.routingtable; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.io.stream.BytesStreamInput; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class IndexRoutingTableHeaderTests extends OpenSearchTestCase { + + public void testIndexRoutingTableHeader() throws IOException { + String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); + IndexRoutingTableHeader header = new IndexRoutingTableHeader(indexName); + try (BytesStreamOutput out = new BytesStreamOutput()) { + header.writeTo(out); + + BytesStreamInput in = new BytesStreamInput(out.bytes().toBytesRef().bytes); + IndexRoutingTableHeader headerRead = new IndexRoutingTableHeader(in); + assertEquals(indexName, headerRead.getIndexName()); + + } + } + +} diff --git a/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableTests.java b/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableTests.java new file mode 100644 index 0000000000000..72066d8afb45b --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableTests.java @@ -0,0 +1,87 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.gateway.remote.routingtable; + +import org.opensearch.Version; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; + +public class RemoteIndexRoutingTableTests extends OpenSearchTestCase { + + public void testRoutingTableInput() { + String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); + int numberOfShards = randomIntBetween(1, 10); + int numberOfReplicas = randomIntBetween(1, 10); + Metadata metadata = Metadata.builder() + .put( + IndexMetadata.builder(indexName) + .settings(settings(Version.CURRENT)) + .numberOfShards(numberOfShards) + .numberOfReplicas(numberOfReplicas) + ) + .build(); + + RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index(indexName)).build(); + + initialRoutingTable.getIndicesRouting().values().forEach(indexShardRoutingTables -> { + RemoteIndexRoutingTable indexRouting = new RemoteIndexRoutingTable(indexShardRoutingTables); + try (BytesStreamOutput streamOutput = new BytesStreamOutput();) { + indexRouting.writeTo(streamOutput); + RemoteIndexRoutingTable remoteIndexRoutingTable = new RemoteIndexRoutingTable( + streamOutput.bytes().streamInput(), + metadata.index(indexName).getIndex() + ); + IndexRoutingTable indexRoutingTable = remoteIndexRoutingTable.getIndexRoutingTable(); + assertEquals(numberOfShards, indexRoutingTable.getShards().size()); + assertEquals(metadata.index(indexName).getIndex(), indexRoutingTable.getIndex()); + assertEquals( + numberOfShards * (1 + numberOfReplicas), + indexRoutingTable.shardsWithState(ShardRoutingState.UNASSIGNED).size() + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + public void testRoutingTableInputStreamWithInvalidIndex() { + Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .put(IndexMetadata.builder("invalid-index").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .build(); + + RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build(); + AtomicInteger assertionError = new AtomicInteger(); + initialRoutingTable.getIndicesRouting().values().forEach(indexShardRoutingTables -> { + RemoteIndexRoutingTable indexRouting = new RemoteIndexRoutingTable(indexShardRoutingTables); + try (BytesStreamOutput streamOutput = new BytesStreamOutput()) { + indexRouting.writeTo(streamOutput); + RemoteIndexRoutingTable remoteIndexRoutingTable = new RemoteIndexRoutingTable( + streamOutput.bytes().streamInput(), + metadata.index("invalid-index").getIndex() + ); + } catch (AssertionError e) { + assertionError.getAndIncrement(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + assertEquals(1, assertionError.get()); + } + +} diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java index 2aa310ae959d9..98bcaa3a1a46b 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java +++ 
b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java @@ -208,7 +208,7 @@ public void testChangeLocale() throws IOException { fieldMapping(b -> b.field("type", "date").field("format", "E, d MMM yyyy HH:mm:ss Z").field("locale", "de")) ); - mapper.parse(source(b -> b.field("field", "Mi, 06 Dez 2000 02:55:00 -0800"))); + mapper.parse(source(b -> b.field("field", "Mi., 06 Dez. 2000 02:55:00 -0800"))); } public void testNullValue() throws IOException { diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index bb0776e0ced25..67787e8583930 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -48,6 +48,7 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; @@ -470,6 +471,25 @@ public void testRefreshPersistentFailure() throws Exception { assertFalse("remote store should not in sync", tuple.v1().isRemoteSegmentStoreInSync()); } + public void testRefreshPersistentFailureAndIndexShardClosed() throws Exception { + int succeedOnAttempt = 3; + int closeShardOnAttempt = 1; + CountDownLatch refreshCountLatch = new CountDownLatch(1); + CountDownLatch successLatch = new CountDownLatch(10); + Tuple<RemoteStoreRefreshListener, RemoteStoreStatsTrackerFactory> tuple = mockIndexShardWithRetryAndScheduleRefresh( + succeedOnAttempt, + refreshCountLatch, + successLatch, + true, + closeShardOnAttempt + ); + // Give the upload ~2s, enough for a few iterations of remote refresh upload + Thread.sleep(TimeUnit.SECONDS.toMillis(2)); + RemoteStoreRefreshListener listener = tuple.v1(); + assertFalse("remote store should not be in sync", listener.isRemoteSegmentStoreInSync()); + assertFalse(listener.getRetryScheduledStatus()); + } + private void assertNoLagAndTotalUploadsFailed(RemoteSegmentTransferTracker segmentTracker, long totalUploadsFailed) throws Exception { assertBusy(() -> { assertEquals(0, segmentTracker.getBytesLag()); @@ -548,6 +568,49 @@ private Tuple mockIn return mockIndexShardWithRetryAndScheduleRefresh(succeedOnAttempt, refreshCountLatch, successLatch, 1, noOpLatch); } + private Tuple<RemoteStoreRefreshListener, RemoteStoreStatsTrackerFactory> mockIndexShardWithRetryAndScheduleRefresh( + int totalAttempt, + CountDownLatch refreshCountLatch, + CountDownLatch successLatch, + int checkpointPublishSucceedOnAttempt, + CountDownLatch reachedCheckpointPublishLatch, + boolean mockPrimaryTerm, + boolean testUploadTimeout + ) throws IOException { + return mockIndexShardWithRetryAndScheduleRefresh( + totalAttempt, + refreshCountLatch, + successLatch, + checkpointPublishSucceedOnAttempt, + reachedCheckpointPublishLatch, + mockPrimaryTerm, + testUploadTimeout, + false, + 0 + ); + } + + private Tuple<RemoteStoreRefreshListener, RemoteStoreStatsTrackerFactory> mockIndexShardWithRetryAndScheduleRefresh( + int succeedOnAttempt, + CountDownLatch refreshCountLatch, + CountDownLatch successLatch, + boolean closedShard, + int closeShardAfterAttempt + ) throws IOException { + CountDownLatch noOpLatch = new CountDownLatch(0); + return mockIndexShardWithRetryAndScheduleRefresh( + succeedOnAttempt, + refreshCountLatch, + successLatch, + 1, + noOpLatch, + true, + false, + closedShard, + closeShardAfterAttempt + ); + } + private Tuple<RemoteStoreRefreshListener, RemoteStoreStatsTrackerFactory> mockIndexShardWithRetryAndScheduleRefresh( int succeedOnAttempt, CountDownLatch
refreshCountLatch, @@ -562,7 +625,9 @@ private Tuple mockIn succeedCheckpointPublishOnAttempt, reachedCheckpointPublishLatch, true, - false + false, + false, + 0 ); } @@ -573,7 +638,9 @@ private Tuple mockIn int succeedCheckpointPublishOnAttempt, CountDownLatch reachedCheckpointPublishLatch, boolean mockPrimaryTerm, - boolean testUploadTimeout + boolean testUploadTimeout, + boolean closeShard, + int closeShardAfterAttempt ) throws IOException { // Create index shard that we will be using to mock different methods in IndexShard for the unit test indexShard = newStartedShard( @@ -601,7 +668,6 @@ private Tuple mockIn IndexShard shard = mock(IndexShard.class); Store store = mock(Store.class); when(shard.store()).thenReturn(store); - when(shard.state()).thenReturn(IndexShardState.STARTED); when(store.directory()).thenReturn(indexShard.store().directory()); // Mock (RemoteSegmentStoreDirectory) ((FilterDirectory) ((FilterDirectory) indexShard.remoteStore().directory()) @@ -663,6 +729,14 @@ private Tuple mockIn return indexShard.getLatestReplicationCheckpoint(); })).when(shard).computeReplicationCheckpoint(any()); + doAnswer((invocationOnMock -> { + if (closeShard && counter.get() == closeShardAfterAttempt) { + logger.info("Closing shard..."); + return IndexShardState.CLOSED; + } + return IndexShardState.STARTED; + })).when(shard).state(); + doAnswer(invocation -> { if (Objects.nonNull(successLatch)) { successLatch.countDown(); diff --git a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java index f89fd3df6e340..cc9096ee41315 100644 --- a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java +++ b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java @@ -116,4 +116,15 @@ public void testMaxRemoteReferencedTranslogFiles() { ); assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders()); } + + public void testDisableMaxRemoteReferencedTranslogFiles() { + // Test default value + assertEquals(1000, remoteStoreSettings.getMaxRemoteTranslogReaders()); + + // Test override with valid value + clusterSettings.applySettings( + Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "-1").build() + ); + assertEquals(-1, remoteStoreSettings.getMaxRemoteTranslogReaders()); + } } diff --git a/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java b/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java index c4ba271d27ae9..de7f8977686a7 100644 --- a/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java +++ b/server/src/test/java/org/opensearch/node/RemoteStoreNodeAttributeTests.java @@ -19,6 +19,7 @@ import java.net.InetAddress; import java.net.UnknownHostException; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -28,6 +29,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_CRYPTO_SETTINGS_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static 
org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -148,4 +150,77 @@ public void testNoCryptoMetadata() throws UnknownHostException { RepositoryMetadata repositoryMetadata = remoteStoreNodeAttribute.getRepositoriesMetadata().repositories().get(0); assertNull(repositoryMetadata.cryptoMetadata()); } + + public void testEqualsWithRepoSkip() throws UnknownHostException { + String repoName = "remote-store-A"; + String repoTypeSettingKey = String.format(Locale.ROOT, REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, repoName); + String repoSettingsKey = String.format(Locale.ROOT, REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, repoName); + Map<String, String> attr = Map.of( + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + repoName, + REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, + repoName, + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + repoName, + repoTypeSettingKey, + "s3", + repoSettingsKey, + "abc", + repoSettingsKey + "base_path", + "xyz" + ); + DiscoveryNode node = new DiscoveryNode( + "C", + new TransportAddress(InetAddress.getByName("localhost"), 9876), + attr, + emptySet(), + Version.CURRENT + ); + + RemoteStoreNodeAttribute remoteStoreNodeAttribute = new RemoteStoreNodeAttribute(node); + + String routingTableRepoName = "remote-store-B"; + String routingTableRepoTypeSettingKey = String.format( + Locale.ROOT, + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + routingTableRepoName + ); + String routingTableRepoSettingsKey = String.format( + Locale.ROOT, + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + routingTableRepoName + ); + + Map<String, String> attr2 = Map.of( + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + repoName, + REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, + repoName, + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + repoName, + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, + routingTableRepoName, + repoTypeSettingKey, + "s3", + repoSettingsKey, + "abc", + repoSettingsKey + "base_path", + "xyz", + routingTableRepoTypeSettingKey, + "s3", + routingTableRepoSettingsKey, + "xyz" + ); + DiscoveryNode node2 = new DiscoveryNode( + "C", + new TransportAddress(InetAddress.getByName("localhost"), 9876), + attr2, + emptySet(), + Version.CURRENT + ); + RemoteStoreNodeAttribute remoteStoreNodeAttribute2 = new RemoteStoreNodeAttribute(node2); + + assertFalse(remoteStoreNodeAttribute.equalsWithRepoSkip(remoteStoreNodeAttribute2, List.of())); + assertTrue(remoteStoreNodeAttribute.equalsWithRepoSkip(remoteStoreNodeAttribute2, List.of(routingTableRepoName))); + } } diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java index c5f36fcc01983..95a8267734a07 100644 --- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java @@ -49,13 +49,13 @@ import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.compress.CompressorRegistry; import org.opensearch.core.xcontent.ToXContent; import
org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.index.translog.BufferedChecksumStreamOutput; import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; import org.opensearch.test.OpenSearchTestCase; diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 5b39880930984..86de008b5dee5 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2559,7 +2559,8 @@ public void start(ClusterState initialState) { ElectionStrategy.DEFAULT_INSTANCE, () -> new StatusInfo(HEALTHY, "healthy-info"), persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); clusterManagerService.setClusterStatePublisher(coordinator); coordinator.start(); diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java new file mode 100644 index 0000000000000..d9aee5ebfa941 --- /dev/null +++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry; + +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.util.HashMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A simple implementation of Counter used by TestInMemoryMetricsRegistry in unit tests. + * It keeps an atomic integer that accumulates the values added without tags, along with a map + * that stores the values recorded against each set of tags. + * The map and the atomic integer can then be read back to verify the added values.
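+ * + * An illustrative usage sketch from a test body (assumes the Tags builder API; not part of this class): + * TestInMemoryCounter counter = new TestInMemoryCounter(); + * counter.add(2.0); + * counter.add(3.0, Tags.create().addTag("index", "test")); + * getCounterValue() then returns 2, while the tagged sample is kept separately in getCounterValueForTags().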
+ */ +public class TestInMemoryCounter implements Counter { + + private AtomicInteger counterValue = new AtomicInteger(0); + private ConcurrentHashMap<HashMap<String, ?>, Double> counterValueForTags = new ConcurrentHashMap<>(); + + public Integer getCounterValue() { + return this.counterValue.get(); + } + + public ConcurrentHashMap<HashMap<String, ?>, Double> getCounterValueForTags() { + return this.counterValueForTags; + } + + @Override + public void add(double value) { + counterValue.addAndGet((int) value); + } + + @Override + public synchronized void add(double value, Tags tags) { + HashMap<String, ?> hashMap = (HashMap<String, ?>) tags.getTagsMap(); + if (counterValueForTags.get(hashMap) == null) { + counterValueForTags.put(hashMap, value); + } else { + value = counterValueForTags.get(hashMap) + value; + counterValueForTags.put(hashMap, value); + } + } +} diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java new file mode 100644 index 0000000000000..ff28df2b6529d --- /dev/null +++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry; + +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.util.HashMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A simple implementation of Histogram used by TestInMemoryMetricsRegistry in unit tests. + * It keeps an atomic integer that accumulates the values recorded without tags, along with a map + * that stores the values recorded against each set of tags. + * The map and the atomic integer can then be read back to verify the recorded values. + */ +public class TestInMemoryHistogram implements Histogram { + + private AtomicInteger histogramValue = new AtomicInteger(0); + private ConcurrentHashMap<HashMap<String, ?>, Double> histogramValueForTags = new ConcurrentHashMap<>(); + + public Integer getHistogramValue() { + return this.histogramValue.get(); + } + + public ConcurrentHashMap<HashMap<String, ?>, Double> getHistogramValueForTags() { + return this.histogramValueForTags; + } + + @Override + public void record(double value) { + histogramValue.addAndGet((int) value); + } + + @Override + public synchronized void record(double value, Tags tags) { + HashMap<String, ?> hashMap = (HashMap<String, ?>) tags.getTagsMap(); + histogramValueForTags.put(hashMap, value); + } +} diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java new file mode 100644 index 0000000000000..6d395085b12ea --- /dev/null +++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.telemetry; + +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Supplier; + +/** + * A simple implementation of MetricsRegistry that can be used in unit tests. + * It initializes counters/histograms on creation and stores them in maps keyed by name. + * The maps can then be used to look the counters/histograms up by name. + */ +public class TestInMemoryMetricsRegistry implements MetricsRegistry { + + private ConcurrentHashMap<String, Counter> counterStore = new ConcurrentHashMap<>(); + private ConcurrentHashMap<String, Histogram> histogramStore = new ConcurrentHashMap<>(); + + public ConcurrentHashMap<String, Counter> getCounterStore() { + return this.counterStore; + } + + public ConcurrentHashMap<String, Histogram> getHistogramStore() { + return this.histogramStore; + } + + @Override + public Counter createCounter(String name, String description, String unit) { + TestInMemoryCounter counter = new TestInMemoryCounter(); + counterStore.putIfAbsent(name, counter); + return counter; + } + + @Override + public Counter createUpDownCounter(String name, String description, String unit) { + /** + * ToDo: To be implemented when required. + */ + return null; + } + + @Override + public Histogram createHistogram(String name, String description, String unit) { + TestInMemoryHistogram histogram = new TestInMemoryHistogram(); + histogramStore.putIfAbsent(name, histogram); + return histogram; + } + + @Override + public Closeable createGauge(String name, String description, String unit, Supplier<Double> valueProvider, Tags tags) { + /** + * ToDo: To be implemented when required. + */ + return null; + } + + @Override + public void close() throws IOException {} +} diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 3ae737bf63923..1c2270bab1260 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1183,7 +1183,8 @@ protected Optional getDisruptableMockTransport(Transpo getElectionStrategy(), nodeHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); clusterManagerService.setClusterStatePublisher(coordinator); final GatewayService gatewayService = new GatewayService(