diff --git a/CHANGELOG.md b/CHANGELOG.md index 06329385ee7a6..f2c5d3e4dfe1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428)) - Adding translog durability validation in index templates ([#15494](https://github.com/opensearch-project/OpenSearch/pull/15494)) - Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290)) -- [Reader Writer Separation] Add searchOnly replica routing configuration ([#15410](https://github.com/opensearch-project/OpenSearch/pull/15410)) +- [Reader Writer Separation] Add experimental search replica shard type to achieve reader writer separation ([#15237](https://github.com/opensearch-project/OpenSearch/pull/15237)) - [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) - [Workload Management] Add query group level failure tracking ([#15527](https://github.com/opensearch-project/OpenSearch/pull/15527)) - Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) @@ -53,6 +53,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) - ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508)) - Reset DiscoveryNodes in all transport node actions request ([#15131](https://github.com/opensearch-project/OpenSearch/pull/15131)) +- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) +- MultiTermQueries in keyword fields now default to the `indexed` approach and are gated behind a cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) ### Dependencies - Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) @@ -106,6 +108,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) - Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) - Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) +- Fix terms query on wildcard field returning no results ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml index 05b6b2e5ed712..c449a06cf633d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml @@ -227,3 +227,13 @@ setup: my_field: value: ".*" - match: { hits.total.value: 5 } +--- +"terms query on wildcard field matches": + - do: + search: + index: test + body: + query: + terms: { my_field: ["AbCd"] } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "5" } diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/SearchReplicaFilteringAllocationIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/SearchReplicaFilteringAllocationIT.java new file mode 100644 index 0000000000000..5f65d6647f26d --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/SearchReplicaFilteringAllocationIT.java @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.allocation; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.List; +import java.util.stream.Collectors; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SearchReplicaFilteringAllocationIT extends OpenSearchIntegTestCase { + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.TRUE).build(); + } + + public void testSearchReplicaDedicatedIncludes() { + List nodesIds = internalCluster().startNodes(3); + final String node_0 = nodesIds.get(0); + final String node_1 = nodesIds.get(1); + final String node_2 = nodesIds.get(2); + assertEquals(3, cluster().size()); + + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", node_1 + "," + node_0) + ) + .execute() + .actionGet(); + + createIndex( + "test", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build() + ); + ensureGreen("test"); + // ensure primary is not on node 0 or 1, + IndexShardRoutingTable routingTable = getRoutingTable(); + assertEquals(node_2, getNodeName(routingTable.primaryShard().currentNodeId())); + + String existingSearchReplicaNode = getNodeName(routingTable.searchOnlyReplicas().get(0).currentNodeId()); + String emptyAllowedNode = existingSearchReplicaNode.equals(node_0) ? node_1 : node_0; + + // set the included nodes to the other open node, search replica should relocate to that node. 
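+ // note: "_name" below is a suffix on the affix setting, i.e. the full key is
+ // "cluster.routing.allocation.search.replica.dedicated.include._name"; the other
+ // DiscoveryNodeFilters suffixes such as "_ip" or "_host" are expected to work the same way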
+ client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", emptyAllowedNode)) + .execute() + .actionGet(); + ensureGreen("test"); + + routingTable = getRoutingTable(); + assertEquals(node_2, getNodeName(routingTable.primaryShard().currentNodeId())); + assertEquals(emptyAllowedNode, getNodeName(routingTable.searchOnlyReplicas().get(0).currentNodeId())); + } + + public void testSearchReplicaDedicatedIncludes_DoNotAssignToOtherNodes() { + List nodesIds = internalCluster().startNodes(3); + final String node_0 = nodesIds.get(0); + final String node_1 = nodesIds.get(1); + final String node_2 = nodesIds.get(2); + assertEquals(3, cluster().size()); + + // set filter on 1 node and set search replica count to 2 - should leave 1 unassigned + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", node_1)) + .execute() + .actionGet(); + + logger.info("--> creating an index with no replicas"); + createIndex( + "test", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 2) + .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build() + ); + ensureYellowAndNoInitializingShards("test"); + IndexShardRoutingTable routingTable = getRoutingTable(); + assertEquals(2, routingTable.searchOnlyReplicas().size()); + List assignedSearchShards = routingTable.searchOnlyReplicas() + .stream() + .filter(ShardRouting::assignedToNode) + .collect(Collectors.toList()); + assertEquals(1, assignedSearchShards.size()); + assertEquals(node_1, getNodeName(assignedSearchShards.get(0).currentNodeId())); + assertEquals(1, routingTable.searchOnlyReplicas().stream().filter(ShardRouting::unassigned).count()); + } + + private IndexShardRoutingTable getRoutingTable() { + IndexShardRoutingTable routingTable = getClusterState().routingTable().index("test").getShards().get(0); + return routingTable; + } + + private String getNodeName(String id) { + return getClusterState().nodes().get(id).getName(); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java new file mode 100644 index 0000000000000..a1b512c326ac5 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.replication; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.After; +import org.junit.Before; + +import java.nio.file.Path; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SearchReplicaReplicationIT extends SegmentReplicationBaseIT { + + private static final String REPOSITORY_NAME = "test-remote-store-repo"; + protected Path absolutePath; + + private Boolean useRemoteStore; + + @Before + public void randomizeRemoteStoreEnabled() { + useRemoteStore = randomBoolean(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + if (useRemoteStore) { + if (absolutePath == null) { + absolutePath = randomRepoPath().toAbsolutePath(); + } + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(remoteStoreClusterSettings(REPOSITORY_NAME, absolutePath)) + .build(); + } + return super.nodeSettings(nodeOrdinal); + } + + @After + public void teardown() { + if (useRemoteStore) { + clusterAdmin().prepareCleanupRepository(REPOSITORY_NAME).get(); + } + } + + @Override + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build(); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, true).build(); + } + + public void testReplication() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primary = internalCluster().startDataOnlyNode(); + createIndex(INDEX_NAME); + ensureYellowAndNoInitializingShards(INDEX_NAME); + final String replica = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + final int docCount = 10; + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + waitForSearchableDocs(docCount, primary, replica); + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaFeatureFlagIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaFeatureFlagIT.java index e5a05c04fa7ee..ef18cff7e5b29 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaFeatureFlagIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaFeatureFlagIT.java @@ -17,6 +17,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, numDataNodes = 1) public class SearchOnlyReplicaFeatureFlagIT extends OpenSearchIntegTestCase { @@ -53,4 +54,15 @@ public void testUpdateFeatureFlagDisabled() { }); assertTrue(settingsException.getMessage().contains("unknown setting")); } + + public void 
testFilterAllocationSettingNotRegistered() { + expectThrows(SettingsException.class, () -> { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", "node")) + .execute() + .actionGet(); + }); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaIT.java index 5fc8e30ed2c7a..6bd91df1de66f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaIT.java @@ -8,14 +8,17 @@ package org.opensearch.indices.settings; +import org.opensearch.action.search.SearchResponse; import org.opensearch.action.support.WriteRequest; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.Preference; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.query.QueryBuilders; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; @@ -110,7 +113,6 @@ public void testFailoverWithSearchReplica_WithWriterReplicas() throws IOExceptio // add back a node internalCluster().startDataOnlyNode(); ensureGreen(TEST_INDEX); - } public void testFailoverWithSearchReplica_WithoutWriterReplicas() throws IOException { @@ -175,6 +177,39 @@ public void testSearchReplicaScaling() { assertActiveSearchShards(0); } + public void testSearchReplicaRoutingPreference() throws IOException { + int numSearchReplicas = 1; + int numWriterReplicas = 1; + internalCluster().startClusterManagerOnlyNode(); + String primaryNodeName = internalCluster().startDataOnlyNode(); + createIndex( + TEST_INDEX, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numWriterReplicas) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, numSearchReplicas) + .build() + ); + ensureYellow(TEST_INDEX); + client().prepareIndex(TEST_INDEX).setId("1").setSource("foo", "bar").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get(); + // add 2 nodes for the replicas + internalCluster().startDataOnlyNodes(2); + ensureGreen(TEST_INDEX); + + assertActiveShardCounts(numSearchReplicas, numWriterReplicas); + + // set preference to search replica here - we default to this when there are + // search replicas but tests will randomize this value if unset + SearchResponse response = client().prepareSearch(TEST_INDEX) + .setPreference(Preference.SEARCH_REPLICA.type()) + .setQuery(QueryBuilders.matchAllQuery()) + .get(); + + String nodeId = response.getHits().getAt(0).getShard().getNodeId(); + IndexShardRoutingTable indexShardRoutingTable = getIndexShardRoutingTable(); + assertEquals(nodeId, indexShardRoutingTable.searchOnlyReplicas().get(0).currentNodeId()); + } + /** * Helper to assert counts of active shards for each type. 
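* The first argument is the expected number of active search replicas and the second the
* expected number of active writer replicas, matching the
* assertActiveShardCounts(numSearchReplicas, numWriterReplicas) calls above.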
*/ diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java similarity index 96% rename from server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java rename to server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java index 02b6ea47172c7..1d7a58384c0be 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java @@ -32,7 +32,7 @@ import static org.hamcrest.Matchers.lessThan; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class DeleteSnapshotITV2 extends AbstractSnapshotIntegTestCase { +public class DeleteSnapshotV2IT extends AbstractSnapshotIntegTestCase { private static final String REMOTE_REPO_NAME = "remote-store-repo-name"; @@ -206,6 +206,7 @@ public void testDeleteShallowCopyV2MultipleSnapshots() throws Exception { } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15692") public void testRemoteStoreCleanupForDeletedIndexForSnapshotV2() throws Exception { disableRepoConsistencyCheck("Remote store repository is being used in the test"); final Path remoteStoreRepoPath = randomRepoPath(); @@ -276,9 +277,11 @@ public void testRemoteStoreCleanupForDeletedIndexForSnapshotV2() throws Exceptio Path indexPath = Path.of(String.valueOf(remoteStoreRepoPath), indexUUID); Path shardPath = Path.of(String.valueOf(indexPath), "0"); Path segmentsPath = Path.of(String.valueOf(shardPath), "segments"); + Path translogPath = Path.of(String.valueOf(shardPath), "translog"); // Get total segments remote store directory file count for deleted index and shard 0 int segmentFilesCountBeforeDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath); + int translogFilesCountBeforeDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(translogPath); RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); @@ -312,6 +315,13 @@ public void testRemoteStoreCleanupForDeletedIndexForSnapshotV2() throws Exceptio assertThat(RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath), lessThan(segmentFilesCountAfterDeletingSnapshot1)); } catch (Exception e) {} }, 60, TimeUnit.SECONDS); + + assertBusy(() -> { + try { + assertThat(RemoteStoreBaseIntegTestCase.getFileCount(translogPath), lessThan(translogFilesCountBeforeDeletingSnapshot1)); + } catch (Exception e) {} + }, 60, TimeUnit.SECONDS); + } private Settings snapshotV2Settings(Path remoteStoreRepoPath) { diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java index 5a043e69e9735..c3214022df663 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java @@ -51,6 +51,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.breaker.CircuitBreakingException; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.rest.RestStatus; import org.opensearch.index.IndexNotFoundException; @@ -602,11 +603,11 @@ public void
testSnapshotStatusApiFailureForTooManyShardsAcrossSnapshots() throws // across a single snapshot assertBusy(() -> { - TooManyShardsInSnapshotsStatusException exception = expectThrows( - TooManyShardsInSnapshotsStatusException.class, + CircuitBreakingException exception = expectThrows( + CircuitBreakingException.class, () -> client().admin().cluster().prepareSnapshotStatus(repositoryName).setSnapshots(snapshot1).execute().actionGet() ); - assertEquals(exception.status(), RestStatus.REQUEST_ENTITY_TOO_LARGE); + assertEquals(exception.status(), RestStatus.TOO_MANY_REQUESTS); assertTrue( exception.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request") ); @@ -614,8 +615,8 @@ public void testSnapshotStatusApiFailureForTooManyShardsAcrossSnapshots() throws // across multiple snapshots assertBusy(() -> { - TooManyShardsInSnapshotsStatusException exception = expectThrows( - TooManyShardsInSnapshotsStatusException.class, + CircuitBreakingException exception = expectThrows( + CircuitBreakingException.class, () -> client().admin() .cluster() .prepareSnapshotStatus(repositoryName) @@ -623,7 +624,7 @@ public void testSnapshotStatusApiFailureForTooManyShardsAcrossSnapshots() throws .execute() .actionGet() ); - assertEquals(exception.status(), RestStatus.REQUEST_ENTITY_TOO_LARGE); + assertEquals(exception.status(), RestStatus.TOO_MANY_REQUESTS); assertTrue( exception.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request") ); @@ -741,8 +742,8 @@ public void testSnapshotStatusFailuresWithIndexFilter() throws Exception { updateSettingsRequest.persistentSettings(Settings.builder().put(MAX_SHARDS_ALLOWED_IN_STATUS_API.getKey(), 2)); assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); - TooManyShardsInSnapshotsStatusException ex = expectThrows( - TooManyShardsInSnapshotsStatusException.class, + CircuitBreakingException ex = expectThrows( + CircuitBreakingException.class, () -> client().admin() .cluster() .prepareSnapshotStatus(repositoryName) @@ -751,7 +752,7 @@ public void testSnapshotStatusFailuresWithIndexFilter() throws Exception { .execute() .actionGet() ); - assertEquals(ex.status(), RestStatus.REQUEST_ENTITY_TOO_LARGE); + assertEquals(ex.status(), RestStatus.TOO_MANY_REQUESTS); assertTrue(ex.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request")); logger.info("Reset MAX_SHARDS_ALLOWED_IN_STATUS_API to default value"); diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java index f2a9b88f790c9..8228cb6301c8c 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java @@ -49,6 +49,8 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.breaker.CircuitBreaker; +import org.opensearch.core.common.breaker.CircuitBreakingException; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.util.CollectionUtils; import org.opensearch.core.index.shard.ShardId; @@ -66,7 +68,6 @@ import 
org.opensearch.snapshots.SnapshotShardsService; import org.opensearch.snapshots.SnapshotState; import org.opensearch.snapshots.SnapshotsService; -import org.opensearch.snapshots.TooManyShardsInSnapshotsStatusException; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -458,13 +459,17 @@ private Map snapshotsInfo( snapshotsInfoMap.put(snapshotId, snapshotInfo); } if (totalShardsAcrossSnapshots > maximumAllowedShardCount && request.indices().length == 0) { - String message = "Total shard count [" + String message = "[" + + repositoryName + + ":" + + String.join(", ", request.snapshots()) + + "]" + + " Total shard count [" + totalShardsAcrossSnapshots + "] is more than the maximum allowed value of shard count [" + maximumAllowedShardCount + "] for snapshot status request"; - - throw new TooManyShardsInSnapshotsStatusException(repositoryName, message, request.snapshots()); + throw new CircuitBreakingException(message, CircuitBreaker.Durability.PERMANENT); } return unmodifiableMap(snapshotsInfoMap); } @@ -520,15 +525,19 @@ private Map snapshotShards( } if (totalShardsAcrossIndices > maximumAllowedShardCount && requestedIndexNames.isEmpty() == false && isV2Snapshot == false) { - String message = "Total shard count [" + String message = "[" + + repositoryName + + ":" + + String.join(", ", request.snapshots()) + + "]" + + " Total shard count [" + totalShardsAcrossIndices + "] across the requested indices [" + requestedIndexNames.stream().collect(Collectors.joining(", ")) + "] is more than the maximum allowed value of shard count [" + maximumAllowedShardCount + "] for snapshot status request"; - - throw new TooManyShardsInSnapshotsStatusException(repositoryName, message, snapshotName); + throw new CircuitBreakingException(message, CircuitBreaker.Durability.PERMANENT); } final Map shardStatus = new HashMap<>(); diff --git a/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java b/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java index 648f58dada4f9..b69d4b6f624b8 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java @@ -45,13 +45,12 @@ import org.opensearch.index.engine.Segment; import java.io.IOException; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Set; /** * Transport response for retrieving indices segment information @@ -86,21 +85,24 @@ public Map getIndices() { return indicesSegments; } Map indicesSegments = new HashMap<>(); - - Set indices = new HashSet<>(); - for (ShardSegments shard : shards) { - indices.add(shard.getShardRouting().getIndexName()); + if (shards.length == 0) { + this.indicesSegments = indicesSegments; + return indicesSegments; } - for (String indexName : indices) { - List shards = new ArrayList<>(); - for (ShardSegments shard : this.shards) { - if (shard.getShardRouting().getIndexName().equals(indexName)) { - shards.add(shard); - } + Arrays.sort(shards, Comparator.comparing(shardSegment -> shardSegment.getShardRouting().getIndexName())); + int startIndexPos = 0; + String startIndexName = shards[startIndexPos].getShardRouting().getIndexName(); + for (int i = 0; i < shards.length; i++) { + if 
(!shards[i].getShardRouting().getIndexName().equals(startIndexName)) { + indicesSegments.put(startIndexName, new IndexSegments(startIndexName, Arrays.copyOfRange(shards, startIndexPos, i))); + startIndexPos = i; + startIndexName = shards[startIndexPos].getShardRouting().getIndexName(); } - indicesSegments.put(indexName, new IndexSegments(indexName, shards.toArray(new ShardSegments[0]))); } + // Add the final index's group of shards, which the loop above does not emit + indicesSegments.put(startIndexName, new IndexSegments(startIndexName, Arrays.copyOfRange(shards, startIndexPos, shards.length))); + this.indicesSegments = indicesSegments; return indicesSegments; } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index 04f39d77ce6c8..03fb55323feec 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -101,7 +101,7 @@ public CommonStatsFlags(StreamInput in) throws IOException { includeCaches = in.readEnumSet(CacheType.class); levels = in.readStringArray(); } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { includeIndicesStatsByLevel = in.readBoolean(); } } @@ -128,7 +128,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeEnumSet(includeCaches); out.writeStringArrayNullable(levels); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeBoolean(includeIndicesStatsByLevel); } } diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index bb51c42252448..d9bb87a517927 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -75,6 +75,7 @@ import org.opensearch.cluster.routing.allocation.decider.ResizeAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.RestoreInProgressAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.TargetPoolAllocationDecider; @@ -85,6 +86,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.util.set.Sets; import org.opensearch.core.ParseField; @@ -379,6 +381,9 @@ public static Collection<AllocationDecider> createAllocationDeciders( addAllocationDecider(deciders, new SnapshotInProgressAllocationDecider()); addAllocationDecider(deciders, new RestoreInProgressAllocationDecider()); addAllocationDecider(deciders, new FilterAllocationDecider(settings, clusterSettings)); + if (FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(settings)) { + addAllocationDecider(deciders, new SearchReplicaAllocationDecider(settings, clusterSettings)); + }
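+ // the decider is registered only when the reader/writer split feature flag is enabled; its
+ // position in this list is not significant, since a single NO from any decider vetoes an allocation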
addAllocationDecider(deciders, new SameShardAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new DiskThresholdDecider(settings, clusterSettings)); addAllocationDecider(deciders, new ThrottlingAllocationDecider(settings, clusterSettings)); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index f77a7ffc8ce8e..838b5723b217b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -67,6 +67,7 @@ import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; @@ -458,7 +459,7 @@ public static void ensureNodesCompatibility( ); } - ensureRemoteStoreNodesCompatibility(joiningNode, currentNodes, metadata); + ensureRemoteRepositoryCompatibility(joiningNode, currentNodes, metadata); } /** @@ -491,6 +492,30 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) } } + public static void ensureRemoteRepositoryCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes, Metadata metadata) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); + + boolean isClusterRemoteStoreEnabled = existingNodes.stream().anyMatch(DiscoveryNode::isRemoteStoreNode); + if (isClusterRemoteStoreEnabled || joiningNode.isRemoteStoreNode()) { + ensureRemoteStoreNodesCompatibility(joiningNode, currentNodes, metadata); + } else { + ensureRemoteClusterStateNodesCompatibility(joiningNode, currentNodes); + } + } + + private static void ensureRemoteClusterStateNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); + + assert existingNodes.isEmpty() == false; + Optional remotePublicationNode = existingNodes.stream() + .filter(DiscoveryNode::isRemoteStatePublicationEnabled) + .findFirst(); + + if (remotePublicationNode.isPresent() && joiningNode.isRemoteStatePublicationEnabled()) { + ensureRepositoryCompatibility(joiningNode, remotePublicationNode.get(), REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES); + } + } + /** * The method ensures homogeneity - * 1. The joining node has to be a remote store backed if it's joining a remote store backed cluster. Validates @@ -506,6 +531,7 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) * needs to be modified. 
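* Note: when neither the existing cluster nor the joining node is remote store backed, the join
* is validated by ensureRemoteClusterStateNodesCompatibility instead, which only compares the
* remote cluster state publication repositories.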
*/ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes, Metadata metadata) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); assert existingNodes.isEmpty() == false; @@ -587,6 +613,23 @@ private static void ensureRemoteStoreNodesCompatibility( } } + private static void ensureRepositoryCompatibility(DiscoveryNode joiningNode, DiscoveryNode existingNode, List reposToValidate) { + + RemoteStoreNodeAttribute joiningRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(joiningNode); + RemoteStoreNodeAttribute existingRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(existingNode); + + if (existingRemoteStoreNodeAttribute.equalsForRepositories(joiningRemoteStoreNodeAttribute, reposToValidate) == false) { + throw new IllegalStateException( + "a remote store node [" + + joiningNode + + "] is trying to join a remote store cluster with incompatible node attributes in " + + "comparison with existing node [" + + existingNode + + "]" + ); + } + } + public static Collection> addBuiltInJoinValidators( Collection> onJoinValidators ) { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java index 4b3dc7964a87b..59452e33191d7 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java @@ -184,6 +184,24 @@ public boolean equalsIgnoreGenerationsWithRepoSkip(@Nullable RepositoriesMetadat .filter(repo -> !reposToSkip.contains(repo.name())) .collect(Collectors.toList()); + return equalsRepository(currentRepositories, otherRepositories); + } + + public boolean equalsIgnoreGenerationsForRepo(@Nullable RepositoriesMetadata other, List reposToValidate) { + if (other == null) { + return false; + } + List currentRepositories = repositories.stream() + .filter(repo -> reposToValidate.contains(repo.name())) + .collect(Collectors.toList()); + List otherRepositories = other.repositories.stream() + .filter(repo -> reposToValidate.contains(repo.name())) + .collect(Collectors.toList()); + + return equalsRepository(currentRepositories, otherRepositories); + } + + public static boolean equalsRepository(List currentRepositories, List otherRepositories) { if (otherRepositories.size() != currentRepositories.size()) { return false; } diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 3ed7633dce189..a6f0a457f7f9b 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -65,6 +65,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; /** * A discovery node represents a node that is part of the cluster. 
@@ -509,7 +510,8 @@ public boolean isSearchNode() { * @return true if the node contains remote store node attributes, false otherwise */ public boolean isRemoteStoreNode() { - return this.getAttributes().keySet().stream().anyMatch(key -> key.startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)); + return this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)) + && this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY)); } /** diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java index 4cc3300676986..f25cb14f65eca 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java @@ -647,15 +647,11 @@ public ShardIterator replicaActiveInitializingShardIt() { return new PlainShardIterator(shardId, Collections.emptyList()); } - LinkedList ordered = new LinkedList<>(); - for (ShardRouting replica : shuffler.shuffle(replicas)) { - if (replica.active()) { - ordered.addFirst(replica); - } else if (replica.initializing()) { - ordered.addLast(replica); - } - } - return new PlainShardIterator(shardId, ordered); + return filterAndOrderShards(replica -> true); + } + + public ShardIterator searchReplicaActiveInitializingShardIt() { + return filterAndOrderShards(ShardRouting::isSearchOnly); } /** @@ -686,6 +682,20 @@ public ShardIterator replicaFirstActiveInitializingShardsIt() { return new PlainShardIterator(shardId, ordered); } + private ShardIterator filterAndOrderShards(Predicate filter) { + LinkedList ordered = new LinkedList<>(); + for (ShardRouting replica : shuffler.shuffle(replicas)) { + if (filter.test(replica)) { + if (replica.active()) { + ordered.addFirst(replica); + } else if (replica.initializing()) { + ordered.addLast(replica); + } + } + } + return new PlainShardIterator(shardId, ordered); + } + /** * Returns an iterator on active and initializing shards residing on the provided nodeId. 
*/ diff --git a/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java b/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java index 6242247f34a93..fe9e00b250e70 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java +++ b/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java @@ -121,6 +121,7 @@ public class OperationRouting { private volatile boolean isFailOpenEnabled; private volatile boolean isStrictWeightedShardRouting; private volatile boolean ignoreWeightedRouting; + private final boolean isReaderWriterSplitEnabled; public OperationRouting(Settings settings, ClusterSettings clusterSettings) { // whether to ignore awareness attributes when routing requests @@ -141,6 +142,7 @@ public OperationRouting(Settings settings, ClusterSettings clusterSettings) { clusterSettings.addSettingsUpdateConsumer(WEIGHTED_ROUTING_FAILOPEN_ENABLED, this::setFailOpenEnabled); clusterSettings.addSettingsUpdateConsumer(STRICT_WEIGHTED_SHARD_ROUTING_ENABLED, this::setStrictWeightedShardRouting); clusterSettings.addSettingsUpdateConsumer(IGNORE_WEIGHTED_SHARD_ROUTING, this::setIgnoreWeightedRouting); + this.isReaderWriterSplitEnabled = FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(settings); } void setUseAdaptiveReplicaSelection(boolean useAdaptiveReplicaSelection) { @@ -254,6 +256,14 @@ public GroupShardsIterator searchShards( preference = Preference.PRIMARY_FIRST.type(); } + if (isReaderWriterSplitEnabled) { + if (preference == null || preference.isEmpty()) { + if (indexMetadataForShard.getNumberOfSearchOnlyReplicas() > 0) { + preference = Preference.SEARCH_REPLICA.type(); + } + } + } + ShardIterator iterator = preferenceActiveShardIterator( shard, clusterState.nodes().getLocalNodeId(), @@ -366,6 +376,8 @@ private ShardIterator preferenceActiveShardIterator( return indexShard.primaryFirstActiveInitializingShardsIt(); case REPLICA_FIRST: return indexShard.replicaFirstActiveInitializingShardsIt(); + case SEARCH_REPLICA: + return indexShard.searchReplicaActiveInitializingShardIt(); case ONLY_LOCAL: return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId); case ONLY_NODES: diff --git a/server/src/main/java/org/opensearch/cluster/routing/Preference.java b/server/src/main/java/org/opensearch/cluster/routing/Preference.java index a1ea01afa118f..093e3d5fd45f8 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/Preference.java +++ b/server/src/main/java/org/opensearch/cluster/routing/Preference.java @@ -73,6 +73,11 @@ public enum Preference { */ REPLICA_FIRST("_replica_first"), + /** + * Route to search replica shards + */ + SEARCH_REPLICA("_search_replica"), + /** * Route to the local shard only */ @@ -127,6 +132,8 @@ public static Preference parse(String preference) { return ONLY_LOCAL; case "_only_nodes": return ONLY_NODES; + case "_search_replica": + return SEARCH_REPLICA; default: throw new IllegalArgumentException("no Preference for [" + preferenceType + "]"); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDecider.java new file mode 100644 index 0000000000000..955c396bee4da --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDecider.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made 
to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeFilters; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Setting.Property; +import org.opensearch.common.settings.Settings; +import org.opensearch.node.remotestore.RemoteStoreNodeService; + +import java.util.Map; + +import static org.opensearch.cluster.node.DiscoveryNodeFilters.IP_VALIDATOR; +import static org.opensearch.cluster.node.DiscoveryNodeFilters.OpType.OR; + +/** + * This allocation decider is similar to FilterAllocationDecider but provides + * the option to filter specifically for search replicas. + * The filter behaves similarly to an include filter on any defined node attribute. + * A search replica can only be allocated to nodes with one of the specified attributes, while + * other shard types will be rejected from nodes with any of the specified attributes. + * @opensearch.internal + */ +public class SearchReplicaAllocationDecider extends AllocationDecider { + + public static final String NAME = "filter"; + private static final String SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX = "cluster.routing.allocation.search.replica.dedicated.include"; + public static final Setting.AffixSetting<String> SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING = Setting.prefixKeySetting( + SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX + ".", + key -> Setting.simpleString(key, value -> IP_VALIDATOR.accept(key, value), Property.Dynamic, Property.NodeScope) + ); + + private volatile DiscoveryNodeFilters searchReplicaIncludeFilters; + + private volatile RemoteStoreNodeService.Direction migrationDirection; + private volatile RemoteStoreNodeService.CompatibilityMode compatibilityMode; + + public SearchReplicaAllocationDecider(Settings settings, ClusterSettings clusterSettings) { + setSearchReplicaIncludeFilters(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getAsMap(settings)); + clusterSettings.addAffixMapUpdateConsumer( + SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING, + this::setSearchReplicaIncludeFilters, + (a, b) -> {} + ); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return shouldFilter(shardRouting, node.node(), allocation); + } + + @Override + public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return shouldFilter(shardRouting, node.node(), allocation); + } + + private Decision shouldFilter(ShardRouting shardRouting, DiscoveryNode node, RoutingAllocation allocation) { + if (searchReplicaIncludeFilters != null) { + final boolean match = searchReplicaIncludeFilters.match(node); + if (match == false && shardRouting.isSearchOnly()) { + return allocation.decision( + Decision.NO, + NAME, + "node does not match shard setting [%s] filters [%s]", + SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX, + searchReplicaIncludeFilters + ); + } + // the filter only applies to search replicas + if (shardRouting.isSearchOnly() == false && match) { + return allocation.decision( + Decision.NO, + NAME, + "only search replicas can be allocated to node with setting [%s] filters [%s]", +
SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX, + searchReplicaIncludeFilters + ); + } + } + return allocation.decision(Decision.YES, NAME, "node passes include/exclude/require filters"); + } + + private void setSearchReplicaIncludeFilters(Map filters) { + searchReplicaIncludeFilters = DiscoveryNodeFilters.trimTier( + DiscoveryNodeFilters.buildOrUpdateFromKeyValue(searchReplicaIncludeFilters, OR, filters) + ); + } +} diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 9c0a192df9911..ab53a1d8312d0 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -76,6 +76,7 @@ import org.opensearch.cluster.routing.allocation.decider.FilterAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.NodeLoadAwareAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import org.opensearch.cluster.service.ClusterApplierService; @@ -550,6 +551,7 @@ public void apply(Settings value, Settings current, Settings previous) { SearchService.MAX_AGGREGATION_REWRITE_FILTERS, SearchService.INDICES_MAX_CLAUSE_COUNT_SETTING, SearchService.CARDINALITY_AGGREGATION_PRUNING_THRESHOLD, + SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED, CreatePitController.PIT_INIT_KEEP_ALIVE, Node.WRITE_PORTS_FILE_SETTING, Node.NODE_NAME_SETTING, @@ -815,6 +817,8 @@ public void apply(Settings value, Settings current, Settings previous) { OpenSearchOnHeapCacheSettings.EXPIRE_AFTER_ACCESS_SETTING.getConcreteSettingForNamespace( CacheType.INDICES_REQUEST_CACHE.getSettingPrefix() ) - ) + ), + List.of(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL), + List.of(SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING) ); } diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index dc1bf94662385..79de97dc96fba 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -73,6 +73,7 @@ import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; +import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexingOperationListener; import org.opensearch.index.shard.SearchOperationListener; import org.opensearch.index.similarity.SimilarityService; @@ -629,6 +630,56 @@ public IndexService newIndexService( Supplier clusterDefaultRefreshIntervalSupplier, RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings + ) throws IOException { + return newIndexService( + indexCreationContext, + environment, + xContentRegistry, + shardStoreDeleter, + circuitBreakerService, + bigArrays, + threadPool, + scriptService, + clusterService, + client, + indicesQueryCache, + mapperRegistry, + indicesFieldDataCache, + namedWriteableRegistry, + idFieldDataEnabled, + valuesSourceRegistry, + remoteDirectoryFactory, + translogFactorySupplier, + clusterDefaultRefreshIntervalSupplier, + recoverySettings, + remoteStoreSettings, + 
(s) -> {} + ); + } + + public IndexService newIndexService( + IndexService.IndexCreationContext indexCreationContext, + NodeEnvironment environment, + NamedXContentRegistry xContentRegistry, + IndexService.ShardStoreDeleter shardStoreDeleter, + CircuitBreakerService circuitBreakerService, + BigArrays bigArrays, + ThreadPool threadPool, + ScriptService scriptService, + ClusterService clusterService, + Client client, + IndicesQueryCache indicesQueryCache, + MapperRegistry mapperRegistry, + IndicesFieldDataCache indicesFieldDataCache, + NamedWriteableRegistry namedWriteableRegistry, + BooleanSupplier idFieldDataEnabled, + ValuesSourceRegistry valuesSourceRegistry, + IndexStorePlugin.DirectoryFactory remoteDirectoryFactory, + BiFunction translogFactorySupplier, + Supplier clusterDefaultRefreshIntervalSupplier, + RecoverySettings recoverySettings, + RemoteStoreSettings remoteStoreSettings, + Consumer replicator ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -689,7 +740,8 @@ public IndexService newIndexService( recoverySettings, remoteStoreSettings, fileCache, - compositeIndexSettings + compositeIndexSettings, + replicator ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 12b02d3dbd6fa..e5dd44a70a11c 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -136,6 +136,7 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.common.collect.MapBuilder.newMapBuilder; +import static org.opensearch.common.util.FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING; import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater.indexHasRemoteStoreSettings; /** @@ -174,6 +175,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private volatile AsyncTranslogFSync fsyncTask; private volatile AsyncGlobalCheckpointTask globalCheckpointTask; private volatile AsyncRetentionLeaseSyncTask retentionLeaseSyncTask; + private volatile AsyncReplicationTask asyncReplicationTask; // don't convert to Setting<> and register... 
we only set this in tests and register via a plugin private final String INDEX_TRANSLOG_RETENTION_CHECK_INTERVAL_SETTING = "index.translog.retention.check_interval"; @@ -194,6 +196,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private final RemoteStoreSettings remoteStoreSettings; private final FileCache fileCache; private final CompositeIndexSettings compositeIndexSettings; + private final Consumer replicator; public IndexService( IndexSettings indexSettings, @@ -231,7 +234,8 @@ public IndexService( RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings, FileCache fileCache, - CompositeIndexSettings compositeIndexSettings + CompositeIndexSettings compositeIndexSettings, + Consumer replicator ) { super(indexSettings); this.allowExpensiveQueries = allowExpensiveQueries; @@ -306,11 +310,15 @@ public IndexService( this.trimTranslogTask = new AsyncTrimTranslogTask(this); this.globalCheckpointTask = new AsyncGlobalCheckpointTask(this); this.retentionLeaseSyncTask = new AsyncRetentionLeaseSyncTask(this); + if (READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(indexSettings.getNodeSettings())) { + this.asyncReplicationTask = new AsyncReplicationTask(this); + } this.translogFactorySupplier = translogFactorySupplier; this.recoverySettings = recoverySettings; this.remoteStoreSettings = remoteStoreSettings; this.compositeIndexSettings = compositeIndexSettings; this.fileCache = fileCache; + this.replicator = replicator; updateFsyncTaskIfNecessary(); } @@ -386,7 +394,8 @@ public IndexService( recoverySettings, remoteStoreSettings, null, - null + null, + s -> {} ); } @@ -395,6 +404,11 @@ static boolean needsMapperService(IndexSettings indexSettings, IndexCreationCont && indexCreationContext == IndexCreationContext.CREATE_INDEX); // metadata verification needs a mapper service } + // visible for tests + AsyncReplicationTask getReplicationTask() { + return asyncReplicationTask; + } + /** * Context for index creation * @@ -853,7 +867,7 @@ public IndexSettings getIndexSettings() { * {@link IndexReader}-specific optimizations, such as rewriting containing range queries. */ public QueryShardContext newQueryShardContext(int shardId, IndexSearcher searcher, LongSupplier nowInMillis, String clusterAlias) { - return newQueryShardContext(shardId, searcher, nowInMillis, clusterAlias, false); + return newQueryShardContext(shardId, searcher, nowInMillis, clusterAlias, false, false); } /** @@ -867,7 +881,8 @@ public QueryShardContext newQueryShardContext( IndexSearcher searcher, LongSupplier nowInMillis, String clusterAlias, - boolean validate + boolean validate, + boolean keywordIndexOrDocValuesEnabled ) { final SearchIndexNameMatcher indexNameMatcher = new SearchIndexNameMatcher( index().getName(), @@ -893,7 +908,8 @@ public QueryShardContext newQueryShardContext( indexNameMatcher, allowExpensiveQueries, valuesSourceRegistry, - validate + validate, + keywordIndexOrDocValuesEnabled ); } @@ -1065,11 +1081,22 @@ public synchronized void updateMetadata(final IndexMetadata currentIndexMetadata } onRefreshIntervalChange(); updateFsyncTaskIfNecessary(); + if (READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(indexSettings.getNodeSettings())) { + updateReplicationTask(); + } } metadataListeners.forEach(c -> c.accept(newIndexMetadata)); } + private void updateReplicationTask() { + try { + asyncReplicationTask.close(); + } finally { + asyncReplicationTask = new AsyncReplicationTask(this); + } + } + /** * Called whenever the refresh interval changes. 
This can happen in 2 cases - * 1. {@code cluster.default.index.refresh_interval} cluster setting changes. The change would only happen for @@ -1334,6 +1361,47 @@ public String toString() { } } + final class AsyncReplicationTask extends BaseAsyncTask { + + AsyncReplicationTask(IndexService indexService) { + super(indexService, indexService.getRefreshInterval()); + } + + @Override + protected void runInternal() { + indexService.maybeSyncSegments(false); + } + + @Override + protected String getThreadPool() { + return ThreadPool.Names.GENERIC; + } + + @Override + public String toString() { + return "replication"; + } + + @Override + protected boolean mustReschedule() { + return indexSettings.isSegRepEnabledOrRemoteNode() && super.mustReschedule(); + } + } + + private void maybeSyncSegments(boolean force) { + if (getRefreshInterval().millis() > 0 || force) { + for (IndexShard shard : this.shards.values()) { + try { + if (shard.routingEntry().isSearchOnly() && shard.routingEntry().active()) { + replicator.accept(shard); + } + } catch (IndexShardClosedException | AlreadyClosedException ex) { + // do nothing + } + } + } + } + final class AsyncTrimTranslogTask extends BaseAsyncTask { AsyncTrimTranslogTask(IndexService indexService) { diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java index ca55e8e4573ab..7901336151c8e 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java @@ -164,7 +164,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r // adding metric fields for (Metric metric : starTreeMetadata.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { fields.add( fullyQualifiedFieldNameForStarTreeMetricsDocValues( compositeFieldName, diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java index 9accb0201170a..be16f1e9886cd 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java @@ -13,6 +13,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -23,10 +24,18 @@ public class Metric implements ToXContent { private final String field; private final List metrics; + private final List baseMetrics; public Metric(String field, List metrics) { this.field = field; this.metrics = metrics; + this.baseMetrics = new ArrayList<>(); + for (MetricStat metricStat : metrics) { + if (metricStat.isDerivedMetric()) { + continue; + } + baseMetrics.add(metricStat); + } } public String getField() { @@ -37,6 +46,13 @@ public List getMetrics() { return metrics; } + /** + * Returns only the base metrics + */ + public List getBaseMetrics() { + return baseMetrics; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index ba4cb792c00df..a1d638616f2aa 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -150,10 +150,7 @@ public List generateMetricAggregatorInfos(MapperService ma metricAggregatorInfos.add(metricAggregatorInfo); continue; } - for (MetricStat metricStat : metric.getMetrics()) { - if (metricStat.isDerivedMetric()) { - continue; - } + for (MetricStat metricStat : metric.getBaseMetrics()) { FieldValueConverter fieldValueConverter; Mapper fieldMapper = mapperService.documentMapper().mappers().getMapper(metric.getField()); if (fieldMapper instanceof FieldMapper && ((FieldMapper) fieldMapper).fieldType() instanceof FieldValueConverter) { @@ -185,7 +182,7 @@ public List getMetricReaders(SegmentWriteState stat List metricReaders = new ArrayList<>(); for (Metric metric : this.starTreeField.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { SequentialDocValuesIterator metricReader; FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); if (metricStat.equals(MetricStat.DOC_COUNT)) { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java index 62fda3e56d289..1613b7c5a3ac0 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java @@ -158,7 +158,7 @@ Iterator mergeStarTrees(List starTreeValuesSub List metricReaders = new ArrayList<>(); // get doc id set iterators for metrics for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeValues.getStarTreeField().getName(), metric.getField(), diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index ec9475caf7d6d..1a5c906ad413b 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -144,7 +144,7 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal List metricReaders = new ArrayList<>(); // get doc id set iterators for metrics for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeValues.getStarTreeField().getName(), metric.getField(), diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java 
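The builder and reader call sites above all make the same substitution: iterate `getBaseMetrics()` instead of `getMetrics()`, so derived metrics never get their own doc-values fields and are instead recomputed from their base statistics at read time. A simplified sketch of that relationship (the `Stat` enum below is illustrative; in OpenSearch the equivalent roles are played by `MetricStat`, where `AVG` is derived from `VALUE_COUNT` and `SUM`):

```java
import java.util.List;

enum Stat {
    VALUE_COUNT(List.of()),
    SUM(List.of()),
    AVG(List.of(VALUE_COUNT, SUM)); // derived: reconstructed from its base metrics

    private final List<Stat> baseMetrics;

    Stat(List<Stat> baseMetrics) {
        this.baseMetrics = baseMetrics;
    }

    boolean isDerivedMetric() {
        return baseMetrics.isEmpty() == false;
    }

    List<Stat> getBaseMetrics() {
        return baseMetrics;
    }

    // Only base metrics are persisted in the star tree; a derived metric such as
    // AVG is computed at query time from the stored SUM and VALUE_COUNT values.
    static double avg(double sum, long valueCount) {
        return valueCount == 0 ? 0d : sum / valueCount;
    }
}
```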
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java index e7bb32282ece7..7352c215ee390 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -220,16 +220,37 @@ private int readMetricsCount() throws IOException { private List readMetricEntries() throws IOException { int metricCount = readMetricsCount(); - Map starTreeMetricMap = new LinkedHashMap<>(); + Map> starTreeMetricStatMap = new LinkedHashMap<>(); for (int i = 0; i < metricCount; i++) { String metricName = meta.readString(); int metricStatOrdinal = meta.readVInt(); MetricStat metricStat = MetricStat.fromMetricOrdinal(metricStatOrdinal); - Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); - metric.getMetrics().add(metricStat); + List metricStats = starTreeMetricStatMap.computeIfAbsent(metricName, field -> new ArrayList<>()); + metricStats.add(metricStat); } + List starTreeMetricMap = new ArrayList<>(); + for (Map.Entry> metricStatsEntry : starTreeMetricStatMap.entrySet()) { + addEligibleDerivedMetrics(metricStatsEntry.getValue()); + starTreeMetricMap.add(new Metric(metricStatsEntry.getKey(), metricStatsEntry.getValue())); - return new ArrayList<>(starTreeMetricMap.values()); + } + return starTreeMetricMap; + } + + /** + * Add derived metrics if all associated base metrics are present + */ + private void addEligibleDerivedMetrics(List metricStatsList) { + Set metricStatsSet = new HashSet<>(metricStatsList); + for (MetricStat metric : MetricStat.values()) { + if (metric.isDerivedMetric() && !metricStatsSet.contains(metric)) { + List sourceMetrics = metric.getBaseMetrics(); + if (metricStatsSet.containsAll(sourceMetrics)) { + metricStatsList.add(metric); + metricStatsSet.add(metric); + } + } + } } private int readSegmentAggregatedDocCount() throws IOException { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java index 7d5f5ba02b9f8..a34bbbe9ee738 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java @@ -171,7 +171,7 @@ public StarTreeValues( // get doc id set iterators for metrics for (Metric metric : starTreeMetadata.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeField.getName(), metric.getField(), diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 2116ac522b705..11ff601b3fd6d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -392,6 +392,9 @@ public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); // has index and doc_values enabled if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.termsQuery(values, 
context); + } BytesRef[] bytesRefs = new BytesRef[values.size()]; for (int i = 0; i < bytesRefs.length; i++) { bytesRefs[i] = indexedValueForSearch(values.get(i)); @@ -429,6 +432,9 @@ public Query prefixQuery( } failIfNotIndexedAndNoDocValues(); if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.prefixQuery(value, method, caseInsensitive, context); + } Query indexQuery = super.prefixQuery(value, method, caseInsensitive, context); Query dvQuery = super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context); return new IndexOrDocValuesQuery(indexQuery, dvQuery); @@ -461,6 +467,9 @@ public Query regexpQuery( } failIfNotIndexedAndNoDocValues(); if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); + } Query indexQuery = super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); Query dvQuery = super.regexpQuery( value, @@ -549,6 +558,9 @@ public Query fuzzyQuery( ); } if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context); + } Query indexQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context); Query dvQuery = super.fuzzyQuery( value, @@ -591,6 +603,9 @@ public Query wildcardQuery( // wildcard // query text if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.wildcardQuery(value, method, caseInsensitive, true, context); + } Query indexQuery = super.wildcardQuery(value, method, caseInsensitive, true, context); Query dvQuery = super.wildcardQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, true, context); return new IndexOrDocValuesQuery(indexQuery, dvQuery); diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index eb0694edc70ba..43e975f95757b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -653,9 +653,7 @@ public byte[] encodePoint(Number value) { @Override public double toDoubleValue(long value) { - byte[] bytes = new byte[8]; - NumericUtils.longToSortableBytes(value, bytes, 0); - return NumericUtils.sortableLongToDouble(NumericUtils.sortableBytesToLong(bytes, 0)); + return objectToDouble(value); } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index 9e2e7ef3c3889..52dab17e0b0bb 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -287,11 +287,7 @@ private List buildMetrics(String fieldName, Map map, Map } int numBaseMetrics = 0; for (Metric metric : metrics) { - for (MetricStat metricStat : metric.getMetrics()) { - if (metricStat.isDerivedMetric() == false) { - numBaseMetrics++; - } - } + numBaseMetrics += metric.getBaseMetrics().size(); } if (numBaseMetrics > context.getSettings() .getAsInt( diff --git a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java 
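Every `KeywordFieldMapper` hunk above follows one pattern: when the new gate is off, return the plain index-based query (the `indexed` default noted in the changelog); when it is on, wrap both executions in Lucene's `IndexOrDocValuesQuery` so each segment can choose between walking the terms index and scanning doc values. A hedged sketch of the wrapped form for a simple term query (the field name and value are examples; the real mappers build their queries through `indexedValueForSearch` and the `super` implementations):

```java
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

class KeywordQuerySketch {
    static Query termQuery(String field, String value, boolean indexOrDocValuesEnabled) {
        Query indexQuery = new TermQuery(new Term(field, value));
        if (indexOrDocValuesEnabled == false) {
            return indexQuery; // the gated default: index-only execution
        }
        // Lucene picks per segment: the terms index wins when the clause drives
        // the search, doc values win when it is a selective secondary filter.
        Query dvQuery = SortedSetDocValuesField.newSlowExactQuery(field, new BytesRef(value));
        return new IndexOrDocValuesQuery(indexQuery, dvQuery);
    }
}
```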
b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java index 4998a822917b4..cd95e320209ee 100644 --- a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java @@ -635,7 +635,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower @Override public Query termQueryCaseInsensitive(Object value, QueryShardContext context) { - return wildcardQuery(value.toString(), MultiTermQuery.CONSTANT_SCORE_REWRITE, true, context); + return wildcardQuery(BytesRefs.toString(value), MultiTermQuery.CONSTANT_SCORE_REWRITE, true, context); } @Override @@ -649,7 +649,7 @@ public Query termsQuery(List values, QueryShardContext context) { Set expectedValues = new HashSet<>(); StringBuilder pattern = new StringBuilder(); for (Object value : values) { - String stringVal = value.toString(); + String stringVal = BytesRefs.toString(value); builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal)), BooleanClause.Occur.SHOULD); expectedValues.add(stringVal); if (pattern.length() > 0) { diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java index 91313092d8d28..bccead2b029d0 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java @@ -125,6 +125,7 @@ public class QueryShardContext extends QueryRewriteContext { private final ValuesSourceRegistry valuesSourceRegistry; private BitSetProducer parentFilter; private DerivedFieldResolver derivedFieldResolver; + private boolean keywordIndexOrDocValuesEnabled; public QueryShardContext( int shardId, @@ -208,7 +209,55 @@ public QueryShardContext( ), allowExpensiveQueries, valuesSourceRegistry, - validate + validate, + false + ); + } + + public QueryShardContext( + int shardId, + IndexSettings indexSettings, + BigArrays bigArrays, + BitsetFilterCache bitsetFilterCache, + TriFunction, IndexFieldData> indexFieldDataLookup, + MapperService mapperService, + SimilarityService similarityService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + NamedWriteableRegistry namedWriteableRegistry, + Client client, + IndexSearcher searcher, + LongSupplier nowInMillis, + String clusterAlias, + Predicate indexNameMatcher, + BooleanSupplier allowExpensiveQueries, + ValuesSourceRegistry valuesSourceRegistry, + boolean validate, + boolean keywordIndexOrDocValuesEnabled + ) { + this( + shardId, + indexSettings, + bigArrays, + bitsetFilterCache, + indexFieldDataLookup, + mapperService, + similarityService, + scriptService, + xContentRegistry, + namedWriteableRegistry, + client, + searcher, + nowInMillis, + indexNameMatcher, + new Index( + RemoteClusterAware.buildRemoteIndexName(clusterAlias, indexSettings.getIndex().getName()), + indexSettings.getIndex().getUUID() + ), + allowExpensiveQueries, + valuesSourceRegistry, + validate, + keywordIndexOrDocValuesEnabled ); } @@ -231,7 +280,8 @@ public QueryShardContext(QueryShardContext source) { source.fullyQualifiedIndex, source.allowExpensiveQueries, source.valuesSourceRegistry, - source.validate() + source.validate(), + source.keywordIndexOrDocValuesEnabled ); } @@ -253,7 +303,8 @@ private QueryShardContext( Index fullyQualifiedIndex, BooleanSupplier allowExpensiveQueries, ValuesSourceRegistry valuesSourceRegistry, - boolean validate + boolean validate, + boolean 
keywordIndexOrDocValuesEnabled ) { super(xContentRegistry, namedWriteableRegistry, client, nowInMillis, validate); this.shardId = shardId; @@ -277,6 +328,7 @@ private QueryShardContext( emptyList(), indexSettings.isDerivedFieldAllowed() ); + this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled; } private void reset() { @@ -414,6 +466,14 @@ public void setDerivedFieldResolver(DerivedFieldResolver derivedFieldResolver) { this.derivedFieldResolver = derivedFieldResolver; } + public boolean keywordFieldIndexOrDocValuesEnabled() { + return keywordIndexOrDocValuesEnabled; + } + + public void setKeywordFieldIndexOrDocValuesEnabled(boolean keywordIndexOrDocValuesEnabled) { + this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled; + } + public void setAllowUnmappedFields(boolean allowUnmappedFields) { this.allowUnmappedFields = allowUnmappedFields; } diff --git a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java index b9cb5e92d0ed1..1e43827afeb47 100644 --- a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java @@ -1251,12 +1251,13 @@ public ReplicationCheckpoint getLatestReplicationCheckpoint() { return this.latestReplicationCheckpoint; } - private boolean isPrimaryRelocation(String allocationId) { + // skip any shard that is a relocating primary or search only replica (not tracked by primary) + private boolean shouldSkipReplicationTimer(String allocationId) { Optional shardRouting = routingTable.shards() .stream() .filter(routing -> routing.allocationId().getId().equals(allocationId)) .findAny(); - return shardRouting.isPresent() && shardRouting.get().primary(); + return shardRouting.isPresent() && (shardRouting.get().primary() || shardRouting.get().isSearchOnly()); } private void createReplicationLagTimers() { @@ -1268,7 +1269,7 @@ private void createReplicationLagTimers() { // it is possible for a shard to be in-sync but not yet removed from the checkpoints collection after a failover event. 
if (cps.inSync && replicationGroup.getUnavailableInSyncShards().contains(allocationId) == false - && isPrimaryRelocation(allocationId) == false + && shouldSkipReplicationTimer(allocationId) == false && latestReplicationCheckpoint.isAheadOf(cps.visibleReplicationCheckpoint) && (indexSettings.isSegRepLocalEnabled() == true || isShardOnRemoteEnabledNode.apply(routingTable.getByAllocationId(allocationId).currentNodeId()))) { @@ -1302,7 +1303,7 @@ public synchronized void startReplicationLagTimers(ReplicationCheckpoint checkpo final CheckpointState cps = e.getValue(); if (cps.inSync && replicationGroup.getUnavailableInSyncShards().contains(allocationId) == false - && isPrimaryRelocation(e.getKey()) == false + && shouldSkipReplicationTimer(e.getKey()) == false && latestReplicationCheckpoint.isAheadOf(cps.visibleReplicationCheckpoint) && cps.checkpointTimers.containsKey(latestReplicationCheckpoint)) { cps.checkpointTimers.get(latestReplicationCheckpoint).start(); @@ -1330,7 +1331,7 @@ public synchronized Set getSegmentReplicationStats entry -> entry.getKey().equals(this.shardAllocationId) == false && entry.getValue().inSync && replicationGroup.getUnavailableInSyncShards().contains(entry.getKey()) == false - && isPrimaryRelocation(entry.getKey()) == false + && shouldSkipReplicationTimer(entry.getKey()) == false /*Check if the current primary shard is migrating to remote and all the other shard copies of the same index still hasn't completely moved over to the remote enabled nodes. Ensures that: diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 53b43bbfb3bba..72bf07d4b03b2 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -412,7 +412,7 @@ static long getGeneration(String[] filenameTokens) { public static long getTimestamp(String filename) { String[] filenameTokens = filename.split(SEPARATOR); - return RemoteStoreUtils.invertLong(filenameTokens[6]); + return RemoteStoreUtils.invertLong(filenameTokens[filenameTokens.length - 2]); } public static Tuple getNodeIdByPrimaryTermAndGen(String filename) { diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java index ea7e2506deec3..233665e65aed9 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java @@ -116,4 +116,8 @@ public Directory newDirectory(String repositoryName, String indexUUID, ShardId s } } + public Supplier getRepositoriesService() { + return this.repositoriesService; + } + } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java index 0b134b3bddbec..27d34ec0d05af 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java @@ -8,6 +8,7 @@ package org.opensearch.index.translog; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.cluster.service.ClusterService; import 
org.opensearch.common.blobstore.BlobMetadata; @@ -33,6 +34,7 @@ import java.util.Optional; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; import java.util.function.BooleanSupplier; import java.util.function.LongConsumer; import java.util.function.LongSupplier; @@ -52,10 +54,13 @@ */ public class RemoteFsTimestampAwareTranslog extends RemoteFsTranslog { + private static Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslog.class); private final Logger logger; private final Map metadataFilePinnedTimestampMap; // For metadata files, with no min generation in the name, we cache generation data to avoid multiple reads. private final Map> oldFormatMetadataFileGenerationMap; + private final Map> oldFormatMetadataFilePrimaryTermMap; + private final AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); public RemoteFsTimestampAwareTranslog( TranslogConfig config, @@ -86,6 +91,7 @@ public RemoteFsTimestampAwareTranslog( logger = Loggers.getLogger(getClass(), shardId); this.metadataFilePinnedTimestampMap = new HashMap<>(); this.oldFormatMetadataFileGenerationMap = new HashMap<>(); + this.oldFormatMetadataFilePrimaryTermMap = new HashMap<>(); } @Override @@ -165,7 +171,11 @@ public void onResponse(List blobMetadata) { return; } - List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted(metadataFiles); + List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted( + metadataFiles, + metadataFilePinnedTimestampMap, + logger + ); // If index is not deleted, make sure to keep latest metadata file if (indexDeleted == false) { @@ -209,7 +219,7 @@ public void onResponse(List blobMetadata) { oldFormatMetadataFileGenerationMap.keySet().retainAll(metadataFilesNotToBeDeleted); // Delete stale primary terms - deleteStaleRemotePrimaryTerms(metadataFiles); + deleteStaleRemotePrimaryTerms(metadataFilesNotToBeDeleted); } else { remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); } @@ -259,8 +269,16 @@ protected Set getGenerationsToBeDeleted( return generationsToBeDeleted; } - // Visible for testing protected List getMetadataFilesToBeDeleted(List metadataFiles) { + return getMetadataFilesToBeDeleted(metadataFiles, metadataFilePinnedTimestampMap, logger); + } + + // Visible for testing + protected static List getMetadataFilesToBeDeleted( + List metadataFiles, + Map metadataFilePinnedTimestampMap, + Logger logger + ) { Tuple> pinnedTimestampsState = RemoteStorePinnedTimestampService.getPinnedTimestamps(); // Keep files since last successful run of scheduler @@ -351,27 +369,153 @@ protected Tuple getMinMaxTranslogGenerationFromMetadataFile( } } + private void deleteStaleRemotePrimaryTerms(List metadataFiles) { + deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + oldFormatMetadataFilePrimaryTermMap, + minPrimaryTermInRemote, + logger + ); + } + /** * This method must be called only after there are valid generations to delete in trimUnreferencedReaders as it ensures * implicitly that minimum primary term in latest translog metadata in remote store is the current primary term. *
* This will also delete all stale translog metadata files from remote except the latest one, as determined by the metadata file comparator. */ - private void deleteStaleRemotePrimaryTerms(List metadataFiles) { + protected static void deleteStaleRemotePrimaryTerms( + List metadataFiles, + TranslogTransferManager translogTransferManager, + Map> oldFormatMetadataFilePrimaryTermMap, + AtomicLong minPrimaryTermInRemoteAtomicLong, + Logger logger + ) { // The deletion of older translog files in remote store is on a best-effort basis; there is a possibility that there // are older files that are no longer needed and should be cleaned up. In here, we delete all files that are part // of an older primary term. - if (olderPrimaryCleaned.trySet(Boolean.TRUE)) { - if (metadataFiles.isEmpty()) { - logger.trace("No metadata is uploaded yet, returning from deleteStaleRemotePrimaryTerms"); - return; + if (metadataFiles.isEmpty()) { + logger.trace("No metadata is uploaded yet, returning from deleteStaleRemotePrimaryTerms"); + return; + } + Optional minPrimaryTermFromMetadataFiles = metadataFiles.stream().map(file -> { + try { + return getMinMaxPrimaryTermFromMetadataFile(file, translogTransferManager, oldFormatMetadataFilePrimaryTermMap).v1(); + } catch (IOException e) { + return Long.MAX_VALUE; + } + }).min(Long::compareTo); + // First we delete all stale primary terms folders from remote store + Long minPrimaryTermInRemote = getMinPrimaryTermInRemote(minPrimaryTermInRemoteAtomicLong, translogTransferManager, logger); + if (minPrimaryTermFromMetadataFiles.get() > minPrimaryTermInRemote) { + translogTransferManager.deletePrimaryTermsAsync(minPrimaryTermFromMetadataFiles.get()); + minPrimaryTermInRemoteAtomicLong.set(minPrimaryTermFromMetadataFiles.get()); + } else { + logger.debug( + "Skipping primary term cleanup.
minimumReferencedPrimaryTerm = {}, minPrimaryTermInRemote = {}", minPrimaryTermFromMetadataFiles.get(), minPrimaryTermInRemote ); } } + + private static Long getMinPrimaryTermInRemote( + AtomicLong minPrimaryTermInRemote, + TranslogTransferManager translogTransferManager, + Logger logger + ) { + if (minPrimaryTermInRemote.get() == Long.MAX_VALUE) { + try { + Set primaryTermsInRemote = translogTransferManager.listPrimaryTermsInRemote(); + if (primaryTermsInRemote.isEmpty() == false) { + Optional minPrimaryTerm = primaryTermsInRemote.stream().min(Long::compareTo); + minPrimaryTerm.ifPresent(minPrimaryTermInRemote::set); + } + } catch (IOException e) { + logger.error("Exception while listing primary terms in remote translog", e); + } + } + return minPrimaryTermInRemote.get(); + } + + protected static Tuple getMinMaxPrimaryTermFromMetadataFile( + String metadataFile, + TranslogTransferManager translogTransferManager, + Map> oldFormatMetadataFilePrimaryTermMap + ) throws IOException { + Tuple minMaxPrimaryTermFromFileName = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(metadataFile); + if (minMaxPrimaryTermFromFileName != null) { + return minMaxPrimaryTermFromFileName; + } else { + if (oldFormatMetadataFilePrimaryTermMap.containsKey(metadataFile)) { + return oldFormatMetadataFilePrimaryTermMap.get(metadataFile); + } else { + TranslogTransferMetadata metadata = translogTransferManager.readMetadata(metadataFile); + long maxPrimaryTerm = TranslogTransferMetadata.getPrimaryTermFromFileName(metadataFile); + long minPrimaryTerm = -1; + if (metadata.getGenerationToPrimaryTermMapper() != null + && metadata.getGenerationToPrimaryTermMapper().values().isEmpty() == false) { + Optional primaryTerm = metadata.getGenerationToPrimaryTermMapper() + .values() + .stream() + .map(s -> Long.parseLong(s)) + .min(Long::compareTo); + if (primaryTerm.isPresent()) { + minPrimaryTerm = primaryTerm.get(); + } + } + Tuple minMaxPrimaryTermTuple = new Tuple<>(minPrimaryTerm, maxPrimaryTerm); + oldFormatMetadataFilePrimaryTermMap.put(metadataFile, minMaxPrimaryTermTuple); + return minMaxPrimaryTermTuple; } - Optional minPrimaryTerm = metadataFiles.stream() - .map(file -> RemoteStoreUtils.invertLong(file.split(METADATA_SEPARATOR)[1])) - .min(Long::compareTo); - // First we delete all stale primary terms folders from remote store - long minimumReferencedPrimaryTerm = minPrimaryTerm.get() - 1; - translogTransferManager.deletePrimaryTermsAsync(minimumReferencedPrimaryTerm); } } + + public static void cleanup(TranslogTransferManager translogTransferManager) throws IOException { + ActionListener> listMetadataFilesListener = new ActionListener<>() { + @Override + public void onResponse(List blobMetadata) { + List metadataFiles = blobMetadata.stream().map(BlobMetadata::name).collect(Collectors.toList()); + + try { + if (metadataFiles.isEmpty()) { + staticLogger.debug("No stale translog metadata files found"); + return; + } + List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted(metadataFiles, new HashMap<>(), staticLogger); + if (metadataFilesToBeDeleted.isEmpty()) { + staticLogger.debug("No metadata files to delete"); + return; + } + staticLogger.debug(() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted); + + // For all the files that we are keeping, fetch min and max generations + List metadataFilesNotToBeDeleted = new ArrayList<>(metadataFiles); + metadataFilesNotToBeDeleted.removeAll(metadataFilesToBeDeleted); + staticLogger.debug(() -> "metadataFilesNotToBeDeleted = " +
metadataFilesNotToBeDeleted); + + // Delete stale metadata files + translogTransferManager.deleteMetadataFilesAsync(metadataFilesToBeDeleted, () -> {}); + + // Delete stale primary terms + deleteStaleRemotePrimaryTerms( + metadataFilesNotToBeDeleted, + translogTransferManager, + new HashMap<>(), + new AtomicLong(Long.MAX_VALUE), + staticLogger + ); + } catch (Exception e) { + staticLogger.error("Exception while cleaning up metadata and primary terms", e); + } + } + + @Override + public void onFailure(Exception e) { + staticLogger.error("Exception while cleaning up metadata and primary terms", e); + } + }; + translogTransferManager.listTranslogMetadataFilesAsync(listMetadataFilesListener); + } } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 56a9aa6447dec..291218ea47499 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -545,6 +545,14 @@ public void onFailure(Exception e) { }); } + public Set listPrimaryTermsInRemote() throws IOException { + Set primaryTermsStr = transferService.listFolders(remoteDataTransferPath); + if (primaryTermsStr != null) { + return primaryTermsStr.stream().map(Long::parseLong).collect(Collectors.toSet()); + } + return new HashSet<>(); + } + /** * Handles deletion of all translog files associated with a primary term. * diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java index 745fa9a8a219a..3b8885055e8f7 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java @@ -19,6 +19,7 @@ import java.util.Arrays; import java.util.Map; import java.util.Objects; +import java.util.Optional; /** * The metadata associated with every transfer {@link TransferSnapshot}. 
The metadata is uploaded at the end of the @@ -108,11 +109,28 @@ public String getFileName() { RemoteStoreUtils.invertLong(createdAt), String.valueOf(Objects.hash(nodeId)), RemoteStoreUtils.invertLong(minTranslogGeneration), + String.valueOf(getMinPrimaryTermReferred()), String.valueOf(CURRENT_VERSION) ) ); } + private long getMinPrimaryTermReferred() { + if (generationToPrimaryTermMapper.get() == null || generationToPrimaryTermMapper.get().values().isEmpty()) { + return -1; + } + Optional minPrimaryTerm = generationToPrimaryTermMapper.get() + .values() + .stream() + .map(s -> Long.parseLong(s)) + .min(Long::compareTo); + if (minPrimaryTerm.isPresent()) { + return minPrimaryTerm.get(); + } else { + return -1; + } + } + public static Tuple, String> getNodeIdByPrimaryTermAndGeneration(String filename) { String[] tokens = filename.split(METADATA_SEPARATOR); if (tokens.length < 6) { @@ -143,15 +161,43 @@ public static Tuple getMinMaxTranslogGenerationFromFilename(String f assert Version.CURRENT.onOrAfter(Version.V_2_17_0); try { // instead of direct index, we go backwards to avoid running into same separator in nodeId - String minGeneration = tokens[tokens.length - 2]; + String minGeneration = tokens[tokens.length - 3]; String maxGeneration = tokens[2]; return new Tuple<>(RemoteStoreUtils.invertLong(minGeneration), RemoteStoreUtils.invertLong(maxGeneration)); - } catch (NumberFormatException e) { + } catch (Exception e) { logger.error(() -> new ParameterizedMessage("Exception while getting min and max translog generation from: {}", filename), e); return null; } } + public static Tuple getMinMaxPrimaryTermFromFilename(String filename) { + String[] tokens = filename.split(METADATA_SEPARATOR); + if (tokens.length < 7) { + // For versions < 2.17, we don't have min primary term. 
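For context on the token arithmetic here and in the `RemoteSegmentStoreDirectory.getTimestamp` fix earlier: `hash(nodeId)` is the one token without a fixed position from both ends of the name, so fields written after it are read with offsets from the right (`tokens.length - k`) rather than absolute left indices. A sketch of the 2.17+ metadata filename layout and the min/max primary-term extraction (the layout comment and the `invert` helper reflect my reading of `getFileName()` and `RemoteStoreUtils.invertLong`, so treat those details as assumptions):

```java
import java.util.Optional;

class TranslogMetadataFilenameSketch {
    // Assumption: RemoteStoreUtils.invertLong(s) is Long.MAX_VALUE - parse(s),
    // used so that lexicographically sorted filenames put the newest first.
    static long invert(String token) {
        return Long.MAX_VALUE - Long.parseLong(token);
    }

    // Simplified 2.17+ layout:
    //   metadata__invert(primaryTerm)__invert(generation)__invert(createdAt)
    //           __hash(nodeId)__invert(minTranslogGeneration)__minPrimaryTerm__version
    static Optional<long[]> minMaxPrimaryTerm(String filename) {
        String[] tokens = filename.split("__");
        if (tokens.length < 7) {
            return Optional.empty(); // pre-2.17 file: min primary term not encoded
        }
        long max = invert(tokens[1]);                         // fixed offset from the left
        long min = Long.parseLong(tokens[tokens.length - 2]); // fixed offset from the right
        return Optional.of(new long[] { min, max });
    }
}
```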
+ return null; + } + assert Version.CURRENT.onOrAfter(Version.V_2_17_0); + try { + // instead of direct index, we go backwards to avoid running into same separator in nodeId + String minPrimaryTerm = tokens[tokens.length - 2]; + String maxPrimaryTerm = tokens[1]; + return new Tuple<>(Long.parseLong(minPrimaryTerm), RemoteStoreUtils.invertLong(maxPrimaryTerm)); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception while getting min and max primary term from: {}", filename), e); + return null; + } + } + + public static long getPrimaryTermFromFileName(String filename) { + String[] tokens = filename.split(METADATA_SEPARATOR); + try { + return RemoteStoreUtils.invertLong(tokens[1]); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception while getting max primary term from: {}", filename), e); + return -1; + } + } + @Override public int hashCode() { return Objects.hash(primaryTerm, generation); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index be16d4ea184fa..4593aedfe1f83 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -359,6 +359,7 @@ public class IndicesService extends AbstractLifecycleComponent private final SearchRequestStats searchRequestStats; private final FileCache fileCache; private final CompositeIndexSettings compositeIndexSettings; + private final Consumer replicator; @Override protected void doStart() { @@ -395,7 +396,8 @@ public IndicesService( CacheService cacheService, RemoteStoreSettings remoteStoreSettings, FileCache fileCache, - CompositeIndexSettings compositeIndexSettings + CompositeIndexSettings compositeIndexSettings, + Consumer replicator ) { this.settings = settings; this.threadPool = threadPool; @@ -504,6 +506,7 @@ protected void closeInternal() { this.remoteStoreSettings = remoteStoreSettings; this.compositeIndexSettings = compositeIndexSettings; this.fileCache = fileCache; + this.replicator = replicator; } public IndicesService( @@ -564,6 +567,7 @@ public IndicesService( cacheService, remoteStoreSettings, null, + null, null ); } @@ -980,7 +984,8 @@ private synchronized IndexService createIndexService( translogFactorySupplier, this::getClusterDefaultRefreshInterval, this.recoverySettings, - this.remoteStoreSettings + this.remoteStoreSettings, + replicator ); } diff --git a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java index 83a759cdb71c5..4c28c08d8061b 100644 --- a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java +++ b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java @@ -83,7 +83,7 @@ public class NodeIndicesStats implements Writeable, ToXContentFragment { public NodeIndicesStats(StreamInput in) throws IOException { stats = new CommonStats(in); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { // contains statsByIndex if (in.readBoolean()) { statsByIndex = readStatsByIndex(in); @@ -284,7 +284,7 @@ public RecoveryStats getRecoveryStats() { public void writeTo(StreamOutput out) throws IOException { stats.writeTo(out); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeBoolean(statsByIndex != null); if (statsByIndex != null) { writeStatsByIndex(out); diff --git 
a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java index a042e08c2a53f..8fee3f671ecc9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java @@ -157,6 +157,7 @@ public SegmentReplicationTargetService( ForceSyncRequest::new, new ForceSyncTransportRequestHandler() ); + replicator.setSourceFactory(sourceFactory); } @Override diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java index 417556c22636e..ad3bc1933208c 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java @@ -13,6 +13,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.index.CorruptIndexException; import org.opensearch.OpenSearchCorruptionException; +import org.opensearch.common.SetOnce; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.ConcurrentCollections; @@ -44,12 +45,44 @@ public class SegmentReplicator { private final Map completedReplications = ConcurrentCollections.newConcurrentMap(); private final ThreadPool threadPool; + private final SetOnce sourceFactory; + public SegmentReplicator(ThreadPool threadPool) { this.onGoingReplications = new ReplicationCollection<>(logger, threadPool); this.threadPool = threadPool; + this.sourceFactory = new SetOnce<>(); + } + + /** + * Starts a replication event for the given shard. + * @param shard - {@link IndexShard} replica shard + */ + public void startReplication(IndexShard shard) { + if (sourceFactory.get() == null) return; + startReplication( + shard, + shard.getLatestReplicationCheckpoint(), + sourceFactory.get().get(shard), + new SegmentReplicationTargetService.SegmentReplicationListener() { + @Override + public void onReplicationDone(SegmentReplicationState state) { + logger.trace("Completed replication for {}", shard.shardId()); + } + + @Override + public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) { + logger.error(() -> new ParameterizedMessage("Failed segment replication for {}", shard.shardId()), e); + if (sendShardFailure) { + shard.failShard("unrecoverable replication failure", e); + } + } + } + ); } - // TODO: Add public entrypoint for replication on an interval to be invoked via IndexService + void setSourceFactory(SegmentReplicationSourceFactory sourceFactory) { + this.sourceFactory.set(sourceFactory); + } /** * Start a round of replication and sync to at least the given checkpoint. 
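The `SetOnce` hook above resolves a construction-order cycle: `IndicesService` needs a replication callback when it is built, but the `SegmentReplicationSourceFactory` only exists once `SegmentReplicationTargetService` is constructed, so the factory is injected late and `startReplication(shard)` quietly no-ops until then. A condensed sketch of that shape (the nested types are stand-ins; `AtomicReference` plays the role of OpenSearch's `SetOnce`, which likewise rejects a second set):

```java
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

class ReplicatorWiringSketch {
    interface IndexShard {}    // stand-in for org.opensearch.index.shard.IndexShard
    interface SourceFactory {} // stand-in for SegmentReplicationSourceFactory

    static class Replicator {
        private final AtomicReference<SourceFactory> sourceFactory = new AtomicReference<>();

        void setSourceFactory(SourceFactory factory) {
            if (sourceFactory.compareAndSet(null, factory) == false) {
                throw new IllegalStateException("source factory already set"); // SetOnce semantics
            }
        }

        void startReplication(IndexShard shard) {
            if (sourceFactory.get() == null) {
                return; // target service not wired yet; skip this replication round
            }
            // ... resolve a source for the shard and run a replication round ...
        }
    }

    static Consumer<IndexShard> wire(Replicator replicator) {
        // IndexService only ever sees this Consumer<IndexShard>; Node.java passes
        // the equivalent segmentReplicator::startReplication into IndicesService.
        return replicator::startReplication;
    }
}
```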
diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index c55c2ab683b11..b143333954338 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -897,6 +897,7 @@ protected Node( remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, settings); CacheModule cacheModule = new CacheModule(pluginsService.filterPlugins(CachePlugin.class), settings); CacheService cacheService = cacheModule.getCacheService(); + final SegmentReplicator segmentReplicator = new SegmentReplicator(threadPool); final IndicesService indicesService = new IndicesService( settings, pluginsService, @@ -926,7 +927,8 @@ protected Node( cacheService, remoteStoreSettings, fileCache, - compositeIndexSettings + compositeIndexSettings, + segmentReplicator::startReplication ); final IngestService ingestService = new IngestService( @@ -1439,7 +1441,7 @@ protected Node( new SegmentReplicationSourceFactory(transportService, recoverySettings, clusterService), indicesService, clusterService, - new SegmentReplicator(threadPool) + segmentReplicator ) ); b.bind(SegmentReplicationSourceService.class) @@ -1474,6 +1476,7 @@ protected Node( b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); + b.bind(SegmentReplicator.class).toInstance(segmentReplicator); taskManagerClientOptional.ifPresent(value -> b.bind(TaskManagerClient.class).toInstance(value)); }); diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java index efaefd530b34d..55971398634c5 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java @@ -55,6 +55,11 @@ public class RemoteStoreNodeAttribute { REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY ); + public static List REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES = List.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY + ); + /** * Creates a new {@link RemoteStoreNodeAttribute} */ @@ -261,6 +266,14 @@ public boolean equalsWithRepoSkip(Object o, List reposToSkip) { return this.getRepositoriesMetadata().equalsIgnoreGenerationsWithRepoSkip(that.getRepositoriesMetadata(), reposToSkip); } + public boolean equalsForRepositories(Object otherNode, List repositoryToValidate) { + if (this == otherNode) return true; + if (otherNode == null || getClass() != otherNode.getClass()) return false; + + RemoteStoreNodeAttribute other = (RemoteStoreNodeAttribute) otherNode; + return this.getRepositoriesMetadata().equalsIgnoreGenerationsForRepo(other.repositoriesMetadata, repositoryToValidate); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/main/java/org/opensearch/repositories/IndexId.java b/server/src/main/java/org/opensearch/repositories/IndexId.java index 238dffbb46bde..5a9a757e31cb1 100644 --- a/server/src/main/java/org/opensearch/repositories/IndexId.java +++ b/server/src/main/java/org/opensearch/repositories/IndexId.java @@ -78,7 +78,7 @@ public IndexId(String name, String id, int 
shardPathType) { public IndexId(final StreamInput in) throws IOException { this.name = in.readString(); this.id = in.readString(); - if (in.getVersion().onOrAfter(Version.CURRENT)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { this.shardPathType = in.readVInt(); } else { this.shardPathType = DEFAULT_SHARD_PATH_TYPE; @@ -145,7 +145,7 @@ private int computeHashCode() { public void writeTo(final StreamOutput out) throws IOException { out.writeString(name); out.writeString(id); - if (out.getVersion().onOrAfter(Version.CURRENT)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeVInt(shardPathType); } } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryData.java b/server/src/main/java/org/opensearch/repositories/RepositoryData.java index 7921a9589b443..f52f1fa76f6bf 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryData.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryData.java @@ -551,7 +551,7 @@ public List resolveNewIndices(List indicesToResolve, Map requestShouldUseConcurrentSearch = new SetOnce<>(); private final int maxAggRewriteFilters; private final int cardinalityAggregationPruningThreshold; + private final boolean keywordIndexOrDocValuesEnabled; DefaultSearchContext( ReaderContext readerContext, @@ -256,7 +258,8 @@ final class DefaultSearchContext extends SearchContext { this.searcher, request::nowInMillis, shardTarget.getClusterAlias(), - validate + validate, + evaluateKeywordIndexOrDocValuesEnabled() ); queryBoost = request.indexBoost(); this.lowLevelCancellation = lowLevelCancellation; @@ -265,6 +268,7 @@ final class DefaultSearchContext extends SearchContext { this.maxAggRewriteFilters = evaluateFilterRewriteSetting(); this.cardinalityAggregationPruningThreshold = evaluateCardinalityAggregationPruningThreshold(); this.concurrentSearchDeciders = concurrentSearchDeciders; + this.keywordIndexOrDocValuesEnabled = evaluateKeywordIndexOrDocValuesEnabled(); } @Override @@ -1117,10 +1121,22 @@ public int cardinalityAggregationPruningThreshold() { return cardinalityAggregationPruningThreshold; } + @Override + public boolean keywordIndexOrDocValuesEnabled() { + return keywordIndexOrDocValuesEnabled; + } + private int evaluateCardinalityAggregationPruningThreshold() { if (clusterService != null) { return clusterService.getClusterSettings().get(CARDINALITY_AGGREGATION_PRUNING_THRESHOLD); } return 0; } + + public boolean evaluateKeywordIndexOrDocValuesEnabled() { + if (clusterService != null) { + return clusterService.getClusterSettings().get(KEYWORD_INDEX_OR_DOC_VALUES_ENABLED); + } + return false; + } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 39c0c19a978e5..e2a804a674d8f 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -337,6 +337,13 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv Property.NodeScope ); + public static final Setting KEYWORD_INDEX_OR_DOC_VALUES_ENABLED = Setting.boolSetting( + "search.keyword_index_or_doc_values_enabled", + false, + Property.Dynamic, + Property.NodeScope + ); + public static final int DEFAULT_SIZE = 10; public static final int DEFAULT_FROM = 0; @@ -1173,6 +1180,7 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear context.getIndexSettings().isDerivedFieldAllowed() && allowDerivedField ); 
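Because `KEYWORD_INDEX_OR_DOC_VALUES_ENABLED` is declared `Dynamic` and `NodeScope`, it can be flipped at runtime without a restart; each new `DefaultSearchContext` then re-reads it from the cluster settings and threads the value into its `QueryShardContext`. A hedged usage sketch with the Java client (assumes a `Client` handle as in the project's tests; the default remains `false`, i.e. the index-only behavior):

```java
import org.opensearch.client.Client;
import org.opensearch.common.settings.Settings;

class KeywordSettingSketch {
    // Re-enables IndexOrDocValuesQuery planning for keyword multi-term queries
    // cluster-wide; no node restart is needed because the setting is dynamic.
    static void enableKeywordIndexOrDocValues(Client client) {
        client.admin()
            .cluster()
            .prepareUpdateSettings()
            .setPersistentSettings(Settings.builder().put("search.keyword_index_or_doc_values_enabled", true))
            .get();
    }
}
```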
context.setDerivedFieldResolver(derivedFieldResolver); + context.setKeywordFieldIndexOrDocValuesEnabled(searchContext.keywordIndexOrDocValuesEnabled()); searchContext.getQueryShardContext().setDerivedFieldResolver(derivedFieldResolver); Rewriteable.rewrite(request.getRewriteable(), context, true); assert searchContext.getQueryShardContext().isCacheable(); diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java index cee8bc43d7ffd..8076da6ab970b 100644 --- a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java @@ -146,10 +146,11 @@ public void grow(int count) { public void visit(int docID) { // it is possible that size < 1024 and docCount < size but we will continue to count through all the 1024 docs // and collect less, but it won't hurt performance - if (docCount[0] < size) { - adder.add(docID); - docCount[0]++; + if (docCount[0] >= size) { + return; } + adder.add(docID); + docCount[0]++; } @Override @@ -231,7 +232,7 @@ private void intersectRight(PointValues.PointTree pointTree, PointValues.Interse public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount) throws IOException { PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); - if (docCount[0] > size) { + if (docCount[0] >= size) { return; } switch (r) { @@ -279,7 +280,7 @@ public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.Poin public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount) throws IOException { PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); - if (docCount[0] > size) { + if (docCount[0] >= size) { return; } switch (r) { @@ -435,6 +436,10 @@ public boolean canApproximate(SearchContext context) { if (!(context.query() instanceof ApproximateIndexOrDocValuesQuery)) { return false; } + // size 0 could be set for caching + if (context.from() + context.size() == 0) { + this.setSize(10_000); + } this.setSize(Math.max(context.from() + context.size(), context.trackTotalHitsUpTo())); if (context.request() != null && context.request().source() != null) { FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(context.request().source()); diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index bc4b7058651dd..5357206e8c117 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -526,4 +526,9 @@ public int maxAggRewriteFilters() { public int cardinalityAggregationPruningThreshold() { return 0; } + + public boolean keywordIndexOrDocValuesEnabled() { + return false; + } + } diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index c4c77b352addb..a2364d96f2098 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -288,7 +288,7 @@ public void executeSnapshot(final CreateSnapshotRequest request, final ActionLis if 
(remoteStoreIndexShallowCopy && isSnapshotV2 && request.indices().length == 0 - && clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.CURRENT)) { + && clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.V_2_17_0)) { createSnapshotV2(request, listener); } else { createSnapshot( @@ -361,7 +361,7 @@ public ClusterState execute(ClusterState currentState) { logger.trace("[{}][{}] creating snapshot for indices [{}]", repositoryName, snapshotName, indices); - int pathType = clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.CURRENT) + int pathType = clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.V_2_17_0) ? SHARD_PATH_TYPE.get(repository.getMetadata().settings()).getCode() : IndexId.DEFAULT_SHARD_PATH_TYPE; final List indexIds = repositoryData.resolveNewIndices( diff --git a/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java b/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java index b90be87e3a600..c01335eb38afc 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java @@ -42,7 +42,10 @@ import org.opensearch.index.engine.Segment; import org.opensearch.test.OpenSearchTestCase; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; +import java.util.Map; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; @@ -68,4 +71,54 @@ public void testToXContentSerialiationWithSortedFields() throws Exception { response.toXContent(builder, ToXContent.EMPTY_PARAMS); } } + + public void testGetIndices() { + final int totalIndices = 5; + final int shardsPerIndex = 3; + final int segmentsPerShard = 2; + // Preparing a ShardSegments list, which will have (totalIndices * shardsPerIndex) shardSegments. 
+ // Indices will be named -> foo0, foo1, ..., foo{totalIndices - 1} + List shardSegmentsList = new ArrayList<>(); + for (int indexName = 0; indexName < totalIndices; indexName++) { + for (int shardId = 0; shardId < shardsPerIndex; shardId++) { + ShardRouting shardRouting = TestShardRouting.newShardRouting( + "foo" + indexName, + shardId, + "node_id", + true, + ShardRoutingState.STARTED + ); + List segmentList = new ArrayList<>(); + for (int segmentNum = 0; segmentNum < segmentsPerShard; segmentNum++) { + segmentList.add(new Segment("foo" + indexName + shardId + segmentNum)); + } + shardSegmentsList.add(new ShardSegments(shardRouting, segmentList)); + } + } + Collections.shuffle(shardSegmentsList, random()); + + // Prepare the IndicesSegmentResponse object and get the indicesSegments map + IndicesSegmentResponse response = new IndicesSegmentResponse( + shardSegmentsList.toArray(new ShardSegments[0]), + totalIndices * shardsPerIndex, + totalIndices * shardsPerIndex, + 0, + Collections.emptyList() + ); + Map indicesSegments = response.getIndices(); + + assertEquals(totalIndices, indicesSegments.size()); + for (Map.Entry indexSegmentEntry : indicesSegments.entrySet()) { + assertEquals(shardsPerIndex, indexSegmentEntry.getValue().getShards().size()); + for (IndexShardSegments indexShardSegment : indexSegmentEntry.getValue().getShards().values()) { + for (ShardSegments shardSegment : indexShardSegment.getShards()) { + assertEquals(segmentsPerShard, shardSegment.getSegments().size()); + for (int i = 0; i < segmentsPerShard; i++) { + String segmentName = indexSegmentEntry.getKey() + shardSegment.getShardRouting().getId() + i; + assertEquals(segmentName, shardSegment.getSegments().get(i).getName()); + } + } + } + } + } } diff --git a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java index 37e884502b613..3c1c84653b384 100644 --- a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java @@ -85,6 +85,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdaterTests.createIndexMetadataWithRemoteStoreSettings; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -901,6 +902,7 @@ public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersion private Map getRemoteStoreNodeAttributes() { Map remoteStoreNodeAttributes = new HashMap<>(); + remoteStoreNodeAttributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; } diff --git
a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 9f463673aa6a6..d192a2556c36b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -611,6 +611,100 @@ public void testJoinClusterWithRemoteStateNodeJoiningRemoteStateCluster() { JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); } + public void testJoinRemotePublicationClusterWithNonRemoteNodes() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(new HashMap<>()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testJoinRemotePublicationCluster() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remotePublicationNodeAttributes()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testJoinRemotePubClusterWithRemoteStoreNodes() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + Map newNodeAttributes = new HashMap<>(); + newNodeAttributes.putAll(remoteStateNodeAttributes(CLUSTER_STATE_REPO)); + newNodeAttributes.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + newNodeAttributes.putAll(remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO)); + + DiscoveryNode joiningNode = newDiscoveryNode(newNodeAttributes); + Exception e = assertThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) + ); + assertTrue(e.getMessage().equals("a remote store node [" + joiningNode + "] is trying to join a non remote store cluster")); + } + + public void testPreventJoinRemotePublicationClusterWithIncompatibleAttributes() { + Map existingNodeAttributes = remotePublicationNodeAttributes(); + Map remoteStoreNodeAttributes = remotePublicationNodeAttributes(); + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + existingNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + 
.nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + for (Map.Entry nodeAttribute : existingNodeAttributes.entrySet()) { + remoteStoreNodeAttributes.put(nodeAttribute.getKey(), null); + DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes); + Exception e = assertThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) + ); + assertTrue( + e.getMessage().equals("joining node [" + joiningNode + "] doesn't have the node attribute [" + nodeAttribute.getKey() + "]") + || e.getMessage() + .equals( + "a remote store node [" + + joiningNode + + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" + + currentState.getNodes().getNodes().values().stream().findFirst().get() + + "]" + ) + ); + + remoteStoreNodeAttributes.put(nodeAttribute.getKey(), nodeAttribute.getValue()); + } + } + public void testPreventJoinClusterWithRemoteStateNodeJoiningRemoteStoreCluster() { Map existingNodeAttributes = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); final DiscoveryNode existingNode = new DiscoveryNode( @@ -628,16 +722,7 @@ public void testPreventJoinClusterWithRemoteStateNodeJoiningRemoteStoreCluster() IllegalStateException.class, () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) ); - assertTrue( - e.getMessage() - .equals( - "a remote store node [" - + joiningNode - + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" - + currentState.getNodes().getNodes().values().stream().findFirst().get() - + "]" - ) - ); + assertTrue(e.getMessage().equals("a non remote store node [" + joiningNode + "] is trying to join a remote store cluster")); } public void testPreventJoinClusterWithRemoteStoreNodeJoiningRemoteStateCluster() { @@ -657,16 +742,7 @@ public void testPreventJoinClusterWithRemoteStoreNodeJoiningRemoteStateCluster() IllegalStateException.class, () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) ); - assertTrue( - e.getMessage() - .equals( - "a remote store node [" - + joiningNode - + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" - + currentState.getNodes().getNodes().values().stream().findFirst().get() - + "]" - ) - ); + assertTrue(e.getMessage().equals("a remote store node [" + joiningNode + "] is trying to join a non remote store cluster")); } public void testUpdatesClusterStateWithSingleNodeCluster() throws Exception { @@ -1077,6 +1153,39 @@ public void testRemoteRoutingTableNodeJoinNodeWithRemoteAndRoutingRepoDifference JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); } + public void testJoinRemoteStoreClusterWithRemotePublicationNodeInMixedMode() { + final DiscoveryNode remoteStoreNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final DiscoveryNode nonRemoteStoreNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + new HashMap<>(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final Settings settings = Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), 
RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + .build(); + final Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Metadata metadata = Metadata.builder().persistentSettings(settings).build(); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(remoteStoreNode).add(nonRemoteStoreNode).localNodeId(remoteStoreNode.getId()).build()) + .metadata(metadata) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remotePublicationNodeAttributes()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories) throws Exception { @@ -1115,6 +1224,7 @@ private DiscoveryNode newDiscoveryNode(Map attributes) { } private static final String SEGMENT_REPO = "segment-repo"; + private static final String TRANSLOG_REPO = "translog-repo"; private static final String CLUSTER_STATE_REPO = "cluster-state-repo"; private static final String COMMON_REPO = "remote-repo"; @@ -1161,6 +1271,13 @@ private Map remoteStoreNodeAttributes(String segmentRepoName, St }; } + private Map remotePublicationNodeAttributes() { + Map existingNodeAttributes = new HashMap<>(); + existingNodeAttributes.putAll(remoteStateNodeAttributes(CLUSTER_STATE_REPO)); + existingNodeAttributes.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + return existingNodeAttributes; + } + private Map remoteStateNodeAttributes(String clusterStateRepo) { String clusterStateRepositoryTypeAttributeKey = String.format( Locale.getDefault(), diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 410c8a3d41940..3f223706819b7 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -137,12 +137,10 @@ import static java.util.Collections.singleton; import static java.util.Collections.singletonList; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING; -import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_READ_ONLY_BLOCK; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; @@ -156,7 +154,6 @@ import static org.opensearch.cluster.metadata.MetadataCreateIndexService.getIndexNumberOfRoutingShards; import static org.opensearch.cluster.metadata.MetadataCreateIndexService.parseV1Mappings; import static 
org.opensearch.cluster.metadata.MetadataCreateIndexService.resolveAndValidateAliases; -import static org.opensearch.common.util.FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; import static org.opensearch.index.IndexModule.INDEX_STORE_TYPE_SETTING; import static org.opensearch.index.IndexSettings.INDEX_MERGE_POLICY; @@ -171,6 +168,7 @@ import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.ShardLimitValidatorTests.createTestShardLimitService; import static org.opensearch.node.Node.NODE_ATTRIBUTES; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.getRemoteStoreTranslogRepo; @@ -1657,7 +1655,12 @@ public void testNewIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() null ); - Map missingTranslogAttribute = Map.of(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); + Map missingTranslogAttribute = Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "cluster-state-repo-1", + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + "my-segment-repo-1" + ); DiscoveryNodes finalDiscoveryNodes = DiscoveryNodes.builder() .add(nonRemoteClusterManagerNode) @@ -2501,71 +2504,6 @@ public void testApplyContextWithSettingsOverlap() throws IOException { } } - public void testDefaultSearchReplicasSetting() { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.TRUE).build()); - Settings templateSettings = Settings.EMPTY; - request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test"); - final Settings.Builder requestSettings = Settings.builder(); - request.settings(requestSettings.build()); - Settings indexSettings = aggregateIndexSettings( - ClusterState.EMPTY_STATE, - request, - templateSettings, - null, - Settings.EMPTY, - IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, - randomShardLimitService(), - Collections.emptySet(), - clusterSettings - ); - assertFalse(INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.exists(indexSettings)); - } - - public void testSearchReplicasValidationWithSegmentReplication() { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.TRUE).build()); - Settings templateSettings = Settings.builder().put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT).build(); - request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test"); - final Settings.Builder requestSettings = Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 2); - request.settings(requestSettings.build()); - Settings indexSettings = aggregateIndexSettings( - ClusterState.EMPTY_STATE, - request, - templateSettings, - null, - Settings.EMPTY, - IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, - randomShardLimitService(), - Collections.emptySet(), - clusterSettings - ); - assertEquals("2", indexSettings.get(SETTING_NUMBER_OF_SEARCH_REPLICAS)); - assertEquals(ReplicationType.SEGMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); - } - - public void 
testSearchReplicasValidationWithDocumentReplication() { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.TRUE).build()); - Settings templateSettings = Settings.builder().put(SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT).build(); - request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test"); - final Settings.Builder requestSettings = Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 2); - request.settings(requestSettings.build()); - - IllegalArgumentException exception = expectThrows( - IllegalArgumentException.class, - () -> aggregateIndexSettings( - ClusterState.EMPTY_STATE, - request, - templateSettings, - null, - Settings.EMPTY, - IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, - randomShardLimitService(), - Collections.emptySet(), - clusterSettings - ) - ); - assertEquals("To set index.number_of_search_only_replicas, index.replication.type must be set to SEGMENT", exception.getMessage()); - } - private IndexTemplateMetadata addMatchingTemplate(Consumer configurator) { IndexTemplateMetadata.Builder builder = templateMetadataBuilder("template1", "te*"); configurator.accept(builder); @@ -2627,6 +2565,7 @@ private void verifyRemoteStoreIndexSettings( private DiscoveryNode getRemoteNode() { Map attributes = new HashMap<>(); + attributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-rep-1"); attributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); attributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return new DiscoveryNode( diff --git a/server/src/test/java/org/opensearch/cluster/metadata/SearchOnlyReplicaTests.java b/server/src/test/java/org/opensearch/cluster/metadata/SearchOnlyReplicaTests.java index b1dd397c97218..3d11193a07884 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/SearchOnlyReplicaTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/SearchOnlyReplicaTests.java @@ -24,9 +24,11 @@ import org.opensearch.indices.ShardLimitValidator; import org.opensearch.indices.cluster.ClusterStateChanges; import org.opensearch.indices.replication.common.ReplicationType; -import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; import java.util.ArrayList; import java.util.Collections; @@ -40,157 +42,194 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; -import static org.opensearch.common.util.FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL; -public class SearchOnlyReplicaTests extends OpenSearchTestCase { +public class SearchOnlyReplicaTests extends OpenSearchSingleNodeTestCase { - public void testUpdateSearchReplicaCount() { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(READER_WRITER_SPLIT_EXPERIMENTAL, "true").build()); - final ThreadPool threadPool = new TestThreadPool(getClass().getName()); + private ThreadPool threadPool; + + @Before + public void setUp() throws Exception { + super.setUp(); + this.threadPool = new TestThreadPool(getClass().getName()); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + terminate(threadPool); 
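// Editorial note: moving the thread pool into setUp()/tearDown() and the feature flag into
// featureFlagSettings() (below) replaces the per-test FeatureFlags.initializeFeatureFlags()
// calls and try/finally blocks these tests previously needed. At the API level, the scenario
// the tests model is roughly the following sketch (hypothetical client usage; the index name
// is illustrative, the setting constants are the ones imported by this file):
//
//   client().admin().indices().prepareCreate("my-index")
//       .setSettings(Settings.builder()
//           .put(SETTING_NUMBER_OF_SHARDS, 1)
//           .put(SETTING_NUMBER_OF_REPLICAS, 0)             // write replicas
//           .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1)      // search-only replicas
//           .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT))
//       .get();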
+ } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder() + .put(super.featureFlagSettings()) + .put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.getKey(), true) + .build(); + } + + public void testCreateWithDefaultSearchReplicasSetting() { final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); + ClusterState state = createIndexWithSettings(cluster, Settings.builder().build()); + IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(1, indexShardRoutingTable.replicaShards().size()); + assertEquals(0, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(1, indexShardRoutingTable.writerReplicas().size()); + } - try { - List allNodes = new ArrayList<>(); - // node for primary/local - DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); - allNodes.add(localNode); - // node for search replicas - we'll start with 1 and add another - for (int i = 0; i < 2; i++) { - allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); - } - ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); - - CreateIndexRequest request = new CreateIndexRequest( - "index", + public void testSearchReplicasValidationWithDocumentReplication() { + final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); + RuntimeException exception = expectThrows( + RuntimeException.class, + () -> createIndexWithSettings( + cluster, Settings.builder() .put(SETTING_NUMBER_OF_SHARDS, 1) .put(SETTING_NUMBER_OF_REPLICAS, 0) - .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT) .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) .build() - ).waitForActiveShards(ActiveShardCount.NONE); - state = cluster.createIndex(state, request); - assertTrue(state.metadata().hasIndex("index")); - rerouteUntilActive(state, cluster); - IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); - assertEquals(1, indexShardRoutingTable.replicaShards().size()); - assertEquals(1, indexShardRoutingTable.searchOnlyReplicas().size()); - assertEquals(0, indexShardRoutingTable.writerReplicas().size()); - - // add another replica - state = cluster.updateSettings( - state, - new UpdateSettingsRequest("index").settings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 2).build()) - ); - rerouteUntilActive(state, cluster); - indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); - assertEquals(2, indexShardRoutingTable.replicaShards().size()); - assertEquals(2, indexShardRoutingTable.searchOnlyReplicas().size()); - assertEquals(0, indexShardRoutingTable.writerReplicas().size()); - - // remove all replicas - state = cluster.updateSettings( - state, - new UpdateSettingsRequest("index").settings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 0).build()) - ); - rerouteUntilActive(state, cluster); - indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); - assertEquals(0, indexShardRoutingTable.replicaShards().size()); - assertEquals(0, indexShardRoutingTable.searchOnlyReplicas().size()); - assertEquals(0, indexShardRoutingTable.writerReplicas().size()); - } finally { - terminate(threadPool); - } + 
) + ); + assertEquals( + "To set index.number_of_search_only_replicas, index.replication.type must be set to SEGMENT", + exception.getCause().getMessage() + ); } - public void testUpdateSearchReplicasOverShardLimit() { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(READER_WRITER_SPLIT_EXPERIMENTAL, "true").build()); - final ThreadPool threadPool = new TestThreadPool(getClass().getName()); + public void testUpdateSearchReplicaCount() { final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); - try { - List allNodes = new ArrayList<>(); - // node for primary/local - DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); - allNodes.add(localNode); + ClusterState state = createIndexWithSettings( + cluster, + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build() + ); + assertTrue(state.metadata().hasIndex("index")); + rerouteUntilActive(state, cluster); + IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(1, indexShardRoutingTable.replicaShards().size()); + assertEquals(1, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(0, indexShardRoutingTable.writerReplicas().size()); + + // add another replica + state = cluster.updateSettings( + state, + new UpdateSettingsRequest("index").settings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 2).build()) + ); + rerouteUntilActive(state, cluster); + indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(2, indexShardRoutingTable.replicaShards().size()); + assertEquals(2, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(0, indexShardRoutingTable.writerReplicas().size()); + + // remove all replicas + state = cluster.updateSettings( + state, + new UpdateSettingsRequest("index").settings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 0).build()) + ); + rerouteUntilActive(state, cluster); + indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(0, indexShardRoutingTable.replicaShards().size()); + assertEquals(0, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(0, indexShardRoutingTable.writerReplicas().size()); + } + private ClusterState createIndexWithSettings(ClusterStateChanges cluster, Settings settings) { + List allNodes = new ArrayList<>(); + // node for primary/local + DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); + allNodes.add(localNode); + // node for search replicas - we'll start with 1 and add another + for (int i = 0; i < 2; i++) { allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); + } + ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); - ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); + CreateIndexRequest request = new CreateIndexRequest("index", settings).waitForActiveShards(ActiveShardCount.NONE); + state = cluster.createIndex(state, request); + return state; + } - CreateIndexRequest request = new CreateIndexRequest( - "index", - Settings.builder() - 
.put(SETTING_NUMBER_OF_SHARDS, 1) - .put(SETTING_NUMBER_OF_REPLICAS, 0) - .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) - .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) - .build() - ).waitForActiveShards(ActiveShardCount.NONE); - state = cluster.createIndex(state, request); - assertTrue(state.metadata().hasIndex("index")); - rerouteUntilActive(state, cluster); - - // add another replica - ClusterState finalState = state; - Integer maxShardPerNode = ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getDefault(Settings.EMPTY); - expectThrows( - RuntimeException.class, - () -> cluster.updateSettings( - finalState, - new UpdateSettingsRequest("index").settings( - Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, maxShardPerNode * 2).build() - ) - ) - ); + public void testUpdateSearchReplicasOverShardLimit() { + final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); - } finally { - terminate(threadPool); - } + List allNodes = new ArrayList<>(); + // node for primary/local + DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); + allNodes.add(localNode); + + allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); + + ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); + + CreateIndexRequest request = new CreateIndexRequest( + "index", + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build() + ).waitForActiveShards(ActiveShardCount.NONE); + state = cluster.createIndex(state, request); + assertTrue(state.metadata().hasIndex("index")); + rerouteUntilActive(state, cluster); + + // add another replica + ClusterState finalState = state; + Integer maxShardPerNode = ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getDefault(Settings.EMPTY); + expectThrows( + RuntimeException.class, + () -> cluster.updateSettings( + finalState, + new UpdateSettingsRequest("index").settings( + Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, maxShardPerNode * 2).build() + ) + ) + ); } public void testUpdateSearchReplicasOnDocrepCluster() { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(READER_WRITER_SPLIT_EXPERIMENTAL, "true").build()); - final ThreadPool threadPool = new TestThreadPool(getClass().getName()); final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); - try { - List allNodes = new ArrayList<>(); - // node for primary/local - DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); - allNodes.add(localNode); - - allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); - - ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); - - CreateIndexRequest request = new CreateIndexRequest( - "index", - Settings.builder() - .put(SETTING_NUMBER_OF_SHARDS, 1) - .put(SETTING_NUMBER_OF_REPLICAS, 0) - .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT) - .build() - ).waitForActiveShards(ActiveShardCount.NONE); - state = cluster.createIndex(state, request); - assertTrue(state.metadata().hasIndex("index")); - rerouteUntilActive(state, 
cluster); - - // add another replica - ClusterState finalState = state; - Integer maxShardPerNode = ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getDefault(Settings.EMPTY); - expectThrows( - RuntimeException.class, - () -> cluster.updateSettings( - finalState, - new UpdateSettingsRequest("index").settings( - Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, maxShardPerNode * 2).build() - ) + List allNodes = new ArrayList<>(); + // node for primary/local + DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); + allNodes.add(localNode); + + allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); + + ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); + + CreateIndexRequest request = new CreateIndexRequest( + "index", + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT) + .build() + ).waitForActiveShards(ActiveShardCount.NONE); + state = cluster.createIndex(state, request); + assertTrue(state.metadata().hasIndex("index")); + rerouteUntilActive(state, cluster); + + // add another replica + ClusterState finalState = state; + Integer maxShardPerNode = ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getDefault(Settings.EMPTY); + expectThrows( + RuntimeException.class, + () -> cluster.updateSettings( + finalState, + new UpdateSettingsRequest("index").settings( + Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, maxShardPerNode * 2).build() ) - ); - } finally { - terminate(threadPool); - } + ) + ); + } private static void rerouteUntilActive(ClusterState state, ClusterStateChanges cluster) { diff --git a/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java index ad8b48d56c417..aaeeb52ab5709 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java @@ -1118,6 +1118,82 @@ public void testPartialIndexPrimaryDefault() throws Exception { } } + public void testSearchReplicaDefaultRouting() throws Exception { + final int numShards = 1; + final int numReplicas = 2; + final int numSearchReplicas = 2; + final String indexName = "test"; + final String[] indexNames = new String[] { indexName }; + + ClusterService clusterService = null; + ThreadPool threadPool = null; + + try { + OperationRouting opRouting = new OperationRouting( + Settings.builder().put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, "true").build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + + ClusterState state = ClusterStateCreationUtils.stateWithAssignedPrimariesAndReplicas( + indexNames, + numShards, + numReplicas, + numSearchReplicas + ); + IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().index(indexName).getShards().get(0); + ShardId shardId = indexShardRoutingTable.searchOnlyReplicas().get(0).shardId(); + + threadPool = new TestThreadPool("testSearchReplicaDefaultRouting"); + clusterService = ClusterServiceUtils.createClusterService(threadPool); + + // add a search replica in initializing state: + DiscoveryNode node = new DiscoveryNode( + "node_initializing", + OpenSearchTestCase.buildNewFakeTransportAddress(), + 
Collections.emptyMap(), + new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), + Version.CURRENT + ); + + IndexMetadata indexMetadata = IndexMetadata.builder(indexName) + .settings(Settings.builder().put(state.metadata().index(indexName).getSettings()).build()) + .numberOfSearchReplicas(3) + .numberOfReplicas(2) + .build(); + Metadata.Builder metadataBuilder = Metadata.builder(state.metadata()).put(indexMetadata, false).generateClusterUuidIfNeeded(); + IndexRoutingTable.Builder indexShardRoutingBuilder = IndexRoutingTable.builder(indexMetadata.getIndex()); + indexShardRoutingBuilder.addIndexShard(indexShardRoutingTable); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(shardId, node.getId(), null, false, true, ShardRoutingState.INITIALIZING, null) + ); + state = ClusterState.builder(state) + .routingTable(RoutingTable.builder().add(indexShardRoutingBuilder).build()) + .metadata(metadataBuilder.build()) + .build(); + + // With the reader/writer split enabled, default (no-preference) routing should return only search replicas + GroupShardsIterator<ShardIterator> groupIterator = opRouting.searchShards(state, indexNames, null, null); + assertThat("one group per shard", groupIterator.size(), equalTo(numShards)); + for (ShardIterator shardIterator : groupIterator) { + assertEquals("We should have 3 shards returned", 3, shardIterator.size()); + int i = 0; + for (ShardRouting shardRouting : shardIterator) { + assertTrue("Only search replicas should be returned by default routing", shardRouting.isSearchOnly()); + if (i == shardIterator.size() - 1) { + assertTrue("Initializing shard should appear last", shardRouting.initializing()); + assertFalse("Initializing shard should appear last", shardRouting.active()); + } + i++; + } + } + } finally { + IOUtils.close(clusterService); + terminate(threadPool); + } + } + private DiscoveryNode[] setupNodes() { // Sets up two data nodes in zone-a and one data node in zone-b List<String> zones = Arrays.asList("a", "a", "b"); diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 5e3b74ee138ab..f8e1c609e6ee8 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -68,6 +68,7 @@ import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; import static org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -853,6 +854,8 @@ public void testPreferReplicaOnRemoteNodeForPrimaryPromotion() { // add a remote node and start primary shard Map<String, String> remoteStoreNodeAttributes = Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_VALUE", REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE", 
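// Editorial note: join validation now treats the cluster-state repository attribute as part of
// the minimal remote-store attribute set, which is why fixtures across this PR (here and in
// TransportClusterManagerNodeActionTests, MetadataCreateIndexServiceTests, and
// RemoteStoreUtilsTests) add REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY alongside
// the segment and translog repository attributes.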
REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java index 5b29922f2400c..e6e81c94e7f32 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java @@ -68,6 +68,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.NONE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -617,6 +618,7 @@ private DiscoveryNode getRemoteNode() { REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_VALUE" ); + attributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE"); return new DiscoveryNode( UUIDs.base64UUID(), buildNewFakeTransportAddress(), diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java index 2e303887e0f1b..f226c45553d57 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java @@ -323,7 +323,7 @@ private ClusterState createInitialClusterState(AllocationService service, Settin return createInitialClusterState(service, indexSettings, Settings.EMPTY); } - private ClusterState createInitialClusterState(AllocationService service, Settings idxSettings, Settings clusterSettings) { + static ClusterState createInitialClusterState(AllocationService service, Settings idxSettings, Settings clusterSettings) { Metadata.Builder metadata = Metadata.builder(); metadata.persistentSettings(clusterSettings); final Settings.Builder indexSettings = settings(Version.CURRENT).put(idxSettings); diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDeciderTests.java new file mode 100644 index 0000000000000..8d4f4cdee26cc --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDeciderTests.java @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.EmptyClusterInfoService; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.snapshots.EmptySnapshotsInfoService; +import org.opensearch.test.gateway.TestGatewayAllocator; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import static org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING; + +public class SearchReplicaAllocationDeciderTests extends OpenSearchAllocationTestCase { + + public void testSearchReplicaRoutingDedicatedIncludes() { + // we aren't using a SettingsModule here, so we need to register the feature-flag-gated setting ourselves + Set<Setting<?>> settings = new HashSet<>(ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + settings.add(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING); + ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), settings); + Settings initialSettings = Settings.builder() + .put("cluster.routing.allocation.search.replica.dedicated.include._id", "node1,node2") + .build(); + + SearchReplicaAllocationDecider filterAllocationDecider = new SearchReplicaAllocationDecider(initialSettings, clusterSettings); + AllocationDeciders allocationDeciders = new AllocationDeciders( + Arrays.asList( + filterAllocationDecider, + new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), + new ReplicaAfterPrimaryActiveAllocationDecider() + ) + ); + AllocationService service = new AllocationService( + allocationDeciders, + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + ClusterState state = FilterAllocationDeciderTests.createInitialClusterState(service, Settings.EMPTY, Settings.EMPTY); + RoutingTable routingTable = state.routingTable(); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + allocation.debugDecision(true); + + ShardRouting searchReplica = ShardRouting.newUnassigned( + routingTable.index("sourceIndex").shard(0).shardId(), + false, + true, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "") + ); + + ShardRouting regularReplica = ShardRouting.newUnassigned( + routingTable.index("sourceIndex").shard(0).shardId(), + false, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + ShardRouting primary = ShardRouting.newUnassigned( + routingTable.index("sourceIndex").shard(0).shardId(), + true, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + Decision.Single decision = (Decision.Single) filterAllocationDecider.canAllocate( + searchReplica, + 
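// Editorial note: the decisions asserted below pin down the intended one-way semantics of
// SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING: search replicas may allocate only to nodes
// matching the include filter, while primaries and regular (write) replicas are kept off those
// dedicated nodes; narrowing the filter later also flips canRemain to NO for a search replica
// left on an excluded node.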
state.getRoutingNodes().node("node2"), + allocation + ); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(searchReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + + Settings updatedSettings = Settings.builder() + .put("cluster.routing.allocation.search.replica.dedicated.include._id", "node2") + .build(); + clusterSettings.applySettings(updatedSettings); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(searchReplica, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(searchReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canRemain(searchReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canRemain(regularReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canRemain(primary, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + } +} diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index 8618d776bdf04..bd86d3d396987 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -264,7 +264,8 @@ private 
IndexService newIndexService(IndexModule module) throws IOException { translogFactorySupplier, () -> IndexSettings.DEFAULT_REFRESH_INTERVAL, DefaultRecoverySettings.INSTANCE, - DefaultRemoteStoreSettings.INSTANCE + DefaultRemoteStoreSettings.INSTANCE, + s -> {} ); } diff --git a/server/src/test/java/org/opensearch/index/IndexServiceTests.java b/server/src/test/java/org/opensearch/index/IndexServiceTests.java index 14451ef21726e..5905e64cede1b 100644 --- a/server/src/test/java/org/opensearch/index/IndexServiceTests.java +++ b/server/src/test/java/org/opensearch/index/IndexServiceTests.java @@ -41,6 +41,7 @@ import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -52,6 +53,7 @@ import org.opensearch.index.shard.IndexShardTestCase; import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.plugins.Plugin; import org.opensearch.test.InternalSettingsPlugin; import org.opensearch.test.OpenSearchSingleNodeTestCase; @@ -591,6 +593,57 @@ public void testIndexSortBackwardCompatible() { } } + public void testReplicationTask() throws Exception { + // create with docrep - task should not schedule + IndexService indexService = createIndex( + "docrep_index", + Settings.builder().put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT).build() + ); + final Index index = indexService.index(); + ensureGreen(index.getName()); + IndexService.AsyncReplicationTask task = indexService.getReplicationTask(); + assertFalse(task.isScheduled()); + assertFalse(task.mustReschedule()); + + // create for segrep - task should schedule + indexService = createIndex( + "segrep_index", + Settings.builder() + .put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "5s") + .build() + ); + final Index srIndex = indexService.index(); + ensureGreen(srIndex.getName()); + task = indexService.getReplicationTask(); + assertTrue(task.isScheduled()); + assertTrue(task.mustReschedule()); + assertEquals(5000, task.getInterval().millis()); + + // test we can update the interval + client().admin() + .indices() + .prepareUpdateSettings("segrep_index") + .setSettings(Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "1s")) + .get(); + + IndexService.AsyncReplicationTask updatedTask = indexService.getReplicationTask(); + assertNotSame(task, updatedTask); + assertFalse(task.isScheduled()); + assertTrue(task.isClosed()); + assertTrue(updatedTask.isScheduled()); + assertTrue(updatedTask.mustReschedule()); + assertEquals(1000, updatedTask.getInterval().millis()); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder() + .put(super.featureFlagSettings()) + .put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.getKey(), true) + .build(); + } + private static String createTestMapping(String type) { return " \"properties\": {\n" + " \"test\": {\n" diff --git a/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java 
b/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java index 0c6d21d28cc8a..1c267c67e60ed 100644 --- a/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java @@ -119,23 +119,23 @@ public void testStarTreeDocValues() throws IOException { Document doc = new Document(); doc.add(new SortedNumericDocValuesField("sndv", 1)); doc.add(new SortedNumericDocValuesField("dv", 1)); - doc.add(new SortedNumericDocValuesField("field", 1)); + doc.add(new SortedNumericDocValuesField("field", -1)); iw.addDocument(doc); doc = new Document(); doc.add(new SortedNumericDocValuesField("sndv", 1)); doc.add(new SortedNumericDocValuesField("dv", 1)); - doc.add(new SortedNumericDocValuesField("field", 1)); + doc.add(new SortedNumericDocValuesField("field", -1)); iw.addDocument(doc); doc = new Document(); iw.forceMerge(1); doc.add(new SortedNumericDocValuesField("sndv", 2)); doc.add(new SortedNumericDocValuesField("dv", 2)); - doc.add(new SortedNumericDocValuesField("field", 2)); + doc.add(new SortedNumericDocValuesField("field", -2)); iw.addDocument(doc); doc = new Document(); doc.add(new SortedNumericDocValuesField("sndv", 2)); doc.add(new SortedNumericDocValuesField("dv", 2)); - doc.add(new SortedNumericDocValuesField("field", 2)); + doc.add(new SortedNumericDocValuesField("field", -2)); iw.addDocument(doc); iw.forceMerge(1); iw.close(); @@ -144,11 +144,39 @@ public void testStarTreeDocValues() throws IOException { TestUtil.checkReader(ir); assertEquals(1, ir.leaves().size()); + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree documents + /** + * sndv dv | [sum, value_count, min, max[field]], [sum, value_count, min, max[sndv]], doc_count + * [1, 1] | [-2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0] + * [2, 2] | [-4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0] + * [null, 1] | [-2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0] + * [null, 2] | [-4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0] + */ StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[4]; - expectedStarTreeDocuments[0] = new StarTreeDocument(new Long[] { 1L, 1L }, new Double[] { 2.0, 2.0, 2.0 }); - expectedStarTreeDocuments[1] = new StarTreeDocument(new Long[] { 2L, 2L }, new Double[] { 4.0, 2.0, 4.0 }); - expectedStarTreeDocuments[2] = new StarTreeDocument(new Long[] { null, 1L }, new Double[] { 2.0, 2.0, 2.0 }); - expectedStarTreeDocuments[3] = new StarTreeDocument(new Long[] { null, 2L }, new Double[] { 4.0, 2.0, 4.0 }); + expectedStarTreeDocuments[0] = new StarTreeDocument( + new Long[] { 1L, 1L }, + new Double[] { -2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0 } + ); + expectedStarTreeDocuments[1] = new StarTreeDocument( + new Long[] { 2L, 2L }, + new Double[] { -4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0 } + ); + expectedStarTreeDocuments[2] = new StarTreeDocument( + new Long[] { null, 1L }, + new Double[] { -2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0 } + ); + expectedStarTreeDocuments[3] = new StarTreeDocument( + new Long[] { null, 2L }, + new Double[] { -4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0 } + ); for (LeafReaderContext context : ir.leaves()) { SegmentReader reader = Lucene.segmentReader(context.reader()); @@ -159,7 +187,17 @@ public void testStarTreeDocValues() throws IOException { StarTreeValues 
starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( List.of(starTreeValues), - List.of(NumberFieldMapper.NumberType.DOUBLE, NumberFieldMapper.NumberType.LONG, NumberFieldMapper.NumberType.LONG), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), reader.maxDoc() ); assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); @@ -190,6 +228,19 @@ private XContentBuilder getExpandedMapping() throws IOException { b.startArray("stats"); b.value("sum"); b.value("value_count"); + b.value("avg"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", "sndv"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("avg"); + b.value("min"); + b.value("max"); b.endArray(); b.endObject(); b.endArray(); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java index f3e111bf6caa6..b7395b993f67b 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java @@ -61,6 +61,9 @@ public static StarTreeDocument[] getSegmentsStarTreeDocuments( // get doc id set iterators for metrics for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { for (MetricStat metricStat : metric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeValues.getStarTreeField().getName(), metric.getField(), @@ -125,18 +128,18 @@ public static void assertStarTreeDocuments(StarTreeDocument[] starTreeDocuments, assertNotNull(resultStarTreeDocument.dimensions); assertNotNull(resultStarTreeDocument.metrics); - assertEquals(resultStarTreeDocument.dimensions.length, expectedStarTreeDocument.dimensions.length); - assertEquals(resultStarTreeDocument.metrics.length, expectedStarTreeDocument.metrics.length); + assertEquals(expectedStarTreeDocument.dimensions.length, resultStarTreeDocument.dimensions.length); + assertEquals(expectedStarTreeDocument.metrics.length, resultStarTreeDocument.metrics.length); for (int di = 0; di < resultStarTreeDocument.dimensions.length; di++) { - assertEquals(resultStarTreeDocument.dimensions[di], expectedStarTreeDocument.dimensions[di]); + assertEquals(expectedStarTreeDocument.dimensions[di], resultStarTreeDocument.dimensions[di]); } for (int mi = 0; mi < resultStarTreeDocument.metrics.length; mi++) { if (expectedStarTreeDocument.metrics[mi] instanceof Long) { - assertEquals(resultStarTreeDocument.metrics[mi], ((Long) expectedStarTreeDocument.metrics[mi]).doubleValue()); + assertEquals(((Long) expectedStarTreeDocument.metrics[mi]).doubleValue(), resultStarTreeDocument.metrics[mi]); } else { - assertEquals(resultStarTreeDocument.metrics[mi], expectedStarTreeDocument.metrics[mi]); + assertEquals(expectedStarTreeDocument.metrics[mi], resultStarTreeDocument.metrics[mi]); } } } @@ 
-267,9 +270,34 @@ public static void assertStarTreeMetadata(StarTreeMetadata expectedStarTreeMetad Metric expectedMetric = expectedStarTreeMetadata.getMetrics().get(i); Metric resultMetric = resultStarTreeMetadata.getMetrics().get(i); assertEquals(expectedMetric.getField(), resultMetric.getField()); + List<MetricStat> metricStats = new ArrayList<>(); + for (MetricStat metricStat : expectedMetric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } + metricStats.add(metricStat); + } + Metric expectedMetricWithoutDerivedMetrics = new Metric(expectedMetric.getField(), metricStats); + metricStats = new ArrayList<>(); + for (MetricStat metricStat : resultMetric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } + metricStats.add(metricStat); + } + Metric resultantMetricWithoutDerivedMetrics = new Metric(resultMetric.getField(), metricStats); + + // assert base metrics are stored in order in the metadata + for (int j = 0; j < expectedMetricWithoutDerivedMetrics.getMetrics().size(); j++) { + assertEquals( + expectedMetricWithoutDerivedMetrics.getMetrics().get(j), + resultantMetricWithoutDerivedMetrics.getMetrics().get(j) + ); + } + + // assert all metrics (including derived metrics) are present for (int j = 0; j < expectedMetric.getMetrics().size(); j++) { - assertEquals(expectedMetric.getMetrics().get(j), resultMetric.getMetrics().get(j)); + assertTrue(resultMetric.getMetrics().contains(expectedMetric.getMetrics().get(j))); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index ad54bda9a916e..65adc43ea8bea 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -205,9 +205,7 @@ private SegmentReadState getReadState(int numDocs, List<String> dimensionFields, int numMetrics = 0; for (Metric metric : metrics) { - for (MetricStat metricStat : metric.getMetrics()) { - numMetrics++; - } + numMetrics += metric.getBaseMetrics().size(); } FieldInfo[] fields = new FieldInfo[dimensionFields.size() + numMetrics]; @@ -237,7 +235,7 @@ private SegmentReadState getReadState(int numDocs, List<String> dimensionFields, } for (Metric metric : metrics) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { fields[i] = new FieldInfo( fullyQualifiedFieldNameForStarTreeMetricsDocValues( compositeField.getName(), @@ -2047,7 +2045,7 @@ public void testFlushFlow() throws IOException { VERSION_CURRENT, builder.numStarTreeNodes, List.of("field1", "field3"), - List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT))), + List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), 6, builder.numStarTreeDocs, 1000, @@ -2138,7 +2136,7 @@ public void testFlushFlowDimsReverse() throws IOException { VERSION_CURRENT, builder.numStarTreeNodes, List.of("field1", "field3"), - List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT))), + List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), 6, builder.numStarTreeDocs, 1000, @@ -2270,8 +2268,9 @@ private StarTreeField getStarTreeFieldWithMultipleMetrics() { Dimension d2 = new NumericDimension("field3"); Metric m1 = new 
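// Editorial note on the AVG additions in these fixtures: AVG is a derived metric, computed at
// read time from the SUM and VALUE_COUNT base metrics (avg = sum / value_count), so it is not
// written as its own doc-values field. That is why getReadState() above sizes fields by
// metric.getBaseMetrics() and why assertStarTreeMetadata() checks ordering only for base
// metrics while merely requiring derived metrics to be present. A minimal sketch of the
// assumed contract:
//
//   Metric m = new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG));
//   assert m.getBaseMetrics().equals(List.of(MetricStat.SUM, MetricStat.VALUE_COUNT));
//   assert MetricStat.AVG.isDerivedMetric();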
Metric("field2", List.of(MetricStat.SUM)); Metric m2 = new Metric("field2", List.of(MetricStat.VALUE_COUNT)); + Metric m3 = new Metric("field2", List.of(MetricStat.AVG)); List<Dimension> dims = List.of(d1, d2); - List<Metric> metrics = List.of(m1, m2); + List<Metric> metrics = List.of(m1, m2, m3); StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(1000, new HashSet<>(), getBuildMode()); return new StarTreeField("sf", dims, metrics, c); } @@ -4069,12 +4068,19 @@ public void testMergeFlow() throws IOException { metricsWithField.add(i); } + List<Long> metricsListValueCount = new ArrayList<>(1000); + List<Integer> metricsWithFieldValueCount = new ArrayList<>(1000); + for (int i = 0; i < 1000; i++) { + metricsListValueCount.add((long) i); + metricsWithFieldValueCount.add(i); + } + Dimension d1 = new NumericDimension("field1"); Dimension d2 = new NumericDimension("field3"); Dimension d3 = new NumericDimension("field5"); Dimension d4 = new NumericDimension("field8"); // Dimension d5 = new NumericDimension("field5"); - Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); + Metric m1 = new Metric("field2", List.of(MetricStat.SUM, MetricStat.AVG, MetricStat.VALUE_COUNT)); Metric m2 = new Metric("_doc_count", List.of(MetricStat.DOC_COUNT)); List<Dimension> dims = List.of(d1, d2, d3, d4); List<Metric> metrics = List.of(m1, m2); @@ -4085,6 +4091,7 @@ SortedNumericDocValues d3sndv = getSortedNumericMock(dimList3, docsWithField3); SortedNumericDocValues d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues valueCountSndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues m2sndv = DocValues.emptySortedNumeric(); Map<String, Supplier<SortedNumericDocValues>> dimDocIdSetIterators = Map.of( "field1", @@ -4100,6 +4107,8 @@ Map<String, Supplier<SortedNumericDocValues>> metricDocIdSetIterators = Map.of( "sf_field2_sum_metric", () -> m1sndv, + "sf_field2_value_count_metric", + () -> valueCountSndv, "sf__doc_count_doc_count_metric", () -> m2sndv ); @@ -4118,6 +4127,7 @@ SortedNumericDocValues f2d3sndv = getSortedNumericMock(dimList3, docsWithField3); SortedNumericDocValues f2d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues f2ValueCountSndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues f2m2sndv = DocValues.emptySortedNumeric(); Map<String, Supplier<SortedNumericDocValues>> f2dimDocIdSetIterators = Map.of( "field1", @@ -4133,6 +4143,8 @@ Map<String, Supplier<SortedNumericDocValues>> f2metricDocIdSetIterators = Map.of( "sf_field2_sum_metric", () -> f2m1sndv, + "sf_field2_value_count_metric", + () -> f2ValueCountSndv, "sf__doc_count_doc_count_metric", () -> f2m2sndv ); diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index b10035f54a0c0..f291b864beb59 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -136,7 +136,7 @@ public void testTermsQuery() { new TermInSetQuery("field", terms), new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms) ); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), null)); + assertEquals(expected, 
ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); Query expectedIndex = new TermInSetQuery("field", terms); @@ -225,7 +225,7 @@ public void testRegexpQuery() { new RegexpQuery(new Term("field", "foo.*")), new RegexpQuery(new Term("field", "foo.*"), 0, 0, RegexpQuery.DEFAULT_PROVIDER, 10, MultiTermQuery.DOC_VALUES_REWRITE) ), - ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) + ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); Query indexExpected = new RegexpQuery(new Term("field", "foo.*")); @@ -267,7 +267,7 @@ public void testFuzzyQuery() { new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true), new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE) ), - ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC) + ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); Query indexExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true); @@ -308,7 +308,7 @@ public void testWildCardQuery() { MultiTermQuery.DOC_VALUES_REWRITE ) ); - assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); Query indexExpected = new WildcardQuery(new Term("field", new BytesRef("foo*"))); MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index a6db37285fe6f..be30de97ee830 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -67,6 +67,7 @@ import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -540,6 +541,7 @@ private RoutingTable createRoutingTableAllShardsStarted( private Map getRemoteStoreNodeAttributes() { Map remoteStoreNodeAttributes = new HashMap<>(); + remoteStoreNodeAttributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; @@ -635,7 +637,7 @@ private Tuple, Set> testGetPinnedTimestampLockedFilesW String metadataPrefix = "metadata__1__2__3__4__5__"; Map 
metadataFiles = new HashMap<>(); for (Long metadataFileTimestamp : metadataFileTimestamps) { - metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp) + "__1"); } return new Tuple<>( metadataFiles, @@ -660,7 +662,7 @@ private Tuple, Set> testGetPinnedTimestampLockedFilesW String primaryTerm = RemoteStoreUtils.invertLong(metadataFileTimestampPrimaryTerm.getValue()); String metadataPrefix = "metadata__" + primaryTerm + "__2__3__4__5__"; long metadataFileTimestamp = metadataFileTimestampPrimaryTerm.getKey(); - metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp) + "__1"); } return new Tuple<>( metadataFiles, diff --git a/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java b/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java index 7ffcc0fb7437a..f7f921e824490 100644 --- a/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java +++ b/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java @@ -57,6 +57,8 @@ import java.io.IOException; import java.util.Collections; +import static org.opensearch.index.mapper.FieldTypeTestCase.MOCK_QSC_ENABLE_INDEX_DOC_VALUES; + public class NestedHelperTests extends OpenSearchSingleNodeTestCase { IndexService indexService; @@ -132,28 +134,28 @@ public void testMatchNo() { } public void testTermsQuery() { - Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), null); + Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertFalse(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertFalse(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); 
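The MOCK_QSC_ENABLE_INDEX_DOC_VALUES context threaded through these termsQuery calls exists because keyword multi-term queries no longer accept a null context: the field type now consults the shard context to see whether the index-or-doc-values path is enabled before choosing a query shape. A minimal sketch of that decision, with assumed stand-in types (the real pieces are KeywordFieldType, TermInSetQuery, and Lucene's IndexOrDocValuesQuery):

```java
import java.util.List;

// Simplified model of the keyword terms-query planning that the new cluster setting gates.
public final class KeywordTermsQuerySketch {

    sealed interface Query permits TermInSetQuery, DocValuesTermsQuery, IndexOrDocValuesQuery {}

    record TermInSetQuery(String field, List<String> terms) implements Query {}

    record DocValuesTermsQuery(String field, List<String> terms) implements Query {}

    record IndexOrDocValuesQuery(Query indexQuery, Query dvQuery) implements Query {}

    static Query termsQuery(String field, List<String> terms, boolean indexed, boolean hasDocValues,
                            boolean indexOrDocValuesEnabled) {
        if (indexed && hasDocValues && indexOrDocValuesEnabled) {
            // Both structures exist: emit both shapes and let the engine pick the
            // cheaper one per segment at rewrite time.
            return new IndexOrDocValuesQuery(new TermInSetQuery(field, terms), new DocValuesTermsQuery(field, terms));
        }
        if (indexed) {
            return new TermInSetQuery(field, terms);
        }
        if (hasDocValues) {
            return new DocValuesTermsQuery(field, terms); // doc-values-only fields still work, just more slowly
        }
        throw new IllegalArgumentException("field [" + field + "] is neither indexed nor has doc values");
    }

    public static void main(String[] args) {
        System.out.println(termsQuery("field", List.of("foo", "bar"), true, true, true));
    }
}
```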
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index 336d4bafd4b66..ecd6620dbea15 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -1170,9 +1170,9 @@ public void testInitializeToSpecificTimestampNoMdMatchingTimestamp() throws IOEx public void testInitializeToSpecificTimestampMatchingMdFile() throws IOException { String metadataPrefix = "metadata__1__2__3__4__5__"; List metadataFiles = new ArrayList<>(); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(1000)); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(2000)); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(3000)); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(1000) + "__1"); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(2000) + "__1"); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(3000) + "__1"); Map metadata = new HashMap<>(); metadata.put("_0.cfe", "_0.cfe::_0.cfe__" + UUIDs.base64UUID() + "::1234::512::" + Version.LATEST.major); @@ -1184,7 +1184,7 @@ public void testInitializeToSpecificTimestampMatchingMdFile() throws IOException Integer.MAX_VALUE ) ).thenReturn(metadataFiles); - when(remoteMetadataDirectory.getBlobStream(metadataPrefix + RemoteStoreUtils.invertLong(1000))).thenReturn( + when(remoteMetadataDirectory.getBlobStream(metadataPrefix + RemoteStoreUtils.invertLong(1000) + "__1")).thenReturn( createMetadataFileBytes(metadata, indexShard.getLatestReplicationCheckpoint(), segmentInfos) ); diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java index 1f82dd9d7e641..c510a6475147d 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java @@ -8,6 +8,8 @@ package org.opensearch.index.translog; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.tests.util.LuceneTestCase; import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.metadata.RepositoryMetadata; @@ -53,6 +55,7 @@ import java.util.Set; import java.util.TreeSet; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.LongStream; @@ -63,7 +66,9 @@ import static 
org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -211,16 +216,44 @@ public void onFailure(Exception e) { // Node id containing separator String nodeIdWithSeparator = - "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node__1__9223372036438563958__1"; + "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node__1__9223372036438563958__2__1"; Tuple minMaxGen = TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(nodeIdWithSeparator); Long minGen = Long.MAX_VALUE - 9223372036438563958L; assertEquals(minGen, minMaxGen.v1()); // Malformed md filename - String malformedMdFileName = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node1__xyz__1"; + String malformedMdFileName = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node1__xyz__3__1"; assertNull(TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(malformedMdFileName)); } + public void testGetMinMaxPrimaryTermFromFilename() throws Exception { + // New format metadata file + String newFormatMetadataFile = + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1"; + Tuple minMaxPrimaryterm = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(newFormatMetadataFile); + Long minPrimaryTerm = 2L; + Long maxPrimaryTerm = 7L; + assertEquals(minPrimaryTerm, minMaxPrimaryterm.v1()); + assertEquals(maxPrimaryTerm, minMaxPrimaryterm.v2()); + + // Old format metadata file + String oldFormatMdFilename = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1"; + assertNull(TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(oldFormatMdFilename)); + + // Node id containing separator + String nodeIdWithSeparator = + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node__1__9223372036438563958__2__1"; + minMaxPrimaryterm = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(nodeIdWithSeparator); + minPrimaryTerm = 2L; + maxPrimaryTerm = 7L; + assertEquals(minPrimaryTerm, minMaxPrimaryterm.v1()); + assertEquals(maxPrimaryTerm, minMaxPrimaryterm.v2()); + + // Malformed md filename + String malformedMdFileName = "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__xyz__3qwe__1"; + assertNull(TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(malformedMdFileName)); + } + public void testIndexDeletionWithNoPinnedTimestampNoRecentMdFiles() throws Exception { RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); ArrayList ops = new ArrayList<>(); @@ -604,11 +637,11 @@ public void testGetGenerationsToBeDeletedEmptyMetadataFilesNotToBeDeleted() thro List metadataFilesNotToBeDeleted = new ArrayList<>(); List metadataFilesToBeDeleted = List.of( // 4 to 7 - "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__1", + "metadata__9223372036854775806__9223372036854775800__9223370311919910398__31__9223372036854775803__1__1", // 17 to 37 - "metadata__9223372036438563903__9223372036854775770__9223370311919910398__31__9223372036854775790__1", 
+ "metadata__9223372036854775806__9223372036854775770__9223370311919910398__31__9223372036854775790__1__1", // 27 to 42 - "metadata__9223372036438563903__9223372036854775765__9223370311919910403__31__9223372036854775780__1" + "metadata__9223372036854775806__9223372036854775765__9223370311919910403__31__9223372036854775780__1__1" ); Set generations = ((RemoteFsTimestampAwareTranslog) translog).getGenerationsToBeDeleted( metadataFilesNotToBeDeleted, @@ -618,6 +651,7 @@ public void testGetGenerationsToBeDeletedEmptyMetadataFilesNotToBeDeleted() thro Set md1Generations = LongStream.rangeClosed(4, 7).boxed().collect(Collectors.toSet()); Set md2Generations = LongStream.rangeClosed(17, 37).boxed().collect(Collectors.toSet()); Set md3Generations = LongStream.rangeClosed(27, 42).boxed().collect(Collectors.toSet()); + assertTrue(generations.containsAll(md1Generations)); assertTrue(generations.containsAll(md2Generations)); assertTrue(generations.containsAll(md3Generations)); @@ -631,19 +665,19 @@ public void testGetGenerationsToBeDeletedEmptyMetadataFilesNotToBeDeleted() thro public void testGetGenerationsToBeDeleted() throws IOException { List metadataFilesNotToBeDeleted = List.of( // 1 to 4 - "metadata__9223372036438563903__9223372036854775803__9223370311919910398__31__9223372036854775806__1", + "metadata__9223372036854775806__9223372036854775803__9223370311919910398__31__9223372036854775806__1__1", // 26 to 30 - "metadata__9223372036438563903__9223372036854775777__9223370311919910398__31__9223372036854775781__1", + "metadata__9223372036854775806__9223372036854775777__9223370311919910398__31__9223372036854775781__1__1", // 42 to 100 - "metadata__9223372036438563903__9223372036854775707__9223370311919910403__31__9223372036854775765__1" + "metadata__9223372036854775806__9223372036854775707__9223370311919910403__31__9223372036854775765__1__1" ); List metadataFilesToBeDeleted = List.of( // 4 to 7 - "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__1", + "metadata__9223372036854775806__9223372036854775800__9223370311919910398__31__9223372036854775803__1__1", // 17 to 37 - "metadata__9223372036438563903__9223372036854775770__9223370311919910398__31__9223372036854775790__1", + "metadata__9223372036854775806__9223372036854775770__9223370311919910398__31__9223372036854775790__1__1", // 27 to 42 - "metadata__9223372036438563903__9223372036854775765__9223370311919910403__31__9223372036854775780__1" + "metadata__9223372036854775806__9223372036854775765__9223370311919910403__31__9223372036854775780__1__1" ); Set generations = ((RemoteFsTimestampAwareTranslog) translog).getGenerationsToBeDeleted( metadataFilesNotToBeDeleted, @@ -653,6 +687,7 @@ public void testGetGenerationsToBeDeleted() throws IOException { Set md1Generations = LongStream.rangeClosed(5, 7).boxed().collect(Collectors.toSet()); Set md2Generations = LongStream.rangeClosed(17, 25).boxed().collect(Collectors.toSet()); Set md3Generations = LongStream.rangeClosed(31, 41).boxed().collect(Collectors.toSet()); + assertTrue(generations.containsAll(md1Generations)); assertTrue(generations.containsAll(md2Generations)); assertTrue(generations.containsAll(md3Generations)); @@ -783,49 +818,49 @@ public void testGetMinMaxTranslogGenerationFromMetadataFile() throws IOException assertEquals( new Tuple<>(701L, 1008L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__9223372036854775106__1", + 
"metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__9223372036854775106__1__1", translogTransferManager ) ); assertEquals( new Tuple<>(4L, 7L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__1", + "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__2__1", translogTransferManager ) ); assertEquals( new Tuple<>(106L, 106L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854775701__9223370311919910403__31__9223372036854775701__1", + "metadata__9223372036438563903__9223372036854775701__9223370311919910403__31__9223372036854775701__3__1", translogTransferManager ) ); assertEquals( new Tuple<>(4573L, 99964L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854675843__9223370311919910408__31__9223372036854771234__1", + "metadata__9223372036438563903__9223372036854675843__9223370311919910408__31__9223372036854771234__4__1", translogTransferManager ) ); assertEquals( new Tuple<>(1L, 4L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854775803__9223370311919910413__31__9223372036854775806__1", + "metadata__9223372036438563903__9223372036854775803__9223370311919910413__31__9223372036854775806__5__1", translogTransferManager ) ); assertEquals( new Tuple<>(2474L, 3462L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854772345__9223370311919910429__31__9223372036854773333__1", + "metadata__9223372036438563903__9223372036854772345__9223370311919910429__31__9223372036854773333__6__1", translogTransferManager ) ); assertEquals( new Tuple<>(5807L, 7917L), translog.getMinMaxTranslogGenerationFromMetadataFile( - "metadata__9223372036438563903__9223372036854767890__9223370311919910434__31__9223372036854770000__1", + "metadata__9223372036438563903__9223372036854767890__9223370311919910434__31__9223372036854770000__7__1", translogTransferManager ) ); @@ -859,4 +894,93 @@ public void testGetMinMaxTranslogGenerationFromMetadataFile() throws IOException verify(translogTransferManager).readMetadata("metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1"); verify(translogTransferManager).readMetadata("metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__1"); } + + public void testDeleteStaleRemotePrimaryTerms() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of(1L, 2L, 3L, 4L)); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + 
verify(translogTransferManager).deletePrimaryTermsAsync(2L); + assertEquals(2, minPrimaryTermInRemote.get()); + + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + // This means there are no new invocations of deletePrimaryTermsAsync + verify(translogTransferManager, times(1)).deletePrimaryTermsAsync(anyLong()); + } + + public void testDeleteStaleRemotePrimaryTermsNoPrimaryTermInRemote() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of()); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager, times(0)).deletePrimaryTermsAsync(anyLong()); + assertEquals(Long.MAX_VALUE, minPrimaryTermInRemote.get()); + } + + public void testDeleteStaleRemotePrimaryTermsPrimaryTermInRemoteIsBigger() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of(2L, 3L, 4L)); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager, times(0)).deletePrimaryTermsAsync(anyLong()); + assertEquals(2, minPrimaryTermInRemote.get()); + } + } diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index 8605043ddd5b5..ed0d6b7d50706 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -628,7 +628,7 @@ public void testMetadataConflict() throws InterruptedException { String mdFilename = tm.getFileName(); long count = mdFilename.chars().filter(ch -> ch == METADATA_SEPARATOR.charAt(0)).count(); // There should not be any `_` in the mdFile name as it is used as a separator.
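The assertion below counts separator characters in a generated metadata filename, which is worth unpacking: fields are joined with the "__" separator, timestamps and primary terms are stored as inverted longs (Long.MAX_VALUE minus the value) so that a plain lexicographic sort lists the newest file first, and the new format appends the min primary term as an extra field ahead of the trailing metadata version, which is why the expected underscore count grows from 12 to 14. A minimal sketch under those assumptions (the helper names are mine; the real ones are RemoteStoreUtils.invertLong and TranslogTransferMetadata, and the field layout here is simplified for illustration):

```java
// Demonstrates inverted-long ordering and end-anchored parsing of "__"-separated names.
public final class MetadataFilenameSketch {

    static String invertLong(long value) {
        // Zero-padding to 19 digits keeps lexicographic order aligned with numeric order.
        return String.format("%019d", Long.MAX_VALUE - value);
    }

    static long unInvertLong(String token) {
        return Long.MAX_VALUE - Long.parseLong(token);
    }

    public static void main(String[] args) {
        String older = "metadata__" + invertLong(5) + "__" + invertLong(1000) + "__node1__2__1";
        String newer = "metadata__" + invertLong(7) + "__" + invertLong(2000) + "__node1__2__1";
        System.out.println(newer.compareTo(older) < 0); // true: the newer file sorts first

        // Node ids may themselves contain "__", so fields appended later are read
        // from the end of the token list instead of by absolute position.
        String[] tokens = newer.split("__");
        System.out.println("metadata version = " + tokens[tokens.length - 1]); // 1
        System.out.println("min primary term = " + tokens[tokens.length - 2]); // 2
        System.out.println("max primary term = " + unInvertLong(tokens[1]));   // 7
    }
}
```

Malformed names, such as a non-numeric token where a generation or primary term is expected, make the real parsers return null, which is exactly what the assertNull cases in the tests above pin down.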
- assertEquals(12, count); + assertEquals(14, count); Thread.sleep(1); TranslogTransferMetadata tm2 = new TranslogTransferMetadata(1, 1, 1, 2, "node--2"); String mdFilename2 = tm2.getFileName(); diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java new file mode 100644 index 0000000000000..7acee449a1b46 --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java @@ -0,0 +1,163 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication; + +import org.apache.lucene.store.IOContext; +import org.opensearch.OpenSearchCorruptionException; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.engine.NRTReplicationEngineFactory; +import org.opensearch.index.replication.TestReplicationSource; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.IndexShardTestCase; +import org.opensearch.index.store.StoreFileMetadata; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.indices.replication.common.CopyState; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiConsumer; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +public class SegmentReplicatorTests extends IndexShardTestCase { + + private static final Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + + public void testStartReplicationWithoutSourceFactory() { + ThreadPool threadpool = mock(ThreadPool.class); + ExecutorService mock = mock(ExecutorService.class); + when(threadpool.generic()).thenReturn(mock); + SegmentReplicator segmentReplicator = new SegmentReplicator(threadpool); + + IndexShard shard = mock(IndexShard.class); + segmentReplicator.startReplication(shard); + Mockito.verifyNoInteractions(mock); + } + + public void testStartReplicationRunsSuccessfully() throws Exception { + final IndexShard replica = newStartedShard(false, settings, new NRTReplicationEngineFactory()); + final IndexShard primary = newStartedShard(true, settings, new NRTReplicationEngineFactory()); + + // index and copy segments to replica. 
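The indexing loop that follows feeds the core scenario of this test: a replica pulling segments from a primary through a SegmentReplicationSource. That source contract is a two-phase protocol, fetching checkpoint metadata first and then only the segment files the replica is missing. A compact self-contained model of the flow, with assumed stand-in types rather than the real OpenSearch interfaces:

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

// Two-phase copy: (1) fetch checkpoint metadata and diff it against local files,
// (2) fetch only the missing files and record them locally.
public final class SegmentReplicationFlowSketch {

    record Checkpoint(long generation, Map<String, Long> fileChecksums) {}

    interface ReplicationSource {
        Checkpoint getCheckpointMetadata();

        void getSegmentFiles(List<String> filesToFetch, Consumer<String> onFileCopied);
    }

    static void replicate(ReplicationSource source, Map<String, Long> localFiles) {
        Checkpoint remote = source.getCheckpointMetadata();
        List<String> missing = remote.fileChecksums().keySet().stream()
            .filter(f -> !localFiles.containsKey(f))
            .toList();
        source.getSegmentFiles(missing, f -> localFiles.put(f, remote.fileChecksums().get(f)));
    }

    public static void main(String[] args) {
        Map<String, Long> local = new HashMap<>(Map.of("_0.cfe", 1L));
        ReplicationSource primary = new ReplicationSource() {
            @Override
            public Checkpoint getCheckpointMetadata() {
                return new Checkpoint(2L, Map.of("_0.cfe", 1L, "_1.cfe", 7L));
            }

            @Override
            public void getSegmentFiles(List<String> files, Consumer<String> onCopied) {
                files.forEach(onCopied); // stand-in for copying each file from the primary
            }
        };
        replicate(primary, local);
        System.out.println(local); // now also contains _1.cfe
    }
}
```

The failure test later in this file exercises the error path of the same contract: when getSegmentFiles reports corruption, the replica's shard failure callback must fire.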
+ int numDocs = randomIntBetween(10, 20); + for (int i = 0; i < numDocs; i++) { + indexDoc(primary, "_doc", Integer.toString(i)); + } + primary.refresh("test"); + + SegmentReplicator segmentReplicator = spy(new SegmentReplicator(threadPool)); + SegmentReplicationSourceFactory factory = mock(SegmentReplicationSourceFactory.class); + when(factory.get(replica)).thenReturn(new TestReplicationSource() { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + resolveCheckpointListener(listener, primary); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + try { + Lucene.cleanLuceneIndex(indexShard.store().directory()); + Map segmentMetadataMap = primary.getSegmentMetadataMap(); + for (String file : segmentMetadataMap.keySet()) { + indexShard.store().directory().copyFrom(primary.store().directory(), file, file, IOContext.DEFAULT); + } + listener.onResponse(new GetSegmentFilesResponse(new ArrayList<>(segmentMetadataMap.values()))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }); + segmentReplicator.setSourceFactory(factory); + segmentReplicator.startReplication(replica); + assertBusy(() -> assertDocCount(replica, numDocs)); + closeShards(primary, replica); + } + + public void testReplicationFails() throws Exception { + allowShardFailures(); + final IndexShard replica = newStartedShard(false, settings, new NRTReplicationEngineFactory()); + final IndexShard primary = newStartedShard(true, settings, new NRTReplicationEngineFactory()); + + SegmentReplicator segmentReplicator = spy(new SegmentReplicator(threadPool)); + SegmentReplicationSourceFactory factory = mock(SegmentReplicationSourceFactory.class); + when(factory.get(replica)).thenReturn(new TestReplicationSource() { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + resolveCheckpointListener(listener, primary); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + OpenSearchCorruptionException corruptIndexException = new OpenSearchCorruptionException("test"); + try { + indexShard.store().markStoreCorrupted(corruptIndexException); + } catch (IOException e) { + throw new RuntimeException(e); + } + listener.onFailure(corruptIndexException); + } + }); + // assert shard failure on corruption + AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false); + replica.addShardFailureCallback((ig) -> failureCallbackTriggered.set(true)); + segmentReplicator.setSourceFactory(factory); + segmentReplicator.startReplication(replica); + assertBusy(() -> assertTrue(failureCallbackTriggered.get())); + closeShards(primary, replica); + } + + protected void resolveCheckpointListener(ActionListener listener, IndexShard primary) { + try (final CopyState copyState = new CopyState(primary)) { + listener.onResponse( + new CheckpointInfoResponse(copyState.getCheckpoint(), copyState.getMetadataMap(), copyState.getInfosBytes()) + ); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + +} diff --git a/server/src/test/java/org/opensearch/repositories/IndexIdTests.java 
b/server/src/test/java/org/opensearch/repositories/IndexIdTests.java index 3b719d287aa9b..bcd75f9a47ad8 100644 --- a/server/src/test/java/org/opensearch/repositories/IndexIdTests.java +++ b/server/src/test/java/org/opensearch/repositories/IndexIdTests.java @@ -86,7 +86,7 @@ public void testEqualsAndHashCode() { public void testSerialization() throws IOException { IndexId indexId = new IndexId(randomAlphaOfLength(8), UUIDs.randomBase64UUID(), randomIntBetween(0, 2)); BytesStreamOutput out = new BytesStreamOutput(); - out.setVersion(Version.CURRENT); + out.setVersion(Version.V_2_17_0); indexId.writeTo(out); assertEquals(indexId, new IndexId(out.bytes().streamInput())); } diff --git a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java index 491a0377ab32e..7e213218eb97b 100644 --- a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java +++ b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java @@ -168,9 +168,8 @@ public void testPreProcess() throws Exception { when(indexCache.query()).thenReturn(queryCache); when(indexService.cache()).thenReturn(indexCache); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); MapperService mapperService = mock(MapperService.class); when(mapperService.hasNested()).thenReturn(randomBoolean()); when(indexService.mapperService()).thenReturn(mapperService); @@ -501,9 +500,8 @@ public void testClearQueryCancellationsOnClose() throws IOException { IndexService indexService = mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); Settings settings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) @@ -598,9 +596,8 @@ public void testSearchPathEvaluation() throws Exception { IndexService indexService = mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); IndexMetadata indexMetadata = IndexMetadata.builder("index").settings(settings).build(); IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY); @@ -830,9 +827,8 @@ public void testSearchPathEvaluationWithConcurrentSearchModeAsAuto() throws Exce IndexService indexService = mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), 
any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); IndexMetadata indexMetadata = IndexMetadata.builder("index").settings(settings).build(); IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY); diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java index 7fb75e43da6ec..1eaea6a9e9d47 100644 --- a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -127,9 +127,9 @@ public void testApproximateRangeWithSizeUnderDefault() throws IOException { } doc.add(new LongPoint("point", scratch)); iw.addDocument(doc); - if (i % 15 == 0) iw.flush(); } iw.flush(); + iw.forceMerge(1); try (IndexReader reader = iw.getReader()) { try { long lower = 0; @@ -168,6 +168,7 @@ public void testApproximateRangeWithSizeOverDefault() throws IOException { iw.addDocument(doc); } iw.flush(); + iw.forceMerge(1); try (IndexReader reader = iw.getReader()) { try { long lower = 0; @@ -185,7 +186,7 @@ protected String toString(int dimension, byte[] value) { }; IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(approximateQuery, 11000); - assertEquals(topDocs.totalHits, new TotalHits(11001, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO)); + assertEquals(topDocs.totalHits, new TotalHits(11000, TotalHits.Relation.EQUAL_TO)); } catch (IOException e) { throw new RuntimeException(e); } @@ -213,6 +214,7 @@ public void testApproximateRangeShortCircuit() throws IOException { if (i % 10 == 0) iw.flush(); } iw.flush(); + iw.forceMerge(1); try (IndexReader reader = iw.getReader()) { try { long lower = 0; @@ -258,6 +260,7 @@ public void testApproximateRangeShortCircuitAscSort() throws IOException { iw.addDocument(doc); } iw.flush(); + iw.forceMerge(1); try (IndexReader reader = iw.getReader()) { try { long lower = 0; @@ -284,12 +287,12 @@ protected String toString(int dimension, byte[] value) { assertNotEquals(topDocs.totalHits, topDocs1.totalHits); assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); assertEquals(topDocs1.totalHits, new TotalHits(21, TotalHits.Relation.EQUAL_TO)); - assertEquals(topDocs.scoreDocs[0].doc, 0); - assertEquals(topDocs.scoreDocs[1].doc, 1); - assertEquals(topDocs.scoreDocs[2].doc, 2); - assertEquals(topDocs.scoreDocs[3].doc, 3); - assertEquals(topDocs.scoreDocs[4].doc, 4); - assertEquals(topDocs.scoreDocs[5].doc, 5); + assertEquals(topDocs.scoreDocs[0].doc, topDocs1.scoreDocs[0].doc); + assertEquals(topDocs.scoreDocs[1].doc, topDocs1.scoreDocs[1].doc); + assertEquals(topDocs.scoreDocs[2].doc, topDocs1.scoreDocs[2].doc); + assertEquals(topDocs.scoreDocs[3].doc, topDocs1.scoreDocs[3].doc); + assertEquals(topDocs.scoreDocs[4].doc, topDocs1.scoreDocs[4].doc); + assertEquals(topDocs.scoreDocs[5].doc, topDocs1.scoreDocs[5].doc); } catch (IOException e) { throw new RuntimeException(e); diff --git a/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java b/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java index 8650500df8e95..0c4e871b1330c 100644 --- a/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java +++ 
b/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java @@ -63,6 +63,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; import static org.opensearch.test.OpenSearchTestCase.randomFrom; @@ -325,7 +326,18 @@ public static ClusterState stateWithAssignedPrimariesAndOneReplica(String index, * Creates cluster state with several indexes, shards and replicas and all shards STARTED. */ public static ClusterState stateWithAssignedPrimariesAndReplicas(String[] indices, int numberOfShards, int numberOfReplicas) { + return stateWithAssignedPrimariesAndReplicas(indices, numberOfShards, numberOfReplicas, 0); + } + /** + * Creates cluster state with several indexes, shards and replicas and all shards STARTED. + */ + public static ClusterState stateWithAssignedPrimariesAndReplicas( + String[] indices, + int numberOfShards, + int numberOfReplicas, + int numberOfSearchReplicas + ) { int numberOfDataNodes = numberOfReplicas + 1; DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder(); for (int i = 0; i < numberOfDataNodes + 1; i++) { @@ -347,6 +359,7 @@ public static ClusterState stateWithAssignedPrimariesAndReplicas(String[] indice .put(SETTING_VERSION_CREATED, Version.CURRENT) .put(SETTING_NUMBER_OF_SHARDS, numberOfShards) .put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas) + .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, numberOfSearchReplicas) .put(SETTING_CREATION_DATE, System.currentTimeMillis()) ) .build(); @@ -363,6 +376,19 @@ public static ClusterState stateWithAssignedPrimariesAndReplicas(String[] indice TestShardRouting.newShardRouting(index, i, newNode(replica + 1).getId(), null, false, ShardRoutingState.STARTED) ); } + for (int replica = numberOfReplicas; replica < numberOfSearchReplicas + numberOfReplicas; replica++) { + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting( + new ShardId(index, IndexMetadata.INDEX_UUID_NA_VALUE, i), + newNode(replica + 1).getId(), + null, + false, + true, + ShardRoutingState.STARTED, + null + ) + ); + } indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder.build()); } routingTableBuilder.add(indexRoutingTableBuilder.build()); diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java index 7ed0da8509fab..5d85844f3218d 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java @@ -46,16 +46,18 @@ /** Base test case for subclasses of MappedFieldType */ public abstract class FieldTypeTestCase extends OpenSearchTestCase { - public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true); - public static final QueryShardContext MOCK_QSC_DISALLOW_EXPENSIVE = createMockQueryShardContext(false); + public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true, false); + public static final QueryShardContext MOCK_QSC_DISALLOW_EXPENSIVE = createMockQueryShardContext(false, false); + public static final QueryShardContext 
MOCK_QSC_ENABLE_INDEX_DOC_VALUES = createMockQueryShardContext(true, true); protected QueryShardContext randomMockShardContext() { return randomFrom(MOCK_QSC, MOCK_QSC_DISALLOW_EXPENSIVE); } - static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries) { + static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries, boolean keywordIndexOrDocValuesEnabled) { QueryShardContext queryShardContext = mock(QueryShardContext.class); when(queryShardContext.allowExpensiveQueries()).thenReturn(allowExpensiveQueries); + when(queryShardContext.keywordFieldIndexOrDocValuesEnabled()).thenReturn(keywordIndexOrDocValuesEnabled); return queryShardContext; } diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java index abf8f2a4da6c1..50a1751546293 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java @@ -21,6 +21,7 @@ import java.util.Map; public class IndexShardTestUtils { + public static final String MOCK_STATE_REPO_NAME = "state-test-repo"; public static final String MOCK_SEGMENT_REPO_NAME = "segment-test-repo"; public static final String MOCK_TLOG_REPO_NAME = "tlog-test-repo"; @@ -37,6 +38,7 @@ public static DiscoveryNode getFakeDiscoNode(String id) { public static DiscoveryNode getFakeRemoteEnabledNode(String id) { Map remoteNodeAttributes = new HashMap(); + remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_STATE_REPO_NAME); remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_SEGMENT_REPO_NAME); remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_TLOG_REPO_NAME); return new DiscoveryNode(