diff --git a/docs/reference/commands/node-tool.asciidoc b/docs/reference/commands/node-tool.asciidoc
index ed810a4dac014..d6342ee7810f7 100644
--- a/docs/reference/commands/node-tool.asciidoc
+++ b/docs/reference/commands/node-tool.asciidoc
@@ -3,9 +3,9 @@
 The `elasticsearch-node` command enables you to perform certain unsafe
 operations on a node that are only possible while it is shut down. This command
-allows you to adjust the <> of a node and may be able to
-recover some data after a disaster or start a node even if it is incompatible
-with the data on disk.
+allows you to adjust the <> of a node, unsafely edit cluster
+settings, and may be able to recover some data after a disaster or start a node
+even if it is incompatible with the data on disk.

@@ -20,13 +20,17 @@
 [float]
 === Synopsis

 [source,shell]
 --------------------------------------------------
 bin/elasticsearch-node repurpose|unsafe-bootstrap|detach-cluster|override-version

 [float]
 === Description

-This tool has four modes:
+This tool has five modes:

 * `elasticsearch-node repurpose` can be used to delete unwanted data from a
   node if it used to be a <> or a <> but has been repurposed
   not to have one or other of these roles.

+* `elasticsearch-node remove-settings` can be used to remove persistent settings
+  from the cluster state in cases where it contains incompatible settings that
+  prevent the cluster from forming.
+
 * `elasticsearch-node unsafe-bootstrap` can be used to perform _unsafe cluster
   bootstrapping_. It forces one of the nodes to form a brand-new cluster on
   its own, using its local copy of the cluster metadata.

@@ -76,6 +80,26 @@
 The tool provides a summary of the data to be deleted and asks for confirmation
 before making any changes. You can get detailed information about the affected
 indices and shards by passing the verbose (`-v`) option.

+[float]
+==== Removing persistent cluster settings
+
+There may be situations where a node contains persistent cluster
+settings that prevent the cluster from forming. Since the cluster cannot form,
+it is not possible to remove these settings using the
+<> API.
+
+The `elasticsearch-node remove-settings` tool allows you to forcefully remove
+those persistent settings from the on-disk cluster state. The tool takes as
+parameters a list of the settings that should be removed; it also supports
+wildcard patterns.
+
+The intended use is:
+
+* Stop the node
+* Run `elasticsearch-node remove-settings name-of-setting-to-remove` on the node
+* Repeat for all other master-eligible nodes
+* Start the nodes
+
 [float]
 ==== Recovering data after a disaster

@@ -143,9 +167,9 @@
 If there is at least one remaining master-eligible node, but it is not possible
 to restart a majority of them, then the `elasticsearch-node unsafe-bootstrap`
 command will unsafely override the cluster's <> as if performing another
-<>.
+<>.
 The target node can then form a new cluster on its own by using
-the cluster metadata held locally on the target node.
+the cluster metadata held locally on the target node.

 [WARNING]
 These steps can lead to arbitrary data loss since the target node may not hold the latest cluster

@@ -290,6 +314,9 @@
 it can join a different cluster.

 `override-version`:: Overwrites the version number stored in the data path so
 that a node can start despite being incompatible with the on-disk data.

+`remove-settings`:: Forcefully removes the provided persistent cluster settings
+from the on-disk cluster state.
+
 `--ordinal `:: If there is <> then this specifies which node to target. Defaults
 to `0`, meaning to use the first node in the data path.
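The wildcard support mentioned above boils down to simple glob matching over the
keys of the persisted `Settings` object. The standalone sketch below mirrors the
matching-and-removal loop of the `RemoveSettingsCommand` that this change adds
further down in the diff; the class name and the example settings are
illustrative only, but `Settings` and `Regex.simpleMatch` are the same
Elasticsearch utilities the command itself uses.

[source,java]
----
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;

public class RemoveSettingsSketch {

    /** Returns a copy of the given settings with every key matching the wildcard pattern removed. */
    public static Settings removeMatching(Settings persistentSettings, String pattern) {
        final Settings.Builder builder = Settings.builder().put(persistentSettings);
        for (String settingKey : persistentSettings.keySet()) {
            if (Regex.simpleMatch(pattern, settingKey)) {
                builder.remove(settingKey); // same Regex.simpleMatch check the command performs
            }
        }
        return builder.build();
    }

    public static void main(String[] args) {
        final Settings settings = Settings.builder()
            .put("xpack.monitoring.exporters.my_exporter.host", "10.1.2.3")
            .put("cluster.routing.allocation.enable", "none")
            .build();
        // Prints [cluster.routing.allocation.enable]: only the monitoring setting matches.
        System.out.println(removeMatching(settings, "xpack.monitoring.*").keySet());
    }
}
----

Unlike this sketch, the real command refuses to proceed when a pattern matches
nothing, raising a usage error so that a typo cannot silently leave the
offending setting in place.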
@@ -350,6 +377,40 @@
 Confirm [y/N] y
 Node successfully repurposed to no-master and no-data.
 ----

+[float]
+==== Removing persistent cluster settings
+
+If your nodes contain persistent cluster settings that prevent the cluster
+from forming, and which therefore cannot be removed using the <> API,
+you can run the following commands to remove one or more of those settings.
+
+[source,txt]
+----
+node$ ./bin/elasticsearch-node remove-settings xpack.monitoring.exporters.my_exporter.host
+
+    WARNING: Elasticsearch MUST be stopped before running this tool.
+
+The following settings will be removed:
+xpack.monitoring.exporters.my_exporter.host: "10.1.2.3"
+
+You should only run this tool if you have incompatible settings in the
+cluster state that prevent the cluster from forming.
+This tool can cause data loss and its use should be your last resort.
+
+Do you want to proceed?
+
+Confirm [y/N] y
+
+Settings were successfully removed from the cluster state
+----
+
+You can also use wildcards to remove multiple settings, for example:
+
+[source,txt]
+----
+node$ ./bin/elasticsearch-node remove-settings xpack.monitoring.*
+----
+
 [float]
 ==== Unsafe cluster bootstrapping

diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationState.java
index 00aebe85ec424..a84891fc8388a 100644
--- a/server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationState.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationState.java
@@ -25,6 +25,8 @@
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;

+import java.io.Closeable;
+import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -444,15 +446,14 @@ public void invariant() {
         assert publishVotes.isEmpty() || electionWon();
     }

-    public void close() {
+    public void close() throws IOException {
         persistedState.close();
     }

     /**
      * Pluggable persistence layer for {@link CoordinationState}.
- * */ - public interface PersistedState { + public interface PersistedState extends Closeable { /** * Returns the current term @@ -511,7 +512,8 @@ default void markLastAcceptedStateAsCommitted() { } } - default void close() {} + default void close() throws IOException { + } } /** diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 7ccb2abf59d51..6ad7d25eb8ffd 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -75,6 +75,7 @@ import org.elasticsearch.transport.TransportResponse.Empty; import org.elasticsearch.transport.TransportService; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -732,7 +733,7 @@ protected void doStop() { } @Override - protected void doClose() { + protected void doClose() throws IOException { final CoordinationState coordinationState = this.coordinationState.get(); if (coordinationState != null) { // This looks like a race that might leak an unclosed CoordinationState if it's created while execution is here, but this method diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/DetachClusterCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/DetachClusterCommand.java index dff7ae5a2ee03..2101cf5a13531 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/DetachClusterCommand.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/DetachClusterCommand.java @@ -18,11 +18,12 @@ */ package org.elasticsearch.cluster.coordination; +import joptsimple.OptionSet; import org.elasticsearch.cli.Terminal; -import org.elasticsearch.cluster.metadata.Manifest; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.MetaData; -import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.env.Environment; +import org.elasticsearch.gateway.PersistedClusterStateService; import java.io.IOException; import java.nio.file.Path; @@ -48,14 +49,22 @@ public DetachClusterCommand() { @Override - protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { - final Tuple manifestMetaDataTuple = loadMetaData(terminal, dataPaths); - final Manifest manifest = manifestMetaDataTuple.v1(); - final MetaData metaData = manifestMetaDataTuple.v2(); + protected void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment env) + throws IOException { + final PersistedClusterStateService persistedClusterStateService = createPersistedClusterStateService(dataPaths); + + terminal.println(Terminal.Verbosity.VERBOSE, "Loading cluster state"); + final ClusterState oldClusterState = loadTermAndClusterState(persistedClusterStateService, env).v2(); + final ClusterState newClusterState = ClusterState.builder(oldClusterState) + .metaData(updateMetaData(oldClusterState.metaData())).build(); + terminal.println(Terminal.Verbosity.VERBOSE, + "[old cluster state = " + oldClusterState + ", new cluster state = " + newClusterState + "]"); confirm(terminal, CONFIRMATION_MSG); - writeNewMetaData(terminal, manifest, updateCurrentTerm(), metaData, updateMetaData(metaData), dataPaths); + try (PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter()) { + 
writer.writeFullStateAndCommit(updateCurrentTerm(), newClusterState); + } terminal.println(NODE_DETACHED_MSG); } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java index 00e3eb2e2411f..cd6178154a247 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java @@ -27,37 +27,48 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cli.EnvironmentAwareCommand; import org.elasticsearch.cli.Terminal; -import org.elasticsearch.cluster.metadata.Manifest; -import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cli.UserException; +import org.elasticsearch.cluster.ClusterModule; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.env.NodeMetaData; +import org.elasticsearch.gateway.PersistedClusterStateService; +import org.elasticsearch.indices.IndicesModule; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand { private static final Logger logger = LogManager.getLogger(ElasticsearchNodeCommand.class); protected static final String DELIMITER = "------------------------------------------------------------------------\n"; - static final String STOP_WARNING_MSG = DELIMITER + "\n" + " WARNING: Elasticsearch MUST be stopped before running this tool." 
+ "\n"; protected static final String FAILED_TO_OBTAIN_NODE_LOCK_MSG = "failed to lock node's directory, is Elasticsearch still running?"; - static final String NO_NODE_FOLDER_FOUND_MSG = "no node folder is found in data folder(s), node has not been started yet?"; - static final String NO_MANIFEST_FILE_FOUND_MSG = "no manifest file is found, do you run pre 7.0 Elasticsearch?"; - protected static final String GLOBAL_GENERATION_MISSING_MSG = - "no metadata is referenced from the manifest file, cluster has never been bootstrapped?"; - static final String NO_GLOBAL_METADATA_MSG = "failed to find global metadata, metadata corrupted?"; - static final String WRITE_METADATA_EXCEPTION_MSG = "exception occurred when writing new metadata to disk"; protected static final String ABORTED_BY_USER_MSG = "aborted by user"; final OptionSpec nodeOrdinalOption; + static final String NO_NODE_FOLDER_FOUND_MSG = "no node folder is found in data folder(s), node has not been started yet?"; + static final String NO_NODE_METADATA_FOUND_MSG = "no node meta data is found, node has not been started yet?"; + protected static final String CS_MISSING_MSG = + "cluster state is empty, cluster has never been bootstrapped?"; + + protected static final NamedXContentRegistry namedXContentRegistry = new NamedXContentRegistry( + Stream.of(ClusterModule.getNamedXWriteables().stream(), IndicesModule.getNamedXContents().stream()) + .flatMap(Function.identity()) + .collect(Collectors.toList())); public ElasticsearchNodeCommand(String description) { super(description); @@ -65,7 +76,33 @@ public ElasticsearchNodeCommand(String description) { .withRequiredArg().ofType(Integer.class); } - protected void processNodePathsWithLock(Terminal terminal, OptionSet options, Environment env) throws IOException { + public static PersistedClusterStateService createPersistedClusterStateService(Path[] dataPaths) throws IOException { + final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(dataPaths); + if (nodeMetaData == null) { + throw new ElasticsearchException(NO_NODE_METADATA_FOUND_MSG); + } + + String nodeId = nodeMetaData.nodeId(); + return new PersistedClusterStateService(dataPaths, nodeId, namedXContentRegistry, BigArrays.NON_RECYCLING_INSTANCE, true); + } + + public static ClusterState clusterState(Environment environment, PersistedClusterStateService.OnDiskState onDiskState) { + return ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(environment.settings())) + .version(onDiskState.lastAcceptedVersion) + .metaData(onDiskState.metaData) + .build(); + } + + public static Tuple loadTermAndClusterState(PersistedClusterStateService psf, + Environment env) throws IOException { + final PersistedClusterStateService.OnDiskState bestOnDiskState = psf.loadBestOnDiskState(); + if (bestOnDiskState.empty()) { + throw new ElasticsearchException(CS_MISSING_MSG); + } + return Tuple.tuple(bestOnDiskState.currentTerm, clusterState(env, bestOnDiskState)); + } + + protected void processNodePaths(Terminal terminal, OptionSet options, Environment env) throws IOException, UserException { terminal.println(Terminal.Verbosity.VERBOSE, "Obtaining lock for node"); Integer nodeOrdinal = nodeOrdinalOption.value(options); if (nodeOrdinal == null) { @@ -77,32 +114,12 @@ protected void processNodePathsWithLock(Terminal terminal, OptionSet options, En if (dataPaths.length == 0) { throw new ElasticsearchException(NO_NODE_FOLDER_FOUND_MSG); } - processNodePaths(terminal, dataPaths, env); + processNodePaths(terminal, dataPaths, nodeOrdinal, options, 
env); } catch (LockObtainFailedException e) { throw new ElasticsearchException(FAILED_TO_OBTAIN_NODE_LOCK_MSG, e); } } - protected Tuple loadMetaData(Terminal terminal, Path[] dataPaths) throws IOException { - terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest file"); - final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths); - - if (manifest == null) { - throw new ElasticsearchException(NO_MANIFEST_FILE_FOUND_MSG); - } - if (manifest.isGlobalGenerationMissing()) { - throw new ElasticsearchException(GLOBAL_GENERATION_MISSING_MSG); - } - terminal.println(Terminal.Verbosity.VERBOSE, "Loading global metadata file"); - final MetaData metaData = MetaData.FORMAT_PRESERVE_CUSTOMS.loadGeneration( - logger, NamedXContentRegistry.EMPTY, manifest.getGlobalGeneration(), dataPaths); - if (metaData == null) { - throw new ElasticsearchException(NO_GLOBAL_METADATA_MSG + " [generation = " + manifest.getGlobalGeneration() + "]"); - } - - return Tuple.tuple(manifest, metaData); - } - protected void confirm(Terminal terminal, String msg) { terminal.println(msg); String text = terminal.readText("Confirm [y/N] "); @@ -112,10 +129,10 @@ protected void confirm(Terminal terminal, String msg) { } @Override - protected final void execute(Terminal terminal, OptionSet options, Environment env) throws Exception { + public final void execute(Terminal terminal, OptionSet options, Environment env) throws Exception { terminal.println(STOP_WARNING_MSG); if (validateBeforeLock(terminal, env)) { - processNodePathsWithLock(terminal, options, env); + processNodePaths(terminal, options, env); } } @@ -134,33 +151,11 @@ protected boolean validateBeforeLock(Terminal terminal, Environment env) { * Process the paths. Locks for the paths is held during this method invocation. 
* @param terminal the terminal to use for messages * @param dataPaths the paths of the node to process + * @param options the command line options * @param env the env of the node to process */ - protected abstract void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException; - - - protected void writeNewMetaData(Terminal terminal, Manifest oldManifest, long newCurrentTerm, - MetaData oldMetaData, MetaData newMetaData, Path[] dataPaths) { - try { - terminal.println(Terminal.Verbosity.VERBOSE, - "[clusterUUID = " + oldMetaData.clusterUUID() + ", committed = " + oldMetaData.clusterUUIDCommitted() + "] => " + - "[clusterUUID = " + newMetaData.clusterUUID() + ", committed = " + newMetaData.clusterUUIDCommitted() + "]"); - terminal.println(Terminal.Verbosity.VERBOSE, "New coordination metadata is " + newMetaData.coordinationMetaData()); - terminal.println(Terminal.Verbosity.VERBOSE, "Writing new global metadata to disk"); - long newGeneration = MetaData.FORMAT.write(newMetaData, dataPaths); - Manifest newManifest = new Manifest(newCurrentTerm, oldManifest.getClusterStateVersion(), newGeneration, - oldManifest.getIndexGenerations()); - terminal.println(Terminal.Verbosity.VERBOSE, "New manifest is " + newManifest); - terminal.println(Terminal.Verbosity.VERBOSE, "Writing new manifest file to disk"); - Manifest.FORMAT.writeAndCleanup(newManifest, dataPaths); - terminal.println(Terminal.Verbosity.VERBOSE, "Cleaning up old metadata"); - MetaData.FORMAT.cleanupOldFiles(newGeneration, dataPaths); - } catch (Exception e) { - terminal.println(Terminal.Verbosity.VERBOSE, "Cleaning up new metadata"); - MetaData.FORMAT.cleanupOldFiles(oldManifest.getGlobalGeneration(), dataPaths); - throw new ElasticsearchException(WRITE_METADATA_EXCEPTION_MSG, e); - } - } + protected abstract void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment env) + throws IOException, UserException; protected NodeEnvironment.NodePath[] toNodePaths(Path[] dataPaths) { return Arrays.stream(dataPaths).map(ElasticsearchNodeCommand::createNodePath).toArray(NodeEnvironment.NodePath[]::new); diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/NodeToolCli.java b/server/src/main/java/org/elasticsearch/cluster/coordination/NodeToolCli.java index ff054e71eee3a..29c03dbb8c46c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/NodeToolCli.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/NodeToolCli.java @@ -41,6 +41,7 @@ public NodeToolCli() { subcommands.put("unsafe-bootstrap", new UnsafeBootstrapMasterCommand()); subcommands.put("detach-cluster", new DetachClusterCommand()); subcommands.put("override-version", new OverrideNodeVersionCommand()); + subcommands.put("remove-settings", new RemoveSettingsCommand()); } public static void main(String[] args) throws Exception { diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/RemoveSettingsCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/RemoveSettingsCommand.java new file mode 100644 index 0000000000000..41d9e164b3115 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/RemoveSettingsCommand.java @@ -0,0 +1,104 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.cluster.coordination;
+
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+import org.elasticsearch.cli.ExitCodes;
+import org.elasticsearch.cli.Terminal;
+import org.elasticsearch.cli.UserException;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.MetaData;
+import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.gateway.PersistedClusterStateService;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
+
+public class RemoveSettingsCommand extends ElasticsearchNodeCommand {
+
+    static final String SETTINGS_REMOVED_MSG = "Settings were successfully removed from the cluster state";
+    static final String CONFIRMATION_MSG =
+        DELIMITER +
+            "\n" +
+            "You should only run this tool if you have incompatible settings in the\n" +
+            "cluster state that prevent the cluster from forming.\n" +
+            "This tool can cause data loss and its use should be your last resort.\n" +
+            "\n" +
+            "Do you want to proceed?\n";
+
+    private final OptionSpec<String> arguments;
+
+    public RemoveSettingsCommand() {
+        super("Removes persistent settings from the cluster state");
+        arguments = parser.nonOptions("setting names");
+    }
+
+    @Override
+    protected void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment env)
+        throws IOException, UserException {
+        final List<String> settingsToRemove = arguments.values(options);
+        if (settingsToRemove.isEmpty()) {
+            throw new UserException(ExitCodes.USAGE, "Must supply at least one setting to remove");
+        }
+
+        final PersistedClusterStateService persistedClusterStateService = createPersistedClusterStateService(dataPaths);
+
+        terminal.println(Terminal.Verbosity.VERBOSE, "Loading cluster state");
+        final Tuple<Long, ClusterState> termAndClusterState = loadTermAndClusterState(persistedClusterStateService, env);
+        final ClusterState oldClusterState = termAndClusterState.v2();
+        final Settings oldPersistentSettings = oldClusterState.metaData().persistentSettings();
+        terminal.println(Terminal.Verbosity.VERBOSE, "persistent settings: " + oldPersistentSettings);
+        final Settings.Builder newPersistentSettingsBuilder = Settings.builder().put(oldPersistentSettings);
+        for (String settingToRemove : settingsToRemove) {
+            boolean matched = false;
+            for (String settingKey : oldPersistentSettings.keySet()) {
+                if (Regex.simpleMatch(settingToRemove, settingKey)) {
+                    newPersistentSettingsBuilder.remove(settingKey);
+                    if (matched == false) {
+                        terminal.println("The following settings will be removed:");
+                    }
+                    matched = true;
+                    terminal.println(settingKey + ": " + oldPersistentSettings.get(settingKey));
+                }
+            }
+            if (matched == false) {
+                throw new UserException(ExitCodes.USAGE,
+                    "No persistent cluster settings matching [" +
settingToRemove + "] were found on this node"); + } + } + final ClusterState newClusterState = ClusterState.builder(oldClusterState) + .metaData(MetaData.builder(oldClusterState.metaData()).persistentSettings(newPersistentSettingsBuilder.build()).build()) + .build(); + terminal.println(Terminal.Verbosity.VERBOSE, + "[old cluster state = " + oldClusterState + ", new cluster state = " + newClusterState + "]"); + + confirm(terminal, CONFIRMATION_MSG); + + try (PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter()) { + writer.writeFullStateAndCommit(termAndClusterState.v1(), newClusterState); + } + + terminal.println(SETTINGS_REMOVED_MSG); + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java index 05bc0116c13c6..66bcdf5c8f5b5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java @@ -18,19 +18,17 @@ */ package org.elasticsearch.cluster.coordination; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import joptsimple.OptionSet; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cli.Terminal; -import org.elasticsearch.cluster.metadata.Manifest; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.env.Environment; -import org.elasticsearch.env.NodeMetaData; +import org.elasticsearch.gateway.PersistedClusterStateService; import org.elasticsearch.node.Node; import java.io.IOException; @@ -40,8 +38,6 @@ public class UnsafeBootstrapMasterCommand extends ElasticsearchNodeCommand { - private static final Logger logger = LogManager.getLogger(UnsafeBootstrapMasterCommand.class); - static final String CLUSTER_STATE_TERM_VERSION_MSG_FORMAT = "Current node cluster state (term, version) pair is (%s, %s)"; static final String CONFIRMATION_MSG = @@ -58,8 +54,6 @@ public class UnsafeBootstrapMasterCommand extends ElasticsearchNodeCommand { static final String NOT_MASTER_NODE_MSG = "unsafe-bootstrap tool can only be run on master eligible node"; - static final String NO_NODE_METADATA_FOUND_MSG = "no node meta data is found, node has not been started yet?"; - static final String EMPTY_LAST_COMMITTED_VOTING_CONFIG_MSG = "last committed voting voting configuration is empty, cluster has never been bootstrapped?"; @@ -83,49 +77,55 @@ protected boolean validateBeforeLock(Terminal terminal, Environment env) { return true; } - protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { - terminal.println(Terminal.Verbosity.VERBOSE, "Loading node metadata"); - final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths); - if (nodeMetaData == null) { - throw new ElasticsearchException(NO_NODE_METADATA_FOUND_MSG); - } + protected void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment env) + throws IOException { + final 
PersistedClusterStateService persistedClusterStateService = createPersistedClusterStateService(dataPaths); - String nodeId = nodeMetaData.nodeId(); - terminal.println(Terminal.Verbosity.VERBOSE, "Current nodeId is " + nodeId); + final Tuple state = loadTermAndClusterState(persistedClusterStateService, env); + final ClusterState oldClusterState = state.v2(); + + final MetaData metaData = oldClusterState.metaData(); - final Tuple manifestMetaDataTuple = loadMetaData(terminal, dataPaths); - final Manifest manifest = manifestMetaDataTuple.v1(); - final MetaData metaData = manifestMetaDataTuple.v2(); final CoordinationMetaData coordinationMetaData = metaData.coordinationMetaData(); if (coordinationMetaData == null || - coordinationMetaData.getLastCommittedConfiguration() == null || - coordinationMetaData.getLastCommittedConfiguration().isEmpty()) { + coordinationMetaData.getLastCommittedConfiguration() == null || + coordinationMetaData.getLastCommittedConfiguration().isEmpty()) { throw new ElasticsearchException(EMPTY_LAST_COMMITTED_VOTING_CONFIG_MSG); } terminal.println(String.format(Locale.ROOT, CLUSTER_STATE_TERM_VERSION_MSG_FORMAT, coordinationMetaData.term(), - metaData.version())); - - confirm(terminal, CONFIRMATION_MSG); + metaData.version())); CoordinationMetaData newCoordinationMetaData = CoordinationMetaData.builder(coordinationMetaData) - .clearVotingConfigExclusions() - .lastAcceptedConfiguration(new CoordinationMetaData.VotingConfiguration(Collections.singleton(nodeId))) - .lastCommittedConfiguration(new CoordinationMetaData.VotingConfiguration(Collections.singleton(nodeId))) - .build(); + .clearVotingConfigExclusions() + .lastAcceptedConfiguration(new CoordinationMetaData.VotingConfiguration( + Collections.singleton(persistedClusterStateService.getNodeId()))) + .lastCommittedConfiguration(new CoordinationMetaData.VotingConfiguration( + Collections.singleton(persistedClusterStateService.getNodeId()))) + .build(); Settings persistentSettings = Settings.builder() - .put(metaData.persistentSettings()) - .put(UNSAFE_BOOTSTRAP.getKey(), true) - .build(); + .put(metaData.persistentSettings()) + .put(UNSAFE_BOOTSTRAP.getKey(), true) + .build(); MetaData newMetaData = MetaData.builder(metaData) - .clusterUUID(MetaData.UNKNOWN_CLUSTER_UUID) - .generateClusterUuidIfNeeded() - .clusterUUIDCommitted(true) - .persistentSettings(persistentSettings) - .coordinationMetaData(newCoordinationMetaData) - .build(); - - writeNewMetaData(terminal, manifest, manifest.getCurrentTerm(), metaData, newMetaData, dataPaths); + .clusterUUID(MetaData.UNKNOWN_CLUSTER_UUID) + .generateClusterUuidIfNeeded() + .clusterUUIDCommitted(true) + .persistentSettings(persistentSettings) + .coordinationMetaData(newCoordinationMetaData) + .build(); + + final ClusterState newClusterState = ClusterState.builder(oldClusterState) + .metaData(newMetaData).build(); + + terminal.println(Terminal.Verbosity.VERBOSE, + "[old cluster state = " + oldClusterState + ", new cluster state = " + newClusterState + "]"); + + confirm(terminal, CONFIRMATION_MSG); + + try (PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter()) { + writer.writeFullStateAndCommit(state.v1(), newClusterState); + } terminal.println(MASTER_NODE_BOOTSTRAPPED_MSG); } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java index fc7d9fd17efb1..059b584e13ac1 100644 --- 
a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java @@ -687,7 +687,7 @@ private static IndexService validateActiveShardCountAndCreateIndexService(String "]: cannot be greater than number of shard copies [" + (tmpImd.getNumberOfReplicas() + 1) + "]"); } - return indicesService.createIndex(tmpImd, Collections.emptyList()); + return indicesService.createIndex(tmpImd, Collections.emptyList(), false); } private void validate(CreateIndexClusterStateUpdateRequest request, ClusterState state) { diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexAliasesService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexAliasesService.java index 5efd4b6eae8bc..c6149682a203a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexAliasesService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexAliasesService.java @@ -140,7 +140,7 @@ public ClusterState applyAliasActions(ClusterState currentState, Iterable np.path).toArray(Path[]::new); - - final Set nodeIds = new HashSet<>(); - for (final Path path : paths) { - final NodeMetaData metaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, path); - if (metaData != null) { - nodeIds.add(metaData.nodeId()); - } - } - if (nodeIds.size() > 1) { - throw new IllegalStateException( - "data paths " + Arrays.toString(paths) + " belong to multiple nodes with IDs " + nodeIds); - } - - NodeMetaData metaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, paths); + NodeMetaData metaData = PersistedClusterStateService.nodeMetaData(paths); if (metaData == null) { - assert nodeIds.isEmpty() : nodeIds; - metaData = new NodeMetaData(generateNodeId(settings), Version.CURRENT); - } else { - assert nodeIds.equals(Collections.singleton(metaData.nodeId())) : nodeIds + " doesn't match " + metaData; - metaData = metaData.upgradeToCurrentVersion(); + // load legacy metadata + final Set nodeIds = new HashSet<>(); + for (final Path path : paths) { + final NodeMetaData oldStyleMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, path); + if (oldStyleMetaData != null) { + nodeIds.add(oldStyleMetaData.nodeId()); + } + } + if (nodeIds.size() > 1) { + throw new IllegalStateException( + "data paths " + Arrays.toString(paths) + " belong to multiple nodes with IDs " + nodeIds); + } + // load legacy metadata + final NodeMetaData legacyMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, paths); + if (legacyMetaData == null) { + assert nodeIds.isEmpty() : nodeIds; + metaData = new NodeMetaData(generateNodeId(settings), Version.CURRENT); + } else { + assert nodeIds.equals(Collections.singleton(legacyMetaData.nodeId())) : nodeIds + " doesn't match " + legacyMetaData; + metaData = legacyMetaData; + } } - - // we write again to make sure all paths have the latest state file + metaData = metaData.upgradeToCurrentVersion(); assert metaData.nodeVersion().equals(Version.CURRENT) : metaData.nodeVersion() + " != " + Version.CURRENT; - NodeMetaData.FORMAT.writeAndCleanup(metaData, paths); return metaData; } diff --git a/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java b/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java index f3e099f8938ec..c07f32d2512a4 100644 --- 
a/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java +++ b/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java @@ -18,42 +18,41 @@ */ package org.elasticsearch.env; +import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import joptsimple.OptionParser; import joptsimple.OptionSet; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cli.Terminal; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand; import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.cluster.metadata.Manifest; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.internal.io.IOUtils; -import org.elasticsearch.gateway.WriteStateException; +import org.elasticsearch.gateway.MetaDataStateFormat; +import org.elasticsearch.gateway.PersistedClusterStateService; import java.io.IOException; import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; -public class NodeRepurposeCommand extends ElasticsearchNodeCommand { +import static org.elasticsearch.env.NodeEnvironment.INDICES_FOLDER; - private static final Logger logger = LogManager.getLogger(NodeRepurposeCommand.class); +public class NodeRepurposeCommand extends ElasticsearchNodeCommand { static final String ABORTED_BY_USER_MSG = ElasticsearchNodeCommand.ABORTED_BY_USER_MSG; static final String FAILED_TO_OBTAIN_NODE_LOCK_MSG = ElasticsearchNodeCommand.FAILED_TO_OBTAIN_NODE_LOCK_MSG; static final String NO_CLEANUP = "Node has node.data=true -> no clean up necessary"; static final String NO_DATA_TO_CLEAN_UP_FOUND = "No data to clean-up found"; static final String NO_SHARD_DATA_TO_CLEAN_UP_FOUND = "No shard data to clean-up found"; - static final String PRE_V7_MESSAGE = - "No manifest file found. 
If you were previously running this node on Elasticsearch version 6, please proceed.\n" + - "If this node was ever started on Elasticsearch version 7 or higher, it might mean metadata corruption, please abort."; public NodeRepurposeCommand() { super("Repurpose this node to another master/data role, cleaning up any excess persisted data"); @@ -75,17 +74,18 @@ protected boolean validateBeforeLock(Terminal terminal, Environment env) { } @Override - protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { + protected void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment env) + throws IOException { assert DiscoveryNode.isDataNode(env.settings()) == false; if (DiscoveryNode.isMasterNode(env.settings()) == false) { - processNoMasterNoDataNode(terminal, dataPaths); + processNoMasterNoDataNode(terminal, dataPaths, env); } else { - processMasterNoDataNode(terminal, dataPaths); + processMasterNoDataNode(terminal, dataPaths, env); } } - private void processNoMasterNoDataNode(Terminal terminal, Path[] dataPaths) throws IOException { + private void processNoMasterNoDataNode(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { NodeEnvironment.NodePath[] nodePaths = toNodePaths(dataPaths); terminal.println(Terminal.Verbosity.VERBOSE, "Collecting shard data paths"); @@ -95,32 +95,36 @@ private void processNoMasterNoDataNode(Terminal terminal, Path[] dataPaths) thro List indexMetaDataPaths = NodeEnvironment.collectIndexMetaDataPaths(nodePaths); Set indexPaths = uniqueParentPaths(shardDataPaths, indexMetaDataPaths); - if (indexPaths.isEmpty()) { + + final PersistedClusterStateService persistedClusterStateService = createPersistedClusterStateService(dataPaths); + + final MetaData metaData = loadClusterState(terminal, env, persistedClusterStateService).metaData(); + if (indexPaths.isEmpty() && metaData.indices().isEmpty()) { terminal.println(Terminal.Verbosity.NORMAL, NO_DATA_TO_CLEAN_UP_FOUND); return; } - Set indexUUIDs = indexUUIDsFor(indexPaths); - outputVerboseInformation(terminal, nodePaths, indexPaths, indexUUIDs); + final Set indexUUIDs = Sets.union(indexUUIDsFor(indexPaths), + StreamSupport.stream(metaData.indices().values().spliterator(), false) + .map(imd -> imd.value.getIndexUUID()).collect(Collectors.toSet())); + + outputVerboseInformation(terminal, indexPaths, indexUUIDs, metaData); terminal.println(noMasterMessage(indexUUIDs.size(), shardDataPaths.size(), indexMetaDataPaths.size())); outputHowToSeeVerboseInformation(terminal); - final Manifest manifest = loadManifest(terminal, dataPaths); - terminal.println("Node is being re-purposed as no-master and no-data. 
Clean-up of index data will be performed."); confirm(terminal, "Do you want to proceed?"); - if (manifest != null) { - rewriteManifest(terminal, manifest, dataPaths); - } - - removePaths(terminal, indexPaths); + removePaths(terminal, indexPaths); // clean-up shard dirs + // clean-up all metadata dirs + MetaDataStateFormat.deleteMetaState(dataPaths); + IOUtils.rm(Stream.of(dataPaths).map(path -> path.resolve(INDICES_FOLDER)).toArray(Path[]::new)); terminal.println("Node successfully repurposed to no-master and no-data."); } - private void processMasterNoDataNode(Terminal terminal, Path[] dataPaths) throws IOException { + private void processMasterNoDataNode(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { NodeEnvironment.NodePath[] nodePaths = toNodePaths(dataPaths); terminal.println(Terminal.Verbosity.VERBOSE, "Collecting shard data paths"); @@ -130,9 +134,14 @@ private void processMasterNoDataNode(Terminal terminal, Path[] dataPaths) throws return; } - Set indexPaths = uniqueParentPaths(shardDataPaths); - Set indexUUIDs = indexUUIDsFor(indexPaths); - outputVerboseInformation(terminal, nodePaths, shardDataPaths, indexUUIDs); + final PersistedClusterStateService persistedClusterStateService = createPersistedClusterStateService(dataPaths); + + final MetaData metaData = loadClusterState(terminal, env, persistedClusterStateService).metaData(); + + final Set indexPaths = uniqueParentPaths(shardDataPaths); + final Set indexUUIDs = indexUUIDsFor(indexPaths); + + outputVerboseInformation(terminal, shardDataPaths, indexUUIDs, metaData); terminal.println(shardMessage(shardDataPaths.size(), indexUUIDs.size())); outputHowToSeeVerboseInformation(terminal); @@ -140,18 +149,22 @@ private void processMasterNoDataNode(Terminal terminal, Path[] dataPaths) throws terminal.println("Node is being re-purposed as master and no-data. 
Clean-up of shard data will be performed."); confirm(terminal, "Do you want to proceed?"); - removePaths(terminal, shardDataPaths); + removePaths(terminal, shardDataPaths); // clean-up shard dirs terminal.println("Node successfully repurposed to master and no-data."); } - private void outputVerboseInformation(Terminal terminal, NodeEnvironment.NodePath[] nodePaths, - Collection pathsToCleanup, Set indexUUIDs) { + private ClusterState loadClusterState(Terminal terminal, Environment env, PersistedClusterStateService psf) throws IOException { + terminal.println(Terminal.Verbosity.VERBOSE, "Loading cluster state"); + return clusterState(env, psf.loadBestOnDiskState()); + } + + private void outputVerboseInformation(Terminal terminal, Collection pathsToCleanup, Set indexUUIDs, MetaData metaData) { if (terminal.isPrintable(Terminal.Verbosity.VERBOSE)) { terminal.println(Terminal.Verbosity.VERBOSE, "Paths to clean up:"); pathsToCleanup.forEach(p -> terminal.println(Terminal.Verbosity.VERBOSE, " " + p.toString())); terminal.println(Terminal.Verbosity.VERBOSE, "Indices affected:"); - indexUUIDs.forEach(uuid -> terminal.println(Terminal.Verbosity.VERBOSE, " " + toIndexName(nodePaths, uuid))); + indexUUIDs.forEach(uuid -> terminal.println(Terminal.Verbosity.VERBOSE, " " + toIndexName(uuid, metaData))); } } @@ -160,17 +173,15 @@ private void outputHowToSeeVerboseInformation(Terminal terminal) { terminal.println("Use -v to see list of paths and indices affected"); } } - private String toIndexName(NodeEnvironment.NodePath[] nodePaths, String uuid) { - Path[] indexPaths = new Path[nodePaths.length]; - for (int i = 0; i < nodePaths.length; i++) { - indexPaths[i] = nodePaths[i].resolve(uuid); - } - try { - IndexMetaData metaData = IndexMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, indexPaths); - return metaData.getIndex().getName(); - } catch (Exception e) { - return "no name for uuid: " + uuid + ": " + e; + private String toIndexName(String uuid, MetaData metaData) { + if (metaData != null) { + for (ObjectObjectCursor indexMetaData : metaData.indices()) { + if (indexMetaData.value.getIndexUUID().equals(uuid)) { + return indexMetaData.value.getIndex().getName(); + } + } } + return "no name for uuid: " + uuid; } private Set indexUUIDsFor(Set indexPaths) { @@ -186,23 +197,6 @@ static String shardMessage(int shards, int indices) { return "Found " + shards + " shards in " + indices + " indices to clean up"; } - private void rewriteManifest(Terminal terminal, Manifest manifest, Path[] dataPaths) throws WriteStateException { - terminal.println(Terminal.Verbosity.VERBOSE, "Re-writing manifest"); - Manifest newManifest = new Manifest(manifest.getCurrentTerm(), manifest.getClusterStateVersion(), manifest.getGlobalGeneration(), - new HashMap<>()); - Manifest.FORMAT.writeAndCleanup(newManifest, dataPaths); - } - - private Manifest loadManifest(Terminal terminal, Path[] dataPaths) throws IOException { - terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest"); - final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths); - - if (manifest == null) { - terminal.println(Terminal.Verbosity.SILENT, PRE_V7_MESSAGE); - } - return manifest; - } - private void removePaths(Terminal terminal, Collection paths) { terminal.println(Terminal.Verbosity.VERBOSE, "Removing data"); paths.forEach(this::removePath); diff --git a/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java 
b/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java index f50bdf081ef85..022271e0fc51b 100644 --- a/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java +++ b/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java @@ -19,21 +19,18 @@ package org.elasticsearch.env; import joptsimple.OptionParser; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import joptsimple.OptionSet; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.cli.Terminal; import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.gateway.PersistedClusterStateService; import java.io.IOException; import java.nio.file.Path; import java.util.Arrays; public class OverrideNodeVersionCommand extends ElasticsearchNodeCommand { - private static final Logger logger = LogManager.getLogger(OverrideNodeVersionCommand.class); - private static final String TOO_NEW_MESSAGE = DELIMITER + "\n" + @@ -72,10 +69,10 @@ public OverrideNodeVersionCommand() { } @Override - protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { + protected void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment env) + throws IOException { final Path[] nodePaths = Arrays.stream(toNodePaths(dataPaths)).map(p -> p.path).toArray(Path[]::new); - final NodeMetaData nodeMetaData - = new NodeMetaData.NodeMetaDataStateFormat(true).loadLatestState(logger, NamedXContentRegistry.EMPTY, nodePaths); + final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePaths); if (nodeMetaData == null) { throw new ElasticsearchException(NO_METADATA_MESSAGE); } @@ -93,7 +90,7 @@ protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment .replace("V_NEW", nodeMetaData.nodeVersion().toString()) .replace("V_CUR", Version.CURRENT.toString())); - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(nodeMetaData.nodeId(), Version.CURRENT), nodePaths); + PersistedClusterStateService.overrideVersion(Version.CURRENT, dataPaths); terminal.println(SUCCESS_MESSAGE); } diff --git a/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java b/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java index f417d5078558f..69243b075bf02 100644 --- a/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java @@ -22,14 +22,16 @@ import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.util.SetOnce; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateApplier; +import org.elasticsearch.cluster.coordination.CoordinationMetaData; import org.elasticsearch.cluster.coordination.CoordinationState.PersistedState; import org.elasticsearch.cluster.coordination.InMemoryPersistedState; import 
org.elasticsearch.cluster.metadata.IndexMetaData; @@ -42,20 +44,34 @@ import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor; +import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.discovery.DiscoveryModule; -import org.elasticsearch.index.Index; +import org.elasticsearch.env.NodeMetaData; +import org.elasticsearch.node.Node; import org.elasticsearch.plugins.MetaDataUpgrader; +import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; +import java.io.Closeable; import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.UnaryOperator; +import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadFactory; + /** * Loads (and maybe upgrades) cluster metadata at startup, and persistently stores cluster metadata for future restarts. * @@ -64,8 +80,7 @@ * ClusterState#metaData()} because it might be stale or incomplete. Master-eligible nodes must perform an election to find a complete and * non-stale state, and master-ineligible nodes receive the real cluster state from the elected master after joining the cluster. 
*/ -public class GatewayMetaState { - private static final Logger logger = LogManager.getLogger(GatewayMetaState.class); +public class GatewayMetaState implements Closeable { // Set by calling start() private final SetOnce persistedState = new SetOnce<>(); @@ -82,55 +97,111 @@ public MetaData getMetaData() { public void start(Settings settings, TransportService transportService, ClusterService clusterService, MetaStateService metaStateService, MetaDataIndexUpgradeService metaDataIndexUpgradeService, - MetaDataUpgrader metaDataUpgrader) { + MetaDataUpgrader metaDataUpgrader, PersistedClusterStateService persistedClusterStateService) { assert persistedState.get() == null : "should only start once, but already have " + persistedState.get(); - final Tuple manifestClusterStateTuple; - try { - upgradeMetaData(settings, metaStateService, metaDataIndexUpgradeService, metaDataUpgrader); - manifestClusterStateTuple = loadStateAndManifest(ClusterName.CLUSTER_NAME_SETTING.get(settings), metaStateService); - } catch (IOException e) { - throw new ElasticsearchException("failed to load metadata", e); - } - final IncrementalClusterStateWriter incrementalClusterStateWriter - = new IncrementalClusterStateWriter(settings, clusterService.getClusterSettings(), metaStateService, - manifestClusterStateTuple.v1(), - prepareInitialClusterState(transportService, clusterService, manifestClusterStateTuple.v2()), - transportService.getThreadPool()::relativeTimeInMillis); - if (DiscoveryModule.DISCOVERY_TYPE_SETTING.get(settings).equals(DiscoveryModule.ZEN_DISCOVERY_TYPE)) { // only for tests that simulate mixed Zen1/Zen2 clusters, see Zen1IT - if (isMasterOrDataNode(settings)) { - clusterService.addLowPriorityApplier(new GatewayClusterApplier(incrementalClusterStateWriter)); + final Tuple manifestClusterStateTuple; + try { + NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(persistedClusterStateService.getNodeId(), Version.CURRENT), + persistedClusterStateService.getDataPaths()); + manifestClusterStateTuple = metaStateService.loadFullState(); + } catch (IOException e) { + throw new UncheckedIOException(e); } - persistedState.set(new InMemoryPersistedState(manifestClusterStateTuple.v1().getCurrentTerm(), manifestClusterStateTuple.v2())); - } else if (DiscoveryNode.isMasterNode(settings) == false) { - if (DiscoveryNode.isDataNode(settings)) { - // Master-eligible nodes persist index metadata for all indices regardless of whether they hold any shards or not. It's - // vitally important to the safety of the cluster coordination system that master-eligible nodes persist this metadata when - // _accepting_ the cluster state (i.e. before it is committed). This persistence happens on the generic threadpool. - // - // In contrast, master-ineligible data nodes only persist the index metadata for shards that they hold. When all shards of - // an index are moved off such a node the IndicesStore is responsible for removing the corresponding index directory, - // including the metadata, and does so on the cluster applier thread. - // - // This presents a problem: if a shard is unassigned from a node and then reassigned back to it again then there is a race - // between the IndicesStore deleting the index folder and the CoordinationState concurrently trying to write the updated - // metadata into it. We could probably solve this with careful synchronization, but in fact there is no need. 
The persisted - // state on master-ineligible data nodes is mostly ignored - it's only there to support dangling index imports, which is - // inherently unsafe anyway. Thus we can safely delay metadata writes on master-ineligible data nodes until applying the - // cluster state, which is what this does: + final ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)) + .version(manifestClusterStateTuple.v1().getClusterStateVersion()) + .metaData(manifestClusterStateTuple.v2()).build(); + + final IncrementalClusterStateWriter incrementalClusterStateWriter + = new IncrementalClusterStateWriter(settings, clusterService.getClusterSettings(), metaStateService, + manifestClusterStateTuple.v1(), + prepareInitialClusterState(transportService, clusterService, clusterState), + transportService.getThreadPool()::relativeTimeInMillis); + + if (DiscoveryNode.isMasterNode(settings) || DiscoveryNode.isDataNode(settings)) { clusterService.addLowPriorityApplier(new GatewayClusterApplier(incrementalClusterStateWriter)); } + persistedState.set(new InMemoryPersistedState(manifestClusterStateTuple.v1().getCurrentTerm(), clusterState)); + return; + } - // Master-ineligible nodes do not need to persist the cluster state when accepting it because they are not in the voting - // configuration, so it's ok if they have a stale or incomplete cluster state when restarted. We track the latest cluster state - // in memory instead. - persistedState.set(new InMemoryPersistedState(manifestClusterStateTuple.v1().getCurrentTerm(), manifestClusterStateTuple.v2())); + if (DiscoveryNode.isMasterNode(settings) || DiscoveryNode.isDataNode(settings)) { + try { + final PersistedClusterStateService.OnDiskState onDiskState = persistedClusterStateService.loadBestOnDiskState(); + + MetaData metaData = onDiskState.metaData; + long lastAcceptedVersion = onDiskState.lastAcceptedVersion; + long currentTerm = onDiskState.currentTerm; + + if (onDiskState.empty()) { + assert Version.CURRENT.major <= Version.V_7_0_0.major + 1 : + "legacy metadata loader is not needed anymore from v9 onwards"; + final Tuple legacyState = metaStateService.loadFullState(); + if (legacyState.v1().isEmpty() == false) { + metaData = legacyState.v2(); + lastAcceptedVersion = legacyState.v1().getClusterStateVersion(); + currentTerm = legacyState.v1().getCurrentTerm(); + } + } + + PersistedState persistedState = null; + boolean success = false; + try { + final ClusterState clusterState = prepareInitialClusterState(transportService, clusterService, + ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)) + .version(lastAcceptedVersion) + .metaData(upgradeMetaDataForNode(metaData, metaDataIndexUpgradeService, metaDataUpgrader)) + .build()); + + if (DiscoveryNode.isMasterNode(settings)) { + persistedState = new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState); + } else { + persistedState = new AsyncLucenePersistedState(settings, transportService.getThreadPool(), + new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState)); + } + if (DiscoveryNode.isDataNode(settings)) { + metaStateService.unreferenceAll(); // unreference legacy files (only keep them for dangling indices functionality) + } else { + metaStateService.deleteAll(); // delete legacy files + } + // write legacy node metadata to prevent accidental downgrades from spawning empty cluster state + NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(persistedClusterStateService.getNodeId(), Version.CURRENT), + 
persistedClusterStateService.getDataPaths()); + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(persistedState); + } + } + + this.persistedState.set(persistedState); + } catch (IOException e) { + throw new ElasticsearchException("failed to load metadata", e); + } } else { - // Master-ineligible nodes must persist the cluster state when accepting it because they must reload the (complete, fresh) - // last-accepted cluster state when restarted. - persistedState.set(new GatewayPersistedState(incrementalClusterStateWriter)); + final long currentTerm = 0L; + final ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build(); + if (persistedClusterStateService.getDataPaths().length > 0) { + // write empty cluster state just so that we have a persistent node id. There is no need to write out global metadata with + // cluster uuid as coordinating-only nodes do not snap into a cluster as they carry no state + try (PersistedClusterStateService.Writer persistenceWriter = persistedClusterStateService.createWriter()) { + persistenceWriter.writeFullStateAndCommit(currentTerm, clusterState); + } catch (IOException e) { + throw new ElasticsearchException("failed to load metadata", e); + } + try { + // delete legacy cluster state files + metaStateService.deleteAll(); + // write legacy node metadata to prevent downgrades from spawning empty cluster state + NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(persistedClusterStateService.getNodeId(), Version.CURRENT), + persistedClusterStateService.getDataPaths()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + persistedState.set(new InMemoryPersistedState(currentTerm, clusterState)); } } @@ -147,67 +218,10 @@ ClusterState prepareInitialClusterState(TransportService transportService, Clust } // exposed so it can be overridden by tests - void upgradeMetaData(Settings settings, MetaStateService metaStateService, MetaDataIndexUpgradeService metaDataIndexUpgradeService, - MetaDataUpgrader metaDataUpgrader) throws IOException { - if (isMasterOrDataNode(settings)) { - try { - final Tuple metaStateAndData = metaStateService.loadFullState(); - final Manifest manifest = metaStateAndData.v1(); - final MetaData metaData = metaStateAndData.v2(); - - // We finished global state validation and successfully checked all indices for backward compatibility - // and found no non-upgradable indices, which means the upgrade can continue. - // Now it's safe to overwrite global and index metadata. 
- // We don't re-write metadata if it's not upgraded by upgrade plugins, because - // if there is manifest file, it means metadata is properly persisted to all data paths - // if there is no manifest file (upgrade from 6.x to 7.x) metadata might be missing on some data paths, - // but anyway we will re-write it as soon as we receive first ClusterState - final IncrementalClusterStateWriter.AtomicClusterStateWriter writer - = new IncrementalClusterStateWriter.AtomicClusterStateWriter(metaStateService, manifest); - final MetaData upgradedMetaData = upgradeMetaData(metaData, metaDataIndexUpgradeService, metaDataUpgrader); - - final long globalStateGeneration; - if (MetaData.isGlobalStateEquals(metaData, upgradedMetaData) == false) { - globalStateGeneration = writer.writeGlobalState("upgrade", upgradedMetaData); - } else { - globalStateGeneration = manifest.getGlobalGeneration(); - } - - Map indices = new HashMap<>(manifest.getIndexGenerations()); - for (IndexMetaData indexMetaData : upgradedMetaData) { - if (metaData.hasIndexMetaData(indexMetaData) == false) { - final long generation = writer.writeIndex("upgrade", indexMetaData); - indices.put(indexMetaData.getIndex(), generation); - } - } - - final Manifest newManifest = new Manifest(manifest.getCurrentTerm(), manifest.getClusterStateVersion(), - globalStateGeneration, indices); - writer.writeManifestAndCleanup("startup", newManifest); - } catch (Exception e) { - logger.error("failed to read or upgrade local state, exiting...", e); - throw e; - } - } - } - - private static Tuple loadStateAndManifest(ClusterName clusterName, - MetaStateService metaStateService) throws IOException { - final long startNS = System.nanoTime(); - final Tuple manifestAndMetaData = metaStateService.loadFullState(); - final Manifest manifest = manifestAndMetaData.v1(); - - final ClusterState clusterState = ClusterState.builder(clusterName) - .version(manifest.getClusterStateVersion()) - .metaData(manifestAndMetaData.v2()).build(); - - logger.debug("took {} to load state", TimeValue.timeValueMillis(TimeValue.nsecToMSec(System.nanoTime() - startNS))); - - return Tuple.tuple(manifest, clusterState); - } - - private static boolean isMasterOrDataNode(Settings settings) { - return DiscoveryNode.isMasterNode(settings) || DiscoveryNode.isDataNode(settings); + MetaData upgradeMetaDataForNode(MetaData metaData, + MetaDataIndexUpgradeService metaDataIndexUpgradeService, + MetaDataUpgrader metaDataUpgrader) { + return upgradeMetaData(metaData, metaDataIndexUpgradeService, metaDataUpgrader); } /** @@ -259,9 +273,10 @@ private static boolean applyPluginUpgraders(ImmutableOpenMap persistenceWriter = new AtomicReference<>(); + boolean writeNextStateFully; + + LucenePersistedState(PersistedClusterStateService persistedClusterStateService, long currentTerm, ClusterState lastAcceptedState) + throws IOException { + this.persistedClusterStateService = persistedClusterStateService; + this.currentTerm = currentTerm; + this.lastAcceptedState = lastAcceptedState; + // Write the whole state out to be sure it's fresh and using the latest format. Called during initialisation, so that + // (1) throwing an IOException is enough to halt the node, and + // (2) the index is currently empty since it was opened with IndexWriterConfig.OpenMode.CREATE + + // In the common case it's actually sufficient to commit() the existing state and not do any indexing. 
For instance,
+            // this is true if there's only one data path on this master node, and the commit we just loaded was already written out
+            // by this version of Elasticsearch. TODO TBD should we avoid indexing when possible?
+            final PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter();
+            try {
+                writer.writeFullStateAndCommit(currentTerm, lastAcceptedState);
+            } catch (Exception e) {
+                try {
+                    writer.close();
+                } catch (Exception e2) {
+                    e.addSuppressed(e2);
+                }
+                throw e;
+            }
+            persistenceWriter.set(writer);
         }
 
         @Override
         public long getCurrentTerm() {
-            return incrementalClusterStateWriter.getPreviousManifest().getCurrentTerm();
+            return currentTerm;
         }
 
         @Override
         public ClusterState getLastAcceptedState() {
-            final ClusterState previousClusterState = incrementalClusterStateWriter.getPreviousClusterState();
-            assert previousClusterState.nodes().getLocalNode() != null : "Cluster state is not fully built yet";
-            return previousClusterState;
+            return lastAcceptedState;
         }
 
         @Override
         public void setCurrentTerm(long currentTerm) {
             try {
-                incrementalClusterStateWriter.setCurrentTerm(currentTerm);
-            } catch (WriteStateException e) {
-                logger.error(new ParameterizedMessage("Failed to set current term to {}", currentTerm), e);
-                e.rethrowAsErrorOrUncheckedException();
+                if (writeNextStateFully) {
+                    getWriterSafe().writeFullStateAndCommit(currentTerm, lastAcceptedState);
+                    writeNextStateFully = false;
+                } else {
+                    getWriterSafe().commit(currentTerm, lastAcceptedState.version());
+                }
+            } catch (Exception e) {
+                handleExceptionOnWrite(e);
             }
+            this.currentTerm = currentTerm;
         }
 
         @Override
         public void setLastAcceptedState(ClusterState clusterState) {
             try {
-                incrementalClusterStateWriter.setIncrementalWrite(
-                    incrementalClusterStateWriter.getPreviousClusterState().term() == clusterState.term());
-                incrementalClusterStateWriter.updateClusterState(clusterState);
-            } catch (WriteStateException e) {
-                logger.error(new ParameterizedMessage("Failed to set last accepted state with version {}", clusterState.version()), e);
-                e.rethrowAsErrorOrUncheckedException();
+                if (writeNextStateFully) {
+                    getWriterSafe().writeFullStateAndCommit(currentTerm, clusterState);
+                    writeNextStateFully = false;
+                } else {
+                    if (clusterState.term() != lastAcceptedState.term()) {
+                        assert clusterState.term() > lastAcceptedState.term() : clusterState.term() + " vs " + lastAcceptedState.term();
+                        // In a new currentTerm, we cannot compare the persisted metadata's lastAcceptedVersion to those in the new state,
+                        // so it's simplest to write everything again.
+                        getWriterSafe().writeFullStateAndCommit(currentTerm, clusterState);
+                    } else {
+                        // Within the same currentTerm, we _can_ use metadata versions to skip unnecessary writing.
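+                        // The per-index version comparison that makes this incremental write cheap lives in
+                        // Writer#updateMetaData: only documents of indices whose metadata version changed are re-indexed.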
+ getWriterSafe().writeIncrementalStateAndCommit(currentTerm, lastAcceptedState, clusterState); + } + } + } catch (Exception e) { + handleExceptionOnWrite(e); } + + lastAcceptedState = clusterState; } - } + private PersistedClusterStateService.Writer getWriterSafe() { + final PersistedClusterStateService.Writer writer = persistenceWriter.get(); + if (writer == null) { + throw new AlreadyClosedException("persisted state has been closed"); + } + if (writer.isOpen()) { + return writer; + } else { + try { + final PersistedClusterStateService.Writer newWriter = persistedClusterStateService.createWriter(); + if (persistenceWriter.compareAndSet(writer, newWriter)) { + return newWriter; + } else { + assert persistenceWriter.get() == null : "expected no concurrent calls to getWriterSafe"; + newWriter.close(); + throw new AlreadyClosedException("persisted state has been closed"); + } + } catch (Exception e) { + throw ExceptionsHelper.convertToRuntime(e); + } + } + } + + private void handleExceptionOnWrite(Exception e) { + writeNextStateFully = true; + throw ExceptionsHelper.convertToRuntime(e); + } + @Override + public void close() throws IOException { + IOUtils.close(persistenceWriter.getAndSet(null)); + } + } } diff --git a/server/src/main/java/org/elasticsearch/gateway/IncrementalClusterStateWriter.java b/server/src/main/java/org/elasticsearch/gateway/IncrementalClusterStateWriter.java index eb0d243d74fab..6ff97b6a9d2a5 100644 --- a/server/src/main/java/org/elasticsearch/gateway/IncrementalClusterStateWriter.java +++ b/server/src/main/java/org/elasticsearch/gateway/IncrementalClusterStateWriter.java @@ -33,7 +33,6 @@ import org.elasticsearch.index.Index; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -53,9 +52,7 @@ public class IncrementalClusterStateWriter { private final MetaStateService metaStateService; - // On master-eligible nodes we call updateClusterState under the Coordinator's mutex; on master-ineligible data nodes we call - // updateClusterState on the (unique) cluster applier thread; on other nodes we never call updateClusterState. In all cases there's - // no need to synchronize access to these fields. + // We call updateClusterState on the (unique) cluster applier thread so there's no need to synchronize access to these fields. 
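+    // previousManifest and previousClusterState track what was last written to disk, so that updateClusterState can
+    // skip index metadata that has not changed since the previous write (see resolveIndexMetaDataActions).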
     private Manifest previousManifest;
     private ClusterState previousClusterState;
     private final LongSupplier relativeTimeMillisSupplier;
@@ -89,10 +86,6 @@ Manifest getPreviousManifest() {
         return previousManifest;
     }
 
-    ClusterState getPreviousClusterState() {
-        return previousClusterState;
-    }
-
     void setIncrementalWrite(boolean incrementalWrite) {
         this.incrementalWrite = incrementalWrite;
     }
@@ -206,38 +199,20 @@ static List<IndexMetaDataAction> resolveIndexMetaDataActions(Map<Index, Long> pr
         return actions;
     }
 
-    private static Set<Index> getRelevantIndicesOnDataOnlyNode(ClusterState state) {
-        RoutingNode newRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
+    // exposed for tests
+    static Set<Index> getRelevantIndices(ClusterState state) {
+        assert state.nodes().getLocalNode().isDataNode();
+        final RoutingNode newRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
         if (newRoutingNode == null) {
             throw new IllegalStateException("cluster state does not contain this node - cannot write index meta state");
         }
-        Set<Index> indices = new HashSet<>();
-        for (ShardRouting routing : newRoutingNode) {
+        final Set<Index> indices = new HashSet<>();
+        for (final ShardRouting routing : newRoutingNode) {
             indices.add(routing.index());
         }
         return indices;
     }
 
-    private static Set<Index> getRelevantIndicesForMasterEligibleNode(ClusterState state) {
-        Set<Index> relevantIndices = new HashSet<>();
-        // we have to iterate over the metadata to make sure we also capture closed indices
-        for (IndexMetaData indexMetaData : state.metaData()) {
-            relevantIndices.add(indexMetaData.getIndex());
-        }
-        return relevantIndices;
-    }
-
-    // exposed for tests
-    static Set<Index> getRelevantIndices(ClusterState state) {
-        if (state.nodes().getLocalNode().isMasterNode()) {
-            return getRelevantIndicesForMasterEligibleNode(state);
-        } else if (state.nodes().getLocalNode().isDataNode()) {
-            return getRelevantIndicesOnDataOnlyNode(state);
-        } else {
-            return Collections.emptySet();
-        }
-    }
-
     /**
      * Action to perform with index metadata.
      */
diff --git a/server/src/main/java/org/elasticsearch/gateway/MetaStateService.java b/server/src/main/java/org/elasticsearch/gateway/MetaStateService.java
index 5fb7852a1ccb1..35f4003a86722 100644
--- a/server/src/main/java/org/elasticsearch/gateway/MetaStateService.java
+++ b/server/src/main/java/org/elasticsearch/gateway/MetaStateService.java
@@ -69,7 +69,7 @@ public MetaStateService(NodeEnvironment nodeEnv, NamedXContentRegistry namedXCon
      * meta state with globalGeneration -1 and empty meta data is returned.
      * @throws IOException if some IOException when loading files occurs or there is no metadata referenced by manifest file.
      */
-    Tuple<Manifest, MetaData> loadFullState() throws IOException {
+    public Tuple<Manifest, MetaData> loadFullState() throws IOException {
         final Manifest manifest = MANIFEST_FORMAT.loadLatestState(logger, namedXContentRegistry, nodeEnv.nodeDataPaths());
         if (manifest == null) {
             return loadFullStateBWC();
@@ -275,28 +275,26 @@ public void cleanupIndex(Index index, long currentGeneration) {
     }
 
     /**
-     * Writes index metadata and updates manifest file accordingly.
-     * Used by tests.
+     * Creates empty cluster state file on disk, deleting global metadata and unreferencing all index metadata
+     * (only used for dangling indices at that point).
      */
-    public void writeIndexAndUpdateManifest(String reason, IndexMetaData metaData) throws IOException {
-        long generation = writeIndex(reason, metaData);
-        Manifest manifest = loadManifestOrEmpty();
-        Map<Index, Long> indices = new HashMap<>(manifest.getIndexGenerations());
-        indices.put(metaData.getIndex(), generation);
-        manifest = new Manifest(manifest.getCurrentTerm(), manifest.getClusterStateVersion(), manifest.getGlobalGeneration(), indices);
-        writeManifestAndCleanup(reason, manifest);
-        cleanupIndex(metaData.getIndex(), generation);
+    public void unreferenceAll() throws IOException {
+        MANIFEST_FORMAT.writeAndCleanup(Manifest.empty(), nodeEnv.nodeDataPaths()); // write empty file so that indices become unreferenced
+        META_DATA_FORMAT.cleanupOldFiles(Long.MAX_VALUE, nodeEnv.nodeDataPaths());
     }
 
     /**
-     * Writes global metadata and updates manifest file accordingly.
-     * Used by tests.
+     * Removes the manifest file, the global metadata, and all index metadata.
      */
-    public void writeGlobalStateAndUpdateManifest(String reason, MetaData metaData) throws IOException {
-        long generation = writeGlobalState(reason, metaData);
-        Manifest manifest = loadManifestOrEmpty();
-        manifest = new Manifest(manifest.getCurrentTerm(), manifest.getClusterStateVersion(), generation, manifest.getIndexGenerations());
-        writeManifestAndCleanup(reason, manifest);
-        cleanupGlobalState(generation);
+    public void deleteAll() throws IOException {
+        // To ensure that the metadata is never reimported by loadFullStateBWC in case the deletions here fail mid-way through,
+        // we first write an empty manifest file so that the indices become unreferenced, then clean up the indices, and only then
+        // delete the manifest file.
+        unreferenceAll();
+        for (String indexFolderName : nodeEnv.availableIndexFolders()) {
+            // delete meta state directories of indices
+            MetaDataStateFormat.deleteMetaState(nodeEnv.resolveIndexFolder(indexFolderName));
+        }
+        MANIFEST_FORMAT.cleanupOldFiles(Long.MAX_VALUE, nodeEnv.nodeDataPaths()); // finally delete manifest
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java
new file mode 100644
index 0000000000000..ec064550d4dca
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java
@@ -0,0 +1,770 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.elasticsearch.gateway; + +import com.carrotsearch.hppc.cursors.ObjectCursor; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexNotFoundException; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SerialMergeScheduler; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.SimpleFSDirectory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput; +import org.elasticsearch.common.lease.Releasable; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.env.NodeMetaData; +import org.elasticsearch.index.Index; + +import java.io.Closeable; +import java.io.FilterOutputStream; +import java.io.IOError; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.IntPredicate; + +/** + * Stores cluster metadata in a bare Lucene index (per data path) split across a number of documents. This is used by master-eligible nodes + * to record the last-accepted cluster state during publication. The metadata is written incrementally where possible, leaving alone any + * documents that have not changed. 
The index has the following fields: + * + * +------------------------------+-----------------------------+----------------------------------------------+ + * | "type" (string field) | "index_uuid" (string field) | "data" (stored binary field in SMILE format) | + * +------------------------------+-----------------------------+----------------------------------------------+ + * | GLOBAL_TYPE_NAME == "global" | (omitted) | Global metadata | + * | INDEX_TYPE_NAME == "index" | Index UUID | Index metadata | + * +------------------------------+-----------------------------+----------------------------------------------+ + * + * Additionally each commit has the following user data: + * + * +---------------------------+-------------------------+-------------------------------------------------------------------------------+ + * | Key symbol | Key literal | Value | + * +---------------------------+-------------------------+-------------------------------------------------------------------------------+ + * | CURRENT_TERM_KEY | "current_term" | Node's "current" term (≥ last-accepted term and the terms of all sent joins) | + * | LAST_ACCEPTED_VERSION_KEY | "last_accepted_version" | The cluster state version corresponding with the persisted metadata | + * | NODE_ID_KEY | "node_id" | The (persistent) ID of the node that wrote this metadata | + * | NODE_VERSION_KEY | "node_version" | The (ID of the) version of the node that wrote this metadata | + * +---------------------------+-------------------------+-------------------------------------------------------------------------------+ + * + * (the last-accepted term is recorded in MetaData → CoordinationMetaData so does not need repeating here) + */ +public class PersistedClusterStateService { + private static final Logger logger = LogManager.getLogger(PersistedClusterStateService.class); + private static final String CURRENT_TERM_KEY = "current_term"; + private static final String LAST_ACCEPTED_VERSION_KEY = "last_accepted_version"; + private static final String NODE_ID_KEY = "node_id"; + private static final String NODE_VERSION_KEY = "node_version"; + private static final String TYPE_FIELD_NAME = "type"; + private static final String DATA_FIELD_NAME = "data"; + private static final String GLOBAL_TYPE_NAME = "global"; + private static final String INDEX_TYPE_NAME = "index"; + private static final String INDEX_UUID_FIELD_NAME = "index_uuid"; + private static final int COMMIT_DATA_SIZE = 4; + + public static final String METADATA_DIRECTORY_NAME = MetaDataStateFormat.STATE_DIR_NAME; + + private final Path[] dataPaths; + private final String nodeId; + private final NamedXContentRegistry namedXContentRegistry; + private final BigArrays bigArrays; + private final boolean preserveUnknownCustoms; + + public PersistedClusterStateService(NodeEnvironment nodeEnvironment, NamedXContentRegistry namedXContentRegistry, BigArrays bigArrays) { + this(nodeEnvironment.nodeDataPaths(), nodeEnvironment.nodeId(), namedXContentRegistry, bigArrays, false); + } + + public PersistedClusterStateService(Path[] dataPaths, String nodeId, NamedXContentRegistry namedXContentRegistry, + BigArrays bigArrays, boolean preserveUnknownCustoms) { + this.dataPaths = dataPaths; + this.nodeId = nodeId; + this.namedXContentRegistry = namedXContentRegistry; + this.bigArrays = bigArrays; + this.preserveUnknownCustoms = preserveUnknownCustoms; + } + + public String getNodeId() { + return nodeId; + } + + /** + * Creates a new disk-based writer for cluster states + */ + public Writer createWriter() 
throws IOException { + final List metaDataIndexWriters = new ArrayList<>(); + final List closeables = new ArrayList<>(); + boolean success = false; + try { + for (final Path path : dataPaths) { + final Directory directory = createDirectory(path.resolve(METADATA_DIRECTORY_NAME)); + closeables.add(directory); + + final IndexWriter indexWriter = createIndexWriter(directory, false); + closeables.add(indexWriter); + metaDataIndexWriters.add(new MetaDataIndexWriter(directory, indexWriter)); + } + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(closeables); + } + } + return new Writer(metaDataIndexWriters, nodeId, bigArrays); + } + + private static IndexWriter createIndexWriter(Directory directory, boolean openExisting) throws IOException { + final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new KeywordAnalyzer()); + // start empty since we re-write the whole cluster state to ensure it is all using the same format version + indexWriterConfig.setOpenMode(openExisting ? IndexWriterConfig.OpenMode.APPEND : IndexWriterConfig.OpenMode.CREATE); + // only commit when specifically instructed, we must not write any intermediate states + indexWriterConfig.setCommitOnClose(false); + // most of the data goes into stored fields which are not buffered, so we only really need a tiny buffer + indexWriterConfig.setRAMBufferSizeMB(1.0); + // merge on the write thread (e.g. while flushing) + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + + return new IndexWriter(directory, indexWriterConfig); + } + + /** + * Remove all persisted cluster states from the given data paths, for use in tests. Should only be called when there is no open + * {@link Writer} on these paths. + */ + public static void deleteAll(Path[] dataPaths) throws IOException { + for (Path dataPath : dataPaths) { + Lucene.cleanLuceneIndex(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME))); + } + } + + // exposed for tests + Directory createDirectory(Path path) throws IOException { + // it is possible to disable the use of MMapDirectory for indices, and it may be surprising to users that have done so if we still + // use a MMapDirectory here, which might happen with FSDirectory.open(path). Concurrency is of no concern here so a + // SimpleFSDirectory is fine: + return new SimpleFSDirectory(path); + } + + public Path[] getDataPaths() { + return dataPaths; + } + + public static class OnDiskState { + private static final OnDiskState NO_ON_DISK_STATE = new OnDiskState(null, null, 0L, 0L, MetaData.EMPTY_META_DATA); + + private final String nodeId; + private final Path dataPath; + public final long currentTerm; + public final long lastAcceptedVersion; + public final MetaData metaData; + + private OnDiskState(String nodeId, Path dataPath, long currentTerm, long lastAcceptedVersion, MetaData metaData) { + this.nodeId = nodeId; + this.dataPath = dataPath; + this.currentTerm = currentTerm; + this.lastAcceptedVersion = lastAcceptedVersion; + this.metaData = metaData; + } + + public boolean empty() { + return this == NO_ON_DISK_STATE; + } + } + + /** + * Returns the node metadata for the given data paths, and checks if the node ids are unique + * @param dataPaths the data paths to scan + */ + @Nullable + public static NodeMetaData nodeMetaData(Path... 
dataPaths) throws IOException { + String nodeId = null; + Version version = null; + for (final Path dataPath : dataPaths) { + final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME); + if (Files.exists(indexPath)) { + try (DirectoryReader reader = DirectoryReader.open(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)))) { + final Map userData = reader.getIndexCommit().getUserData(); + assert userData.get(NODE_VERSION_KEY) != null; + + final String thisNodeId = userData.get(NODE_ID_KEY); + assert thisNodeId != null; + if (nodeId != null && nodeId.equals(thisNodeId) == false) { + throw new IllegalStateException("unexpected node ID in metadata, found [" + thisNodeId + + "] in [" + dataPath + "] but expected [" + nodeId + "]"); + } else if (nodeId == null) { + nodeId = thisNodeId; + version = Version.fromId(Integer.parseInt(userData.get(NODE_VERSION_KEY))); + } + } catch (IndexNotFoundException e) { + logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e); + } + } + } + if (nodeId == null) { + return null; + } + return new NodeMetaData(nodeId, version); + } + + /** + * Overrides the version field for the metadata in the given data path + */ + public static void overrideVersion(Version newVersion, Path... dataPaths) throws IOException { + for (final Path dataPath : dataPaths) { + final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME); + if (Files.exists(indexPath)) { + try (DirectoryReader reader = DirectoryReader.open(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)))) { + final Map userData = reader.getIndexCommit().getUserData(); + assert userData.get(NODE_VERSION_KEY) != null; + + try (IndexWriter indexWriter = + createIndexWriter(new SimpleFSDirectory(dataPath.resolve(METADATA_DIRECTORY_NAME)), true)) { + final Map commitData = new HashMap<>(userData); + commitData.put(NODE_VERSION_KEY, Integer.toString(newVersion.id)); + indexWriter.setLiveCommitData(commitData.entrySet()); + indexWriter.commit(); + } + } catch (IndexNotFoundException e) { + logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e); + } + } + } + } + + /** + * Loads the best available on-disk cluster state. Returns {@link OnDiskState#NO_ON_DISK_STATE} if no such state was found. + */ + public OnDiskState loadBestOnDiskState() throws IOException { + String committedClusterUuid = null; + Path committedClusterUuidPath = null; + OnDiskState bestOnDiskState = OnDiskState.NO_ON_DISK_STATE; + OnDiskState maxCurrentTermOnDiskState = bestOnDiskState; + + // We use a write-all-read-one strategy: metadata is written to every data path when accepting it, which means it is mostly + // sufficient to read _any_ copy. "Mostly" sufficient because the user can change the set of data paths when restarting, and may + // add a data path containing a stale copy of the metadata. We deal with this by using the freshest copy we can find. 
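+        // "Freshest" is decided by the comparison at the end of this loop: prefer the higher last-accepted term in the
+        // metadata, then the higher last-accepted cluster state version, and finally the higher current term recorded in
+        // the commit user data.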
+ for (final Path dataPath : dataPaths) { + final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME); + if (Files.exists(indexPath)) { + try (Directory directory = createDirectory(indexPath); + DirectoryReader directoryReader = DirectoryReader.open(directory)) { + final OnDiskState onDiskState = loadOnDiskState(dataPath, directoryReader); + + if (nodeId.equals(onDiskState.nodeId) == false) { + throw new IllegalStateException("unexpected node ID in metadata, found [" + onDiskState.nodeId + + "] in [" + dataPath + "] but expected [" + nodeId + "]"); + } + + if (onDiskState.metaData.clusterUUIDCommitted()) { + if (committedClusterUuid == null) { + committedClusterUuid = onDiskState.metaData.clusterUUID(); + committedClusterUuidPath = dataPath; + } else if (committedClusterUuid.equals(onDiskState.metaData.clusterUUID()) == false) { + throw new IllegalStateException("mismatched cluster UUIDs in metadata, found [" + committedClusterUuid + + "] in [" + committedClusterUuidPath + "] and [" + onDiskState.metaData.clusterUUID() + "] in [" + + dataPath + "]"); + } + } + + if (maxCurrentTermOnDiskState.empty() || maxCurrentTermOnDiskState.currentTerm < onDiskState.currentTerm) { + maxCurrentTermOnDiskState = onDiskState; + } + + long acceptedTerm = onDiskState.metaData.coordinationMetaData().term(); + long maxAcceptedTerm = bestOnDiskState.metaData.coordinationMetaData().term(); + if (bestOnDiskState.empty() + || acceptedTerm > maxAcceptedTerm + || (acceptedTerm == maxAcceptedTerm + && (onDiskState.lastAcceptedVersion > bestOnDiskState.lastAcceptedVersion + || (onDiskState.lastAcceptedVersion == bestOnDiskState.lastAcceptedVersion) + && onDiskState.currentTerm > bestOnDiskState.currentTerm))) { + bestOnDiskState = onDiskState; + } + } catch (IndexNotFoundException e) { + logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e); + } + } + } + + if (bestOnDiskState.currentTerm != maxCurrentTermOnDiskState.currentTerm) { + throw new IllegalStateException("inconsistent terms found: best state is from [" + bestOnDiskState.dataPath + + "] in term [" + bestOnDiskState.currentTerm + "] but there is a stale state in [" + maxCurrentTermOnDiskState.dataPath + + "] with greater term [" + maxCurrentTermOnDiskState.currentTerm + "]"); + } + + return bestOnDiskState; + } + + private OnDiskState loadOnDiskState(Path dataPath, DirectoryReader reader) throws IOException { + final IndexSearcher searcher = new IndexSearcher(reader); + searcher.setQueryCache(null); + + final SetOnce builderReference = new SetOnce<>(); + consumeFromType(searcher, GLOBAL_TYPE_NAME, bytes -> + { + final MetaData metaData = MetaData.Builder.fromXContent(XContentFactory.xContent(XContentType.SMILE) + .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, bytes.bytes, bytes.offset, bytes.length), + preserveUnknownCustoms); + logger.trace("found global metadata with last-accepted term [{}]", metaData.coordinationMetaData().term()); + if (builderReference.get() != null) { + throw new IllegalStateException("duplicate global metadata found in [" + dataPath + "]"); + } + builderReference.set(MetaData.builder(metaData)); + }); + + final MetaData.Builder builder = builderReference.get(); + if (builder == null) { + throw new IllegalStateException("no global metadata found in [" + dataPath + "]"); + } + + logger.trace("got global metadata, now reading index metadata"); + + final Set indexUUIDs = new HashSet<>(); + consumeFromType(searcher, INDEX_TYPE_NAME, bytes -> + { + final IndexMetaData indexMetaData = 
IndexMetaData.fromXContent(XContentFactory.xContent(XContentType.SMILE) + .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, bytes.bytes, bytes.offset, bytes.length)); + logger.trace("found index metadata for {}", indexMetaData.getIndex()); + if (indexUUIDs.add(indexMetaData.getIndexUUID()) == false) { + throw new IllegalStateException("duplicate metadata found for " + indexMetaData.getIndex() + " in [" + dataPath + "]"); + } + builder.put(indexMetaData, false); + }); + + final Map userData = reader.getIndexCommit().getUserData(); + logger.trace("loaded metadata [{}] from [{}]", userData, reader.directory()); + assert userData.size() == COMMIT_DATA_SIZE : userData; + assert userData.get(CURRENT_TERM_KEY) != null; + assert userData.get(LAST_ACCEPTED_VERSION_KEY) != null; + assert userData.get(NODE_ID_KEY) != null; + assert userData.get(NODE_VERSION_KEY) != null; + return new OnDiskState(userData.get(NODE_ID_KEY), dataPath, Long.parseLong(userData.get(CURRENT_TERM_KEY)), + Long.parseLong(userData.get(LAST_ACCEPTED_VERSION_KEY)), builder.build()); + } + + private static void consumeFromType(IndexSearcher indexSearcher, String type, + CheckedConsumer bytesRefConsumer) throws IOException { + + final Query query = new TermQuery(new Term(TYPE_FIELD_NAME, type)); + final Weight weight = indexSearcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 0.0f); + logger.trace("running query [{}]", query); + + for (LeafReaderContext leafReaderContext : indexSearcher.getIndexReader().leaves()) { + logger.trace("new leafReaderContext: {}", leafReaderContext); + final Scorer scorer = weight.scorer(leafReaderContext); + if (scorer != null) { + final Bits liveDocs = leafReaderContext.reader().getLiveDocs(); + final IntPredicate isLiveDoc = liveDocs == null ? i -> true : liveDocs::get; + final DocIdSetIterator docIdSetIterator = scorer.iterator(); + while (docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + if (isLiveDoc.test(docIdSetIterator.docID())) { + logger.trace("processing doc {}", docIdSetIterator.docID()); + bytesRefConsumer.accept( + leafReaderContext.reader().document(docIdSetIterator.docID()).getBinaryValue(DATA_FIELD_NAME)); + } + } + } + } + } + + private static final ToXContent.Params FORMAT_PARAMS; + + static { + Map params = new HashMap<>(2); + params.put("binary", "true"); + params.put(MetaData.CONTEXT_MODE_PARAM, MetaData.CONTEXT_MODE_GATEWAY); + FORMAT_PARAMS = new ToXContent.MapParams(params); + } + + /** + * A {@link Document} with a stored field containing serialized metadata written to a {@link ReleasableBytesStreamOutput} which must be + * released when no longer needed. + */ + private static class ReleasableDocument implements Releasable { + private final Document document; + private final Releasable releasable; + + ReleasableDocument(Document document, Releasable releasable) { + this.document = document; + this.releasable = releasable; + } + + Document getDocument() { + return document; + } + + @Override + public void close() { + releasable.close(); + } + } + + /** + * Encapsulates a single {@link IndexWriter} with its {@link Directory} for ease of closing, and a {@link Logger}. There is one of these + * for each data path. 
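+ * Every update is applied to each MetaDataIndexWriter in turn, so each data path ends up holding its own complete copy
+ * of the cluster metadata.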
+ */ + private static class MetaDataIndexWriter implements Closeable { + + private final Logger logger; + private final Directory directory; + private final IndexWriter indexWriter; + + MetaDataIndexWriter(Directory directory, IndexWriter indexWriter) { + this.directory = directory; + this.indexWriter = indexWriter; + this.logger = Loggers.getLogger(MetaDataIndexWriter.class, directory.toString()); + } + + void deleteAll() throws IOException { + this.logger.trace("clearing existing metadata"); + this.indexWriter.deleteAll(); + } + + void updateIndexMetaDataDocument(Document indexMetaDataDocument, Index index) throws IOException { + this.logger.trace("updating metadata for [{}]", index); + indexWriter.updateDocument(new Term(INDEX_UUID_FIELD_NAME, index.getUUID()), indexMetaDataDocument); + } + + void updateGlobalMetaData(Document globalMetaDataDocument) throws IOException { + this.logger.trace("updating global metadata doc"); + indexWriter.updateDocument(new Term(TYPE_FIELD_NAME, GLOBAL_TYPE_NAME), globalMetaDataDocument); + } + + void deleteIndexMetaData(String indexUUID) throws IOException { + this.logger.trace("removing metadata for [{}]", indexUUID); + indexWriter.deleteDocuments(new Term(INDEX_UUID_FIELD_NAME, indexUUID)); + } + + void flush() throws IOException { + this.logger.trace("flushing"); + this.indexWriter.flush(); + } + + void prepareCommit(String nodeId, long currentTerm, long lastAcceptedVersion) throws IOException { + final Map commitData = new HashMap<>(COMMIT_DATA_SIZE); + commitData.put(CURRENT_TERM_KEY, Long.toString(currentTerm)); + commitData.put(LAST_ACCEPTED_VERSION_KEY, Long.toString(lastAcceptedVersion)); + commitData.put(NODE_VERSION_KEY, Integer.toString(Version.CURRENT.id)); + commitData.put(NODE_ID_KEY, nodeId); + indexWriter.setLiveCommitData(commitData.entrySet()); + indexWriter.prepareCommit(); + } + + void commit() throws IOException { + indexWriter.commit(); + } + + @Override + public void close() throws IOException { + IOUtils.close(indexWriter, directory); + } + } + + public static class Writer implements Closeable { + + private final List metaDataIndexWriters; + private final String nodeId; + private final BigArrays bigArrays; + + boolean fullStateWritten = false; + private final AtomicBoolean closed = new AtomicBoolean(); + + private Writer(List metaDataIndexWriters, String nodeId, BigArrays bigArrays) { + this.metaDataIndexWriters = metaDataIndexWriters; + this.nodeId = nodeId; + this.bigArrays = bigArrays; + } + + private void ensureOpen() { + if (closed.get()) { + throw new AlreadyClosedException("cluster state writer is closed already"); + } + } + + public boolean isOpen() { + return closed.get() == false; + } + + private void closeIfAnyIndexWriterHasTragedyOrIsClosed() { + if (metaDataIndexWriters.stream().map(writer -> writer.indexWriter) + .anyMatch(iw -> iw.getTragicException() != null || iw.isOpen() == false)) { + try { + close(); + } catch (Exception e) { + logger.warn("failed on closing cluster state writer", e); + } + } + } + + /** + * Overrides and commits the given current term and cluster state + */ + public void writeFullStateAndCommit(long currentTerm, ClusterState clusterState) throws IOException { + ensureOpen(); + try { + overwriteMetaData(clusterState.metaData()); + commit(currentTerm, clusterState.version()); + fullStateWritten = true; + } finally { + closeIfAnyIndexWriterHasTragedyOrIsClosed(); + } + } + + /** + * Updates and commits the given cluster state update + */ + void writeIncrementalStateAndCommit(long currentTerm, 
ClusterState previousClusterState, + ClusterState clusterState) throws IOException { + ensureOpen(); + assert fullStateWritten : "Need to write full state first before doing incremental writes"; + try { + updateMetaData(previousClusterState.metaData(), clusterState.metaData()); + commit(currentTerm, clusterState.version()); + } finally { + closeIfAnyIndexWriterHasTragedyOrIsClosed(); + } + } + + /** + * Update the persisted metadata to match the given cluster state by removing any stale or unnecessary documents and adding any + * updated documents. + */ + private void updateMetaData(MetaData previouslyWrittenMetaData, MetaData metaData) throws IOException { + assert previouslyWrittenMetaData.coordinationMetaData().term() == metaData.coordinationMetaData().term(); + logger.trace("currentTerm [{}] matches previous currentTerm, writing changes only", + metaData.coordinationMetaData().term()); + + if (MetaData.isGlobalStateEquals(previouslyWrittenMetaData, metaData) == false) { + try (ReleasableDocument globalMetaDataDocument = makeGlobalMetaDataDocument(metaData)) { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.updateGlobalMetaData(globalMetaDataDocument.getDocument()); + } + } + } + + final Map indexMetaDataVersionByUUID = new HashMap<>(previouslyWrittenMetaData.indices().size()); + for (ObjectCursor cursor : previouslyWrittenMetaData.indices().values()) { + final IndexMetaData indexMetaData = cursor.value; + final Long previousValue = indexMetaDataVersionByUUID.putIfAbsent(indexMetaData.getIndexUUID(), indexMetaData.getVersion()); + assert previousValue == null : indexMetaData.getIndexUUID() + " already mapped to " + previousValue; + } + + for (ObjectCursor cursor : metaData.indices().values()) { + final IndexMetaData indexMetaData = cursor.value; + final Long previousVersion = indexMetaDataVersionByUUID.get(indexMetaData.getIndexUUID()); + if (previousVersion == null || indexMetaData.getVersion() != previousVersion) { + logger.trace("updating metadata for [{}], changing version from [{}] to [{}]", + indexMetaData.getIndex(), previousVersion, indexMetaData.getVersion()); + try (ReleasableDocument indexMetaDataDocument = makeIndexMetaDataDocument(indexMetaData)) { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.updateIndexMetaDataDocument(indexMetaDataDocument.getDocument(), indexMetaData.getIndex()); + } + } + } else { + logger.trace("no action required for [{}]", indexMetaData.getIndex()); + } + indexMetaDataVersionByUUID.remove(indexMetaData.getIndexUUID()); + } + + for (String removedIndexUUID : indexMetaDataVersionByUUID.keySet()) { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.deleteIndexMetaData(removedIndexUUID); + } + } + + // Flush, to try and expose a failure (e.g. out of disk space) before committing, because we can handle a failure here more + // gracefully than one that occurs during the commit process. + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.flush(); + } + } + + /** + * Update the persisted metadata to match the given cluster state by removing all existing documents and then adding new documents. 
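+     * This is the non-incremental path: it is used by writeFullStateAndCommit for the initial write, and again whenever
+     * an earlier write attempt failed and the next state must therefore be written out in full.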
+ */ + private void overwriteMetaData(MetaData metaData) throws IOException { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.deleteAll(); + } + addMetaData(metaData); + } + + /** + * Add documents for the metadata of the given cluster state, assuming that there are currently no documents. + */ + private void addMetaData(MetaData metaData) throws IOException { + try (ReleasableDocument globalMetaDataDocument = makeGlobalMetaDataDocument(metaData)) { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.updateGlobalMetaData(globalMetaDataDocument.getDocument()); + } + } + + for (ObjectCursor cursor : metaData.indices().values()) { + final IndexMetaData indexMetaData = cursor.value; + try (ReleasableDocument indexMetaDataDocument = makeIndexMetaDataDocument(indexMetaData)) { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.updateIndexMetaDataDocument(indexMetaDataDocument.getDocument(), indexMetaData.getIndex()); + } + } + } + + // Flush, to try and expose a failure (e.g. out of disk space) before committing, because we can handle a failure here more + // gracefully than one that occurs during the commit process. + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.flush(); + } + } + + public void commit(long currentTerm, long lastAcceptedVersion) throws IOException { + ensureOpen(); + try { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.prepareCommit(nodeId, currentTerm, lastAcceptedVersion); + } + } catch (Exception e) { + try { + close(); + } catch (Exception e2) { + logger.warn("failed on closing cluster state writer", e2); + e.addSuppressed(e2); + } + throw e; + } finally { + closeIfAnyIndexWriterHasTragedyOrIsClosed(); + } + try { + for (MetaDataIndexWriter metaDataIndexWriter : metaDataIndexWriters) { + metaDataIndexWriter.commit(); + } + } catch (IOException e) { + // The commit() call has similar semantics to a fsync(): although it's atomic, if it fails then we've no idea whether the + // data on disk is now the old version or the new version, and this is a disaster. It's safest to fail the whole node and + // retry from the beginning. 
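+                // (IOError is an Error rather than an Exception, so it bypasses the catch (Exception) handlers used
+                // elsewhere in this class and brings the node down, which is the intended outcome here.)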
+ try { + close(); + } catch (Exception e2) { + e.addSuppressed(e2); + } + throw new IOError(e); + } finally { + closeIfAnyIndexWriterHasTragedyOrIsClosed(); + } + } + + @Override + public void close() throws IOException { + logger.trace("closing PersistedClusterStateService.Writer"); + if (closed.compareAndSet(false, true)) { + IOUtils.close(metaDataIndexWriters); + } + } + + private ReleasableDocument makeIndexMetaDataDocument(IndexMetaData indexMetaData) throws IOException { + final ReleasableDocument indexMetaDataDocument = makeDocument(INDEX_TYPE_NAME, indexMetaData); + boolean success = false; + try { + final String indexUUID = indexMetaData.getIndexUUID(); + assert indexUUID.equals(IndexMetaData.INDEX_UUID_NA_VALUE) == false; + indexMetaDataDocument.getDocument().add(new StringField(INDEX_UUID_FIELD_NAME, indexUUID, Field.Store.NO)); + success = true; + return indexMetaDataDocument; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(indexMetaDataDocument); + } + } + } + + private ReleasableDocument makeGlobalMetaDataDocument(MetaData metaData) throws IOException { + return makeDocument(GLOBAL_TYPE_NAME, metaData); + } + + private ReleasableDocument makeDocument(String typeName, ToXContent metaData) throws IOException { + final Document document = new Document(); + document.add(new StringField(TYPE_FIELD_NAME, typeName, Field.Store.NO)); + + boolean success = false; + final ReleasableBytesStreamOutput releasableBytesStreamOutput = new ReleasableBytesStreamOutput(bigArrays); + try { + final FilterOutputStream outputStream = new FilterOutputStream(releasableBytesStreamOutput) { + @Override + public void close() { + // closing the XContentBuilder should not release the bytes yet + } + }; + try (XContentBuilder xContentBuilder = XContentFactory.contentBuilder(XContentType.SMILE, outputStream)) { + xContentBuilder.startObject(); + metaData.toXContent(xContentBuilder, FORMAT_PARAMS); + xContentBuilder.endObject(); + } + document.add(new StoredField(DATA_FIELD_NAME, releasableBytesStreamOutput.bytes().toBytesRef())); + final ReleasableDocument releasableDocument = new ReleasableDocument(document, releasableBytesStreamOutput); + success = true; + return releasableDocument; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(releasableBytesStreamOutput); + } + } + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/IndexService.java b/server/src/main/java/org/elasticsearch/index/IndexService.java index cd7ef1f0b84f8..a09881c0eae40 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexService.java +++ b/server/src/main/java/org/elasticsearch/index/IndexService.java @@ -48,6 +48,8 @@ import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.env.ShardLock; import org.elasticsearch.env.ShardLockObtainFailedException; +import org.elasticsearch.gateway.MetaDataStateFormat; +import org.elasticsearch.gateway.WriteStateException; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.cache.IndexCache; import org.elasticsearch.index.cache.bitset.BitsetFilterCache; @@ -90,6 +92,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.BooleanSupplier; @@ -325,6 +328,29 @@ public synchronized void close(final String reason, boolean delete) throws IOExc } } + // method is synchronized so that IndexService 
can't be closed while we're writing out dangling indices information
+    public synchronized void writeDanglingIndicesInfo() {
+        if (closed.get()) {
+            return;
+        }
+        try {
+            IndexMetaData.FORMAT.writeAndCleanup(getMetaData(), nodeEnv.indexPaths(index()));
+        } catch (WriteStateException e) {
+            logger.warn(() -> new ParameterizedMessage("failed to write dangling indices state for index {}", index()), e);
+        }
+    }
+
+    // method is synchronized so that IndexService can't be closed while we're deleting dangling indices information
+    public synchronized void deleteDanglingIndicesInfo() {
+        if (closed.get()) {
+            return;
+        }
+        try {
+            MetaDataStateFormat.deleteMetaState(nodeEnv.indexPaths(index()));
+        } catch (IOException e) {
+            logger.warn(() -> new ParameterizedMessage("failed to delete dangling indices state for index {}", index()), e);
+        }
+    }
 
     public String indexUUID() {
         return indexSettings.getUUID();
@@ -671,9 +697,15 @@ public IndexMetaData getMetaData() {
         return indexSettings.getIndexMetaData();
     }
 
+    private final CopyOnWriteArrayList<Consumer<IndexMetaData>> metaDataListeners = new CopyOnWriteArrayList<>();
+
+    public void addMetaDataListener(Consumer<IndexMetaData> listener) {
+        metaDataListeners.add(listener);
+    }
+
     @Override
     public synchronized void updateMetaData(final IndexMetaData currentIndexMetaData, final IndexMetaData newIndexMetaData) {
-        final boolean updateIndexMetaData = indexSettings.updateIndexMetaData(newIndexMetaData);
+        final boolean updateIndexSettings = indexSettings.updateIndexMetaData(newIndexMetaData);
 
         if (Assertions.ENABLED
             && currentIndexMetaData != null
@@ -681,16 +713,16 @@ public synchronized void updateMetaData(final IndexMetaData currentIndexMetaData
             final long currentSettingsVersion = currentIndexMetaData.getSettingsVersion();
             final long newSettingsVersion = newIndexMetaData.getSettingsVersion();
             if (currentSettingsVersion == newSettingsVersion) {
-                assert updateIndexMetaData == false;
+                assert updateIndexSettings == false;
             } else {
-                assert updateIndexMetaData;
+                assert updateIndexSettings;
                 assert currentSettingsVersion < newSettingsVersion :
                     "expected current settings version [" + currentSettingsVersion + "] "
                         + "to be less than new settings version [" + newSettingsVersion + "]";
             }
         }
 
-        if (updateIndexMetaData) {
+        if (updateIndexSettings) {
             for (final IndexShard shard : this.shards.values()) {
                 try {
                     shard.onSettingsChanged();
@@ -726,6 +758,8 @@ public boolean isForceExecution() {
             }
             updateFsyncTaskIfNecessary();
         }
+
+        metaDataListeners.forEach(c -> c.accept(newIndexMetaData));
     }
 
     private void updateFsyncTaskIfNecessary() {
diff --git a/server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java b/server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java
index f04750704074e..46a8c013783ec 100644
--- a/server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java
+++ b/server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java
@@ -21,7 +21,6 @@
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
-
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.index.IndexWriter;
@@ -33,9 +32,9 @@
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.NativeFSLockFactory;
 import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.cli.EnvironmentAwareCommand;
 import org.elasticsearch.cli.Terminal;
-import org.elasticsearch.cluster.ClusterModule;
+import
org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.AllocationId; import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; @@ -49,11 +48,10 @@ import org.elasticsearch.common.io.PathUtils; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.env.NodeMetaData; -import org.elasticsearch.gateway.MetaDataStateFormat; +import org.elasticsearch.gateway.PersistedClusterStateService; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.engine.Engine; @@ -65,15 +63,15 @@ import java.io.OutputStream; import java.io.PrintStream; import java.io.PrintWriter; -import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Objects; +import java.util.stream.StreamSupport; -public class RemoveCorruptedShardDataCommand extends EnvironmentAwareCommand { +public class RemoveCorruptedShardDataCommand extends ElasticsearchNodeCommand { private static final Logger logger = LogManager.getLogger(RemoveCorruptedShardDataCommand.class); @@ -84,7 +82,6 @@ public class RemoveCorruptedShardDataCommand extends EnvironmentAwareCommand { private final RemoveCorruptedLuceneSegmentsAction removeCorruptedLuceneSegmentsAction; private final TruncateTranslogAction truncateTranslogAction; - private final NamedXContentRegistry namedXContentRegistry; public RemoveCorruptedShardDataCommand() { super("Removes corrupted shard files"); @@ -102,8 +99,6 @@ public RemoveCorruptedShardDataCommand() { parser.accepts(TRUNCATE_CLEAN_TRANSLOG_FLAG, "Truncate the translog even if it is not corrupt"); - namedXContentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables()); - removeCorruptedLuceneSegmentsAction = new RemoveCorruptedLuceneSegmentsAction(); truncateTranslogAction = new TruncateTranslogAction(namedXContentRegistry); } @@ -123,11 +118,12 @@ protected Path getPath(String dirValue) { return PathUtils.get(dirValue, "", ""); } - protected void findAndProcessShardPath(OptionSet options, Environment environment, CheckedConsumer consumer) + protected void findAndProcessShardPath(OptionSet options, Environment environment, Path[] dataPaths, int nodeLockId, + ClusterState clusterState, CheckedConsumer consumer) throws IOException { final Settings settings = environment.settings(); - final String indexName; + final IndexMetaData indexMetaData; final int shardId; final int fromNodeId; final int toNodeId; @@ -141,81 +137,56 @@ protected void findAndProcessShardPath(OptionSet options, Environment environmen throw new ElasticsearchException("index directory [" + indexPath + "], must exist and be a directory"); } - final IndexMetaData indexMetaData = - IndexMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, shardParent); - final String shardIdFileName = path.getFileName().toString(); final String nodeIdFileName = shardParentParent.getParent().getFileName().toString(); + final String indexUUIDFolderName = shardParent.getFileName().toString(); if (Files.isDirectory(path) && shardIdFileName.chars().allMatch(Character::isDigit) 
// SHARD-ID path element check && NodeEnvironment.INDICES_FOLDER.equals(shardParentParent.getFileName().toString()) // `indices` check && nodeIdFileName.chars().allMatch(Character::isDigit) // NODE-ID check && NodeEnvironment.NODES_FOLDER.equals(shardParentParent.getParent().getParent().getFileName().toString()) // `nodes` check ) { shardId = Integer.parseInt(shardIdFileName); - indexName = indexMetaData.getIndex().getName(); fromNodeId = Integer.parseInt(nodeIdFileName); toNodeId = fromNodeId + 1; + indexMetaData = StreamSupport.stream(clusterState.metaData().indices().values().spliterator(), false) + .map(imd -> imd.value) + .filter(imd -> imd.getIndexUUID().equals(indexUUIDFolderName)).findFirst() + .orElse(null); } else { throw new ElasticsearchException("Unable to resolve shard id. Wrong folder structure at [ " + path.toString() + " ], expected .../nodes/[NODE-ID]/indices/[INDEX-UUID]/[SHARD-ID]"); } } else { // otherwise resolve shardPath based on the index name and shard id - indexName = Objects.requireNonNull(indexNameOption.value(options), "Index name is required"); + String indexName = Objects.requireNonNull(indexNameOption.value(options), "Index name is required"); shardId = Objects.requireNonNull(shardIdOption.value(options), "Shard ID is required"); + indexMetaData = clusterState.metaData().index(indexName); + } - // resolve shard path in case of multi-node layout per environment - fromNodeId = 0; - toNodeId = NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.get(settings); + if (indexMetaData == null) { + throw new ElasticsearchException("Unable to find index in cluster state"); } - // have to iterate over possibleLockId as NodeEnvironment; on a contrast to it - we have to fail if node is busy - for (int possibleLockId = fromNodeId; possibleLockId < toNodeId; possibleLockId++) { - try { - try (NodeEnvironment.NodeLock nodeLock = new NodeEnvironment.NodeLock(possibleLockId, logger, environment, Files::exists)) { - final NodeEnvironment.NodePath[] nodePaths = nodeLock.getNodePaths(); - for (NodeEnvironment.NodePath nodePath : nodePaths) { - if (Files.exists(nodePath.indicesPath)) { - // have to scan all index uuid folders to resolve from index name - try (DirectoryStream stream = Files.newDirectoryStream(nodePath.indicesPath)) { - for (Path file : stream) { - if (Files.exists(file.resolve(MetaDataStateFormat.STATE_DIR_NAME)) == false) { - continue; - } - - final IndexMetaData indexMetaData = - IndexMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, file); - if (indexMetaData == null) { - continue; - } - final IndexSettings indexSettings = new IndexSettings(indexMetaData, settings); - final Index index = indexMetaData.getIndex(); - if (indexName.equals(index.getName()) == false) { - continue; - } - final ShardId shId = new ShardId(index, shardId); - - final Path shardPathLocation = nodePath.resolve(shId); - if (Files.exists(shardPathLocation) == false) { - continue; - } - final ShardPath shardPath = ShardPath.loadShardPath(logger, shId, indexSettings.customDataPath(), - new Path[]{shardPathLocation}, possibleLockId, nodePath.path); - if (shardPath != null) { - consumer.accept(shardPath); - return; - } - } - } - } - } + final IndexSettings indexSettings = new IndexSettings(indexMetaData, settings); + final Index index = indexMetaData.getIndex(); + final ShardId shId = new ShardId(index, shardId); + + for (Path dataPath : dataPaths) { + final Path shardPathLocation = dataPath + .resolve(NodeEnvironment.INDICES_FOLDER) + .resolve(index.getUUID()) + 
.resolve(Integer.toString(shId.id())); + if (Files.exists(shardPathLocation)) { + final ShardPath shardPath = ShardPath.loadShardPath(logger, shId, indexSettings.customDataPath(), + new Path[]{shardPathLocation}, nodeLockId, dataPath); + if (shardPath != null) { + consumer.accept(shardPath); + return; } - } catch (LockObtainFailedException lofe) { - throw new ElasticsearchException("Failed to lock node's directory [" + lofe.getMessage() - + "], is Elasticsearch still running ?"); } } - throw new ElasticsearchException("Unable to resolve shard path for index [" + indexName + "] and shard id [" + shardId + "]"); + throw new ElasticsearchException("Unable to resolve shard path for index [" + indexMetaData.getIndex().getName() + + "] and shard id [" + shardId + "]"); } public static boolean isCorruptMarkerFileIsPresent(final Directory directory) throws IOException { @@ -267,11 +238,9 @@ private static void confirm(String msg, Terminal terminal) { } } - private void warnAboutESShouldBeStopped(Terminal terminal) { + private void warnAboutIndexBackup(Terminal terminal) { terminal.println("-----------------------------------------------------------------------"); terminal.println(""); - terminal.println(" WARNING: Elasticsearch MUST be stopped before running this tool."); - terminal.println(""); terminal.println(" Please make a complete backup of your index before using this tool."); terminal.println(""); terminal.println("-----------------------------------------------------------------------"); @@ -279,10 +248,13 @@ private void warnAboutESShouldBeStopped(Terminal terminal) { // Visible for testing @Override - public void execute(Terminal terminal, OptionSet options, Environment environment) throws Exception { - warnAboutESShouldBeStopped(terminal); + public void processNodePaths(Terminal terminal, Path[] dataPaths, int nodeLockId, OptionSet options, Environment environment) + throws IOException { + warnAboutIndexBackup(terminal); - findAndProcessShardPath(options, environment, shardPath -> { + final ClusterState clusterState = loadTermAndClusterState(createPersistedClusterStateService(dataPaths), environment).v2(); + + findAndProcessShardPath(options, environment, dataPaths, nodeLockId, clusterState, shardPath -> { final Path indexPath = shardPath.resolveIndex(); final Path translogPath = shardPath.resolveTranslog(); final Path nodePath = getNodePath(shardPath); @@ -332,7 +304,7 @@ public void write(int b) { terminal.println("Opening translog at " + translogPath); terminal.println(""); try { - translogCleanStatus = truncateTranslogAction.getCleanStatus(shardPath, indexDir); + translogCleanStatus = truncateTranslogAction.getCleanStatus(shardPath, clusterState, indexDir); } catch (Exception e) { terminal.println(e.getMessage()); throw e; @@ -476,21 +448,17 @@ private void newAllocationId(ShardPath shardPath, Terminal terminal) throws IOEx printRerouteCommand(shardPath, terminal, true); } - private void printRerouteCommand(ShardPath shardPath, Terminal terminal, boolean allocateStale) throws IOException { - final IndexMetaData indexMetaData = - IndexMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, - shardPath.getDataPath().getParent()); - + private void printRerouteCommand(ShardPath shardPath, Terminal terminal, boolean allocateStale) + throws IOException { final Path nodePath = getNodePath(shardPath); - final NodeMetaData nodeMetaData = - NodeMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, nodePath); + final NodeMetaData nodeMetaData = 
PersistedClusterStateService.nodeMetaData(nodePath); if (nodeMetaData == null) { throw new ElasticsearchException("No node meta data at " + nodePath); } final String nodeId = nodeMetaData.nodeId(); - final String index = indexMetaData.getIndex().getName(); + final String index = shardPath.getShardId().getIndexName(); final int id = shardPath.getShardId().id(); final AllocationCommands commands = new AllocationCommands( allocateStale @@ -506,7 +474,8 @@ private void printRerouteCommand(ShardPath shardPath, Terminal terminal, boolean private Path getNodePath(ShardPath shardPath) { final Path nodePath = shardPath.getDataPath().getParent().getParent().getParent(); - if (Files.exists(nodePath) == false || Files.exists(nodePath.resolve(MetaDataStateFormat.STATE_DIR_NAME)) == false) { + if (Files.exists(nodePath) == false || + Files.exists(nodePath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME)) == false) { throw new ElasticsearchException("Unable to resolve node path for " + shardPath); } return nodePath; diff --git a/server/src/main/java/org/elasticsearch/index/translog/TruncateTranslogAction.java b/server/src/main/java/org/elasticsearch/index/translog/TruncateTranslogAction.java index e6581d0359d11..9480ee3c1e1f3 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/TruncateTranslogAction.java +++ b/server/src/main/java/org/elasticsearch/index/translog/TruncateTranslogAction.java @@ -26,6 +26,7 @@ import org.apache.lucene.store.Directory; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cli.Terminal; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.collect.Tuple; @@ -63,6 +64,7 @@ public TruncateTranslogAction(NamedXContentRegistry namedXContentRegistry) { } public Tuple getCleanStatus(ShardPath shardPath, + ClusterState clusterState, Directory indexDirectory) throws IOException { final Path indexPath = shardPath.resolveIndex(); final Path translogPath = shardPath.resolveTranslog(); @@ -83,7 +85,7 @@ public Tuple getCleanStatus throw new ElasticsearchException("shard must have a valid translog UUID but got: [null]"); } - final boolean clean = isTranslogClean(shardPath, translogUUID); + final boolean clean = isTranslogClean(shardPath, clusterState, translogUUID); if (clean) { return Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null); @@ -166,13 +168,12 @@ public void execute(Terminal terminal, ShardPath shardPath, Directory indexDirec IOUtils.fsync(translogPath, true); } - private boolean isTranslogClean(ShardPath shardPath, String translogUUID) throws IOException { + private boolean isTranslogClean(ShardPath shardPath, ClusterState clusterState, String translogUUID) throws IOException { // perform clean check of translog instead of corrupted marker file try { final Path translogPath = shardPath.resolveTranslog(); final long translogGlobalCheckpoint = Translog.readGlobalCheckpoint(translogPath, translogUUID); - final IndexMetaData indexMetaData = - IndexMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, shardPath.getDataPath().getParent()); + final IndexMetaData indexMetaData = clusterState.metaData().getIndexSafe(shardPath.getShardId().getIndex()); final IndexSettings indexSettings = new IndexSettings(indexMetaData, Settings.EMPTY); final TranslogConfig translogConfig = new TranslogConfig(shardPath.getShardId(), translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE); diff --git 
a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 068569c68cf0d..b16ea429add75 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -104,7 +104,7 @@ public List getNamedWriteables() { return namedWritables; } - public List getNamedXContents() { + public static List getNamedXContents() { return Arrays.asList( new NamedXContentRegistry.Entry(Condition.class, new ParseField(MaxAgeCondition.NAME), (p, c) -> MaxAgeCondition.fromXContent(p)), diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesService.java b/server/src/main/java/org/elasticsearch/indices/IndicesService.java index add5865fda767..b2f5e75d85d76 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesService.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesService.java @@ -41,6 +41,7 @@ import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.service.ClusterService; @@ -64,8 +65,12 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.concurrent.AbstractRefCounted; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor; import org.elasticsearch.common.util.iterable.Iterables; +import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentFactory; @@ -116,6 +121,7 @@ import org.elasticsearch.indices.mapper.MapperRegistry; import org.elasticsearch.indices.recovery.PeerRecoveryTargetService; import org.elasticsearch.indices.recovery.RecoveryState; +import org.elasticsearch.node.Node; import org.elasticsearch.plugins.IndexStorePlugin; import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.repositories.RepositoriesService; @@ -159,6 +165,7 @@ import static java.util.Collections.unmodifiableMap; import static org.elasticsearch.common.collect.MapBuilder.newMapBuilder; import static org.elasticsearch.common.util.CollectionUtils.arrayAsArrayList; +import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadFactory; import static org.elasticsearch.index.IndexService.IndexCreationContext.CREATE_INDEX; import static org.elasticsearch.index.IndexService.IndexCreationContext.META_DATA_VERIFICATION; import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder; @@ -173,6 +180,11 @@ public class IndicesService extends AbstractLifecycleComponent public static final Setting INDICES_ID_FIELD_DATA_ENABLED_SETTING = Setting.boolSetting("indices.id_field_data.enabled", true, Property.Dynamic, Property.NodeScope); + public static final Setting WRITE_DANGLING_INDICES_INFO_SETTING = Setting.boolSetting( + "gateway.write_dangling_indices_info", + true, + Setting.Property.NodeScope + ); 
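+    // When enabled (the default), a data node persists metadata on disk for the indices it
+    // holds, so that an index which has dropped out of the cluster state can later be
+    // imported again as a dangling index; Zen1IT and other tests later in this change
+    // disable it to exercise the legacy behaviour.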
/** * The node's settings. @@ -210,6 +222,12 @@ public class IndicesService extends AbstractLifecycleComponent private final CountDownLatch closeLatch = new CountDownLatch(1); private volatile boolean idFieldDataEnabled; + @Nullable + private final EsThreadPoolExecutor danglingIndicesThreadPoolExecutor; + private final Set danglingIndicesToWrite = Sets.newConcurrentHashSet(); + private final boolean nodeWriteDanglingIndicesInfo; + + @Override protected void doStart() { // Start thread that will manage cleaning the field data cache periodically @@ -290,12 +308,25 @@ protected void closeInternal() { } } }; + + final String nodeName = Objects.requireNonNull(Node.NODE_NAME_SETTING.get(settings)); + nodeWriteDanglingIndicesInfo = WRITE_DANGLING_INDICES_INFO_SETTING.get(settings); + danglingIndicesThreadPoolExecutor = nodeWriteDanglingIndicesInfo ? EsExecutors.newScaling( + nodeName + "/" + DANGLING_INDICES_UPDATE_THREAD_NAME, + 1, 1, + 0, TimeUnit.MILLISECONDS, + daemonThreadFactory(nodeName, DANGLING_INDICES_UPDATE_THREAD_NAME), + threadPool.getThreadContext()) : null; } + private static final String DANGLING_INDICES_UPDATE_THREAD_NAME = "DanglingIndices#updateTask"; + @Override protected void doStop() { + ThreadPool.terminate(danglingIndicesThreadPoolExecutor, 10, TimeUnit.SECONDS); + ExecutorService indicesStopExecutor = - Executors.newFixedThreadPool(5, EsExecutors.daemonThreadFactory(settings, "indices_shutdown")); + Executors.newFixedThreadPool(5, daemonThreadFactory(settings, "indices_shutdown")); // Copy indices because we modify it asynchronously in the body of the loop final Set indices = this.indices.values().stream().map(s -> s.index()).collect(Collectors.toSet()); @@ -456,6 +487,7 @@ public boolean hasIndex(Index index) { public IndexService indexService(Index index) { return indices.get(index.getUUID()); } + /** * Returns an IndexService for the specified index if exists otherwise a {@link IndexNotFoundException} is thrown. 
*/ @@ -479,7 +511,8 @@ public IndexService indexServiceSafe(Index index) { */ @Override public synchronized IndexService createIndex( - final IndexMetaData indexMetaData, final List builtInListeners) throws IOException { + final IndexMetaData indexMetaData, final List builtInListeners, + final boolean writeDanglingIndices) throws IOException { ensureChangesAllowed(); if (indexMetaData.getIndexUUID().equals(IndexMetaData.INDEX_UUID_NA_VALUE)) { throw new IllegalArgumentException("index must have a real UUID found value: [" + indexMetaData.getIndexUUID() + "]"); @@ -515,8 +548,18 @@ public void onStoreClosed(ShardId shardId) { indexingMemoryController); boolean success = false; try { + if (writeDanglingIndices && nodeWriteDanglingIndicesInfo) { + indexService.addMetaDataListener(imd -> updateDanglingIndicesInfo(index)); + } indexService.getIndexEventListener().afterIndexCreated(indexService); indices = newMapBuilder(indices).put(index.getUUID(), indexService).immutableMap(); + if (writeDanglingIndices) { + if (nodeWriteDanglingIndicesInfo) { + updateDanglingIndicesInfo(index); + } else { + indexService.deleteDanglingIndicesInfo(); + } + } success = true; return indexService; } finally { @@ -763,7 +806,7 @@ public void deleteUnassignedIndex(String reason, IndexMetaData metaData, Cluster throw new IllegalStateException("Can't delete unassigned index store for [" + indexName + "] - it's still part of " + "the cluster state [" + index.getIndexUUID() + "] [" + metaData.getIndexUUID() + "]"); } - deleteIndexStore(reason, metaData, clusterState); + deleteIndexStore(reason, metaData); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("[{}] failed to delete unassigned index (reason [{}])", metaData.getIndex(), reason), e); @@ -777,7 +820,7 @@ public void deleteUnassignedIndex(String reason, IndexMetaData metaData, Cluster * * Package private for testing */ - void deleteIndexStore(String reason, IndexMetaData metaData, ClusterState clusterState) throws IOException { + void deleteIndexStore(String reason, IndexMetaData metaData) throws IOException { if (nodeEnv.hasNodeFile()) { synchronized (this) { Index index = metaData.getIndex(); @@ -786,15 +829,6 @@ void deleteIndexStore(String reason, IndexMetaData metaData, ClusterState cluste throw new IllegalStateException("Can't delete index store for [" + index.getName() + "] - it's still part of the indices service [" + localUUid + "] [" + metaData.getIndexUUID() + "]"); } - - if (clusterState.metaData().hasIndex(index.getName()) && (clusterState.nodes().getLocalNode().isMasterNode() == true)) { - // we do not delete the store if it is a master eligible node and the index is still in the cluster state - // because we want to keep the meta data for indices around even if no shards are left here - final IndexMetaData idxMeta = clusterState.metaData().index(index.getName()); - throw new IllegalStateException("Can't delete index store for [" + index.getName() + "] - it's still part of the " + - "cluster state [" + idxMeta.getIndexUUID() + "] [" + metaData.getIndexUUID() + "], " + - "we are master eligible, so will keep the index metadata even if no shards are left."); - } } final IndexSettings indexSettings = buildIndexSettings(metaData); deleteIndexStore(reason, indexSettings.getIndex(), indexSettings); @@ -872,13 +906,11 @@ public void deleteShardStore(String reason, ShardId shardId, ClusterState cluste nodeEnv.deleteShardDirectorySafe(shardId, indexSettings); logger.debug("{} deleted shard reason [{}]", shardId, reason); - // master nodes 
keep the index meta data, even if having no shards.. - if (clusterState.nodes().getLocalNode().isMasterNode() == false && - canDeleteIndexContents(shardId.getIndex(), indexSettings)) { + if (canDeleteIndexContents(shardId.getIndex(), indexSettings)) { if (nodeEnv.findAllShardIds(shardId.getIndex()).isEmpty()) { try { // note that deleteIndexStore has more safety checks and may throw an exception if the index was concurrently created. - deleteIndexStore("no longer used", metaData, clusterState); + deleteIndexStore("no longer used", metaData); } catch (Exception e) { // wrap the exception to indicate we already deleted the shard throw new ElasticsearchException("failed to delete unused index after deleting its last shard (" + shardId + ")", e); @@ -1500,4 +1532,51 @@ public static Optional checkShardLimit(int newShards, ClusterState state) { } return Optional.empty(); } + + private void updateDanglingIndicesInfo(Index index) { + assert DiscoveryNode.isDataNode(settings) : "dangling indices information should only be persisted on data nodes"; + assert nodeWriteDanglingIndicesInfo : "writing dangling indices info is not enabled"; + assert danglingIndicesThreadPoolExecutor != null : "executor for dangling indices info is not available"; + if (danglingIndicesToWrite.add(index)) { + logger.trace("triggered dangling indices update for {}", index); + final long triggeredTimeMillis = threadPool.relativeTimeInMillis(); + try { + danglingIndicesThreadPoolExecutor.execute(new AbstractRunnable() { + @Override + public void onFailure(Exception e) { + logger.warn(() -> new ParameterizedMessage("failed to write dangling indices state for index {}", index), e); + } + + @Override + protected void doRun() { + final boolean exists = danglingIndicesToWrite.remove(index); + assert exists : "removed non-existing item for " + index; + final IndexService indexService = indices.get(index.getUUID()); + if (indexService != null) { + final long executedTimeMillis = threadPool.relativeTimeInMillis(); + logger.trace("writing out dangling indices state for index {}, triggered {} ago", index, + TimeValue.timeValueMillis(Math.max(0L, executedTimeMillis - triggeredTimeMillis))); + indexService.writeDanglingIndicesInfo(); + final long completedTimeMillis = threadPool.relativeTimeInMillis(); + logger.trace("writing out dangling indices state for index {} completed after {}", index, + TimeValue.timeValueMillis(Math.max(0L, completedTimeMillis - executedTimeMillis))); + } else { + logger.trace("omit writing dangling indices state for index {} as index is deallocated on this node", index); + } + } + }); + } catch (EsRejectedExecutionException e) { + // ignore; the node is shutting down, so there is nothing more to do here
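+                // (a task can only be rejected once the executor has been shut down in
+                // doStop(), which the assertion below double-checks)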
+ assert danglingIndicesThreadPoolExecutor.isShutdown(); + } + } else { + logger.trace("dangling indices update already pending for {}", index); + } + } + + // visible for testing + public boolean allPendingDanglingIndicesWritten() { + return nodeWriteDanglingIndicesInfo == false || + (danglingIndicesToWrite.isEmpty() && danglingIndicesThreadPoolExecutor.getActiveCount() == 0); + } } diff --git a/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java index 5e8093b23942b..9eb318f6817d9 100644 --- a/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java @@ -481,7 +481,7 @@ private void createIndices(final ClusterState state) { AllocatedIndex<? extends Shard> indexService = null; try { - indexService = indicesService.createIndex(indexMetaData, buildInIndexListener); + indexService = indicesService.createIndex(indexMetaData, buildInIndexListener, true); if (indexService.updateMapping(null, indexMetaData) && sendRefreshMapping) { nodeMappingRefreshAction.nodeMappingRefresh(state.nodes().getMasterNode(), new NodeMappingRefreshAction.NodeMappingRefreshRequest(indexMetaData.getIndex().getName(), @@ -845,10 +845,12 @@ public interface AllocatedIndices<T extends Shard, U extends AllocatedIndex<T>> * @param indexMetaData the index metadata to create the index for * @param builtInIndexListener a list of built-in lifecycle {@link IndexEventListener} that should be used alongside * the per-index listeners + * @param writeDanglingIndices whether dangling indices information should be written + * @throws ResourceAlreadyExistsException if the index already exists. */ U createIndex(IndexMetaData indexMetaData, - List<IndexEventListener> builtInIndexListener) throws IOException; + List<IndexEventListener> builtInIndexListener, + boolean writeDanglingIndices) throws IOException; /** * Verify that the contents on disk for the given index are deleted; if not, delete the contents.
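The dangling-indices plumbing added above follows a small write-coalescing pattern: a concurrent set marks which indices have a write pending, and a single-threaded executor serializes the actual writes, so repeated metadata updates for the same index collapse into one write. The following sketch distils that pattern in isolation; the class and method names are illustrative placeholders, not part of this change or of any Elasticsearch API.

[source,java]
----
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Minimal sketch of the coalescing used by updateDanglingIndicesInfo: the set
// deduplicates pending work per index, the single-threaded executor serializes writes.
class CoalescingMetaDataWriter {
    private final Set<String> pending = ConcurrentHashMap.newKeySet();
    private final ExecutorService executor = Executors.newSingleThreadExecutor();

    void trigger(String index) {
        // only the first trigger since the last write enqueues a task; concurrent
        // triggers for the same index are absorbed by the already-pending write
        if (pending.add(index)) {
            executor.execute(() -> {
                // clear the marker before writing so that a trigger arriving during
                // the write schedules a fresh one rather than being lost
                pending.remove(index);
                write(index);
            });
        }
    }

    private void write(String index) {
        System.out.println("writing dangling indices metadata for " + index);
    }

    public static void main(String[] args) {
        CoalescingMetaDataWriter writer = new CoalescingMetaDataWriter();
        for (int i = 0; i < 5; i++) {
            writer.trigger("my-index"); // the five triggers coalesce into one or two writes
        }
        writer.executor.shutdown();
    }
}
----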
diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 9f53069640f1d..41806845e56e2 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -23,9 +23,11 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.util.Constants; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.Assertions; import org.elasticsearch.Build; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchTimeoutException; +import org.elasticsearch.Version; import org.elasticsearch.action.ActionModule; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.admin.cluster.snapshots.status.TransportNodesSnapshotsStatus; @@ -92,10 +94,12 @@ import org.elasticsearch.discovery.DiscoverySettings; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.env.NodeMetaData; import org.elasticsearch.gateway.GatewayAllocator; import org.elasticsearch.gateway.GatewayMetaState; import org.elasticsearch.gateway.GatewayModule; import org.elasticsearch.gateway.GatewayService; +import org.elasticsearch.gateway.PersistedClusterStateService; import org.elasticsearch.gateway.MetaStateService; import org.elasticsearch.http.HttpServerTransport; import org.elasticsearch.index.IndexSettings; @@ -401,13 +405,15 @@ protected Node( final NamedWriteableRegistry namedWriteableRegistry = new NamedWriteableRegistry(namedWriteables); NamedXContentRegistry xContentRegistry = new NamedXContentRegistry(Stream.of( NetworkModule.getNamedXContents().stream(), - indicesModule.getNamedXContents().stream(), + IndicesModule.getNamedXContents().stream(), searchModule.getNamedXContents().stream(), pluginsService.filterPlugins(Plugin.class).stream() .flatMap(p -> p.getNamedXContent().stream()), ClusterModule.getNamedXWriteables().stream()) .flatMap(Function.identity()).collect(toList())); final MetaStateService metaStateService = new MetaStateService(nodeEnvironment, xContentRegistry); + final PersistedClusterStateService lucenePersistedStateFactory + = new PersistedClusterStateService(nodeEnvironment, xContentRegistry, bigArrays); // collect engine factory providers from server and from plugins final Collection enginePlugins = pluginsService.filterPlugins(EnginePlugin.class); @@ -547,6 +553,7 @@ protected Node( b.bind(NamedWriteableRegistry.class).toInstance(namedWriteableRegistry); b.bind(MetaDataUpgrader.class).toInstance(metaDataUpgrader); b.bind(MetaStateService.class).toInstance(metaStateService); + b.bind(PersistedClusterStateService.class).toInstance(lucenePersistedStateFactory); b.bind(IndicesService.class).toInstance(indicesService); b.bind(AliasValidator.class).toInstance(aliasValidator); b.bind(MetaDataCreateIndexService.class).toInstance(metaDataCreateIndexService); @@ -695,7 +702,23 @@ public Node start() throws NodeValidationException { // Load (and maybe upgrade) the metadata stored on disk final GatewayMetaState gatewayMetaState = injector.getInstance(GatewayMetaState.class); gatewayMetaState.start(settings(), transportService, clusterService, injector.getInstance(MetaStateService.class), - injector.getInstance(MetaDataIndexUpgradeService.class), injector.getInstance(MetaDataUpgrader.class)); + injector.getInstance(MetaDataIndexUpgradeService.class), injector.getInstance(MetaDataUpgrader.class), + injector.getInstance(PersistedClusterStateService.class)); + if 
(Assertions.ENABLED) { + try { + if (DiscoveryModule.DISCOVERY_TYPE_SETTING.get(environment.settings()).equals( + DiscoveryModule.ZEN_DISCOVERY_TYPE) == false) { + assert injector.getInstance(MetaStateService.class).loadFullState().v1().isEmpty(); + } + final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, + nodeEnvironment.nodeDataPaths()); + assert nodeMetaData != null; + assert nodeMetaData.nodeVersion().equals(Version.CURRENT); + assert nodeMetaData.nodeId().equals(localNodeFactory.getNode().getId()); + } catch (IOException e) { + assert false : e; + } + } // we load the global state here (the persistent part of the cluster state stored on disk) to // pass it to the bootstrap checks to allow plugins to enforce certain preconditions based on the recovered state. final MetaData onDiskMetadata = gatewayMetaState.getPersistedState().getLastAcceptedState().metaData(); @@ -867,8 +890,11 @@ public synchronized void close() throws IOException { // Don't call shutdownNow here, it might break ongoing operations on Lucene indices. // See https://issues.apache.org/jira/browse/LUCENE-7248. We call shutdownNow in // awaitClose if the node doesn't finish closing within the specified time. - toClose.add(() -> stopWatch.stop().start("node_environment")); + toClose.add(() -> stopWatch.stop().start("gateway_meta_state")); + toClose.add(injector.getInstance(GatewayMetaState.class)); + + toClose.add(() -> stopWatch.stop().start("node_environment")); toClose.add(injector.getInstance(NodeEnvironment.class)); toClose.add(stopWatch::stop); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java index 1735c8ba033ea..cbd27a25035bc 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java @@ -992,17 +992,18 @@ public void testCannotJoinClusterWithDifferentUUID() throws IllegalAccessExcepti cluster1.runRandomly(); cluster1.stabilise(); - final ClusterNode newNode; + final ClusterNode nodeInOtherCluster; try (Cluster cluster2 = new Cluster(3)) { cluster2.runRandomly(); cluster2.stabilise(); - final ClusterNode nodeInOtherCluster = randomFrom(cluster2.clusterNodes); - newNode = cluster1.new ClusterNode(nextNodeIndex.getAndIncrement(), - nodeInOtherCluster.getLocalNode(), n -> cluster1.new MockPersistedState(n, nodeInOtherCluster.persistedState, - Function.identity(), Function.identity()), nodeInOtherCluster.nodeSettings); + nodeInOtherCluster = randomFrom(cluster2.clusterNodes); } + final ClusterNode newNode = cluster1.new ClusterNode(nextNodeIndex.getAndIncrement(), + nodeInOtherCluster.getLocalNode(), n -> cluster1.new MockPersistedState(n, nodeInOtherCluster.persistedState, + Function.identity(), Function.identity()), nodeInOtherCluster.nodeSettings); + cluster1.clusterNodes.add(newNode); MockLogAppender mockAppender = new MockLogAppender(); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/RemoveSettingsCommandIT.java b/server/src/test/java/org/elasticsearch/cluster/coordination/RemoveSettingsCommandIT.java new file mode 100644 index 0000000000000..b3f37a9b17a7a --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/RemoveSettingsCommandIT.java @@ -0,0 +1,135 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.cluster.coordination; + +import joptsimple.OptionSet; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.cli.MockTerminal; +import org.elasticsearch.cli.UserException; +import org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.test.ESIntegTestCase; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.not; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) +public class RemoveSettingsCommandIT extends ESIntegTestCase { + + public void testRemoveSettingsAbortedByUser() throws Exception { + internalCluster().setBootstrapMasterNodeIndex(0); + String node = internalCluster().startNode(); + client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder() + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), false).build()).get(); + Settings dataPathSettings = internalCluster().dataPathSettings(node); + ensureStableCluster(1); + internalCluster().stopRandomDataNode(); + + Environment environment = TestEnvironment.newEnvironment( + Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); + expectThrows(() -> removeSettings(environment, true, + new String[]{ DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey() }), + ElasticsearchNodeCommand.ABORTED_BY_USER_MSG); + } + + public void testRemoveSettingsSuccessful() throws Exception { + internalCluster().setBootstrapMasterNodeIndex(0); + String node = internalCluster().startNode(); + client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder() + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), false).build()).get(); + assertThat(client().admin().cluster().prepareState().get().getState().metaData().persistentSettings().keySet(), + contains(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey())); + Settings dataPathSettings = internalCluster().dataPathSettings(node); + ensureStableCluster(1); + internalCluster().stopRandomDataNode(); + + Environment environment = TestEnvironment.newEnvironment( + Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); + MockTerminal terminal = removeSettings(environment, false, + randomBoolean() ? 
+ new String[]{ DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey() } : + new String[]{ "cluster.routing.allocation.disk.*" } + ); + assertThat(terminal.getOutput(), containsString(RemoveSettingsCommand.SETTINGS_REMOVED_MSG)); + assertThat(terminal.getOutput(), containsString("The following settings will be removed:")); + assertThat(terminal.getOutput(), containsString( + DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey() + ": " + false)); + + internalCluster().startNode(dataPathSettings); + assertThat(client().admin().cluster().prepareState().get().getState().metaData().persistentSettings().keySet(), + not(contains(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey()))); + } + + public void testSettingDoesNotMatch() throws Exception { + internalCluster().setBootstrapMasterNodeIndex(0); + String node = internalCluster().startNode(); + client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder() + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), false).build()).get(); + assertThat(client().admin().cluster().prepareState().get().getState().metaData().persistentSettings().keySet(), + contains(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey())); + Settings dataPathSettings = internalCluster().dataPathSettings(node); + ensureStableCluster(1); + internalCluster().stopRandomDataNode(); + + Environment environment = TestEnvironment.newEnvironment( + Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); + UserException ex = expectThrows(UserException.class, () -> removeSettings(environment, false, + new String[]{ "cluster.routing.allocation.disk.bla.*" })); + assertThat(ex.getMessage(), containsString("No persistent cluster settings matching [cluster.routing.allocation.disk.bla.*] were " + + "found on this node")); + } + + private MockTerminal executeCommand(ElasticsearchNodeCommand command, Environment environment, boolean abort, String... args) + throws Exception { + final MockTerminal terminal = new MockTerminal(); + final OptionSet options = command.getParser().parse(args); + final String input; + + if (abort) { + input = randomValueOtherThanMany(c -> c.equalsIgnoreCase("y"), () -> randomAlphaOfLength(1)); + } else { + input = randomBoolean() ? "y" : "Y"; + } + + terminal.addTextInput(input); + + try { + command.execute(terminal, options, environment); + } finally { + assertThat(terminal.getOutput(), containsString(ElasticsearchNodeCommand.STOP_WARNING_MSG)); + } + + return terminal; + } + + private MockTerminal removeSettings(Environment environment, boolean abort, String... 
args) throws Exception { + final MockTerminal terminal = executeCommand(new RemoveSettingsCommand(), environment, abort, args); + assertThat(terminal.getOutput(), containsString(RemoveSettingsCommand.CONFIRMATION_MSG)); + assertThat(terminal.getOutput(), containsString(RemoveSettingsCommand.SETTINGS_REMOVED_MSG)); + return terminal; + } + + private void expectThrows(ThrowingRunnable runnable, String message) { + ElasticsearchException ex = expectThrows(ElasticsearchException.class, runnable); + assertThat(ex.getMessage(), containsString(message)); + } +} diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapAndDetachCommandIT.java b/server/src/test/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapAndDetachCommandIT.java index de7d9e80a1b7f..af62445d1a495 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapAndDetachCommandIT.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapAndDetachCommandIT.java @@ -23,14 +23,15 @@ import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.elasticsearch.cli.MockTerminal; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.metadata.Manifest; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.discovery.DiscoverySettings; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; -import org.elasticsearch.env.NodeMetaData; import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.gateway.GatewayMetaState; +import org.elasticsearch.gateway.PersistedClusterStateService; +import org.elasticsearch.indices.IndicesService; import org.elasticsearch.node.Node; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster; @@ -134,44 +135,20 @@ public void testDetachNodeLocked() throws IOException { } } - public void testBootstrapNoNodeMetaData() throws IOException { + public void testBootstrapNoNodeMetaData() { Settings envSettings = buildEnvSettings(Settings.EMPTY); Environment environment = TestEnvironment.newEnvironment(envSettings); - try (NodeEnvironment nodeEnvironment = new NodeEnvironment(envSettings, environment)) { - NodeMetaData.FORMAT.cleanupOldFiles(-1, nodeEnvironment.nodeDataPaths()); - } - - expectThrows(() -> unsafeBootstrap(environment), UnsafeBootstrapMasterCommand.NO_NODE_METADATA_FOUND_MSG); + expectThrows(() -> unsafeBootstrap(environment), ElasticsearchNodeCommand.NO_NODE_FOLDER_FOUND_MSG); } public void testBootstrapNotBootstrappedCluster() throws Exception { String node = internalCluster().startNode( - Settings.builder() - .put(DiscoverySettings.INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s") // to ensure quick node startup - .build()); - assertBusy(() -> { - ClusterState state = client().admin().cluster().prepareState().setLocal(true) - .execute().actionGet().getState(); - assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID)); - }); - - Settings dataPathSettings = internalCluster().dataPathSettings(node); - - internalCluster().stopRandomDataNode(); - - Environment environment = TestEnvironment.newEnvironment( - Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); - expectThrows(() -> unsafeBootstrap(environment), ElasticsearchNodeCommand.GLOBAL_GENERATION_MISSING_MSG); - } - - public void testDetachNotBootstrappedCluster() throws 
Exception { - String node = internalCluster().startNode( - Settings.builder() - .put(DiscoverySettings.INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s") // to ensure quick node startup - .build()); + Settings.builder() + .put(DiscoverySettings.INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s") // to ensure quick node startup + .build()); assertBusy(() -> { ClusterState state = client().admin().cluster().prepareState().setLocal(true) - .execute().actionGet().getState(); + .execute().actionGet().getState(); assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID)); }); @@ -181,10 +158,10 @@ public void testDetachNotBootstrappedCluster() throws Exception { Environment environment = TestEnvironment.newEnvironment( Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); - expectThrows(() -> detachCluster(environment), ElasticsearchNodeCommand.GLOBAL_GENERATION_MISSING_MSG); + expectThrows(() -> unsafeBootstrap(environment), UnsafeBootstrapMasterCommand.EMPTY_LAST_COMMITTED_VOTING_CONFIG_MSG); } - public void testBootstrapNoManifestFile() throws IOException { + public void testBootstrapNoClusterState() throws IOException { internalCluster().setBootstrapMasterNodeIndex(0); String node = internalCluster().startNode(); Settings dataPathSettings = internalCluster().dataPathSettings(node); @@ -193,12 +170,12 @@ public void testBootstrapNoManifestFile() throws IOException { internalCluster().stopRandomDataNode(); Environment environment = TestEnvironment.newEnvironment( Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); - Manifest.FORMAT.cleanupOldFiles(-1, nodeEnvironment.nodeDataPaths()); + PersistedClusterStateService.deleteAll(nodeEnvironment.nodeDataPaths()); - expectThrows(() -> unsafeBootstrap(environment), ElasticsearchNodeCommand.NO_MANIFEST_FILE_FOUND_MSG); + expectThrows(() -> unsafeBootstrap(environment), ElasticsearchNodeCommand.NO_NODE_METADATA_FOUND_MSG); } - public void testDetachNoManifestFile() throws IOException { + public void testDetachNoClusterState() throws IOException { internalCluster().setBootstrapMasterNodeIndex(0); String node = internalCluster().startNode(); Settings dataPathSettings = internalCluster().dataPathSettings(node); @@ -207,39 +184,9 @@ public void testDetachNoManifestFile() throws IOException { internalCluster().stopRandomDataNode(); Environment environment = TestEnvironment.newEnvironment( Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); - Manifest.FORMAT.cleanupOldFiles(-1, nodeEnvironment.nodeDataPaths()); - - expectThrows(() -> detachCluster(environment), ElasticsearchNodeCommand.NO_MANIFEST_FILE_FOUND_MSG); - } - - public void testBootstrapNoMetaData() throws IOException { - internalCluster().setBootstrapMasterNodeIndex(0); - String node = internalCluster().startNode(); - Settings dataPathSettings = internalCluster().dataPathSettings(node); - ensureStableCluster(1); - NodeEnvironment nodeEnvironment = internalCluster().getMasterNodeInstance(NodeEnvironment.class); - internalCluster().stopRandomDataNode(); - - Environment environment = TestEnvironment.newEnvironment( - Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); - MetaData.FORMAT.cleanupOldFiles(-1, nodeEnvironment.nodeDataPaths()); - - expectThrows(() -> unsafeBootstrap(environment), ElasticsearchNodeCommand.NO_GLOBAL_METADATA_MSG); - } - - public void testDetachNoMetaData() throws IOException { - 
internalCluster().setBootstrapMasterNodeIndex(0); - String node = internalCluster().startNode(); - Settings dataPathSettings = internalCluster().dataPathSettings(node); - ensureStableCluster(1); - NodeEnvironment nodeEnvironment = internalCluster().getMasterNodeInstance(NodeEnvironment.class); - internalCluster().stopRandomDataNode(); - - Environment environment = TestEnvironment.newEnvironment( - Settings.builder().put(internalCluster().getDefaultSettings()).put(dataPathSettings).build()); - MetaData.FORMAT.cleanupOldFiles(-1, nodeEnvironment.nodeDataPaths()); + PersistedClusterStateService.deleteAll(nodeEnvironment.nodeDataPaths()); - expectThrows(() -> detachCluster(environment), ElasticsearchNodeCommand.NO_GLOBAL_METADATA_MSG); + expectThrows(() -> detachCluster(environment), ElasticsearchNodeCommand.NO_NODE_METADATA_FOUND_MSG); } public void testBootstrapAbortedByUser() throws IOException { @@ -314,14 +261,16 @@ public void test3MasterNodes2Failed() throws Exception { logger.info("--> stop 1st master-eligible node and data-only node"); NodeEnvironment nodeEnvironment = internalCluster().getMasterNodeInstance(NodeEnvironment.class); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNodes.get(0))); + assertBusy(() -> internalCluster().getInstance(GatewayMetaState.class, dataNode).allPendingAsyncStatesWritten()); internalCluster().stopRandomDataNode(); logger.info("--> unsafely-bootstrap 1st master-eligible node"); MockTerminal terminal = unsafeBootstrap(environmentMaster1); - MetaData metaData = MetaData.FORMAT.loadLatestState(logger, xContentRegistry(), nodeEnvironment.nodeDataPaths()); + MetaData metaData = ElasticsearchNodeCommand.createPersistedClusterStateService(nodeEnvironment.nodeDataPaths()) + .loadBestOnDiskState().metaData; assertThat(terminal.getOutput(), containsString( - String.format(Locale.ROOT, UnsafeBootstrapMasterCommand.CLUSTER_STATE_TERM_VERSION_MSG_FORMAT, - metaData.coordinationMetaData().term(), metaData.version()))); + String.format(Locale.ROOT, UnsafeBootstrapMasterCommand.CLUSTER_STATE_TERM_VERSION_MSG_FORMAT, + metaData.coordinationMetaData().term(), metaData.version()))); logger.info("--> start 1st master-eligible node"); internalCluster().startMasterOnlyNode(master1DataPathSettings); @@ -372,6 +321,8 @@ public void testAllMasterEligibleNodesFailedDanglingIndexImport() throws Excepti logger.info("--> index 1 doc and ensure index is green"); client().prepareIndex("test", "type1", "1").setSource("field1", "value1").setRefreshPolicy(IMMEDIATE).get(); ensureGreen("test"); + assertBusy(() -> internalCluster().getInstances(IndicesService.class).forEach( + indicesService -> assertTrue(indicesService.allPendingDanglingIndicesWritten()))); logger.info("--> verify 1 doc in the index"); assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L); @@ -379,6 +330,7 @@ public void testAllMasterEligibleNodesFailedDanglingIndexImport() throws Excepti logger.info("--> stop data-only node and detach it from the old cluster"); Settings dataNodeDataPathSettings = internalCluster().dataPathSettings(dataNode); + assertBusy(() -> internalCluster().getInstance(GatewayMetaState.class, dataNode).allPendingAsyncStatesWritten()); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataNode)); final Environment environment = TestEnvironment.newEnvironment( Settings.builder().put(internalCluster().getDefaultSettings()).put(dataNodeDataPathSettings).build()); diff --git 
a/server/src/test/java/org/elasticsearch/cluster/coordination/Zen1IT.java b/server/src/test/java/org/elasticsearch/cluster/coordination/Zen1IT.java index d508853a90173..5fd4429905ac2 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/Zen1IT.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/Zen1IT.java @@ -41,6 +41,7 @@ import org.elasticsearch.discovery.zen.ElectMasterService; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.gateway.MetaStateService; +import org.elasticsearch.indices.IndicesService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster.RestartCallback; @@ -73,7 +74,9 @@ public class Zen1IT extends ESIntegTestCase { private static Settings ZEN1_SETTINGS = Coordinator.addZen1Attribute(true, Settings.builder() - .put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), DiscoveryModule.ZEN_DISCOVERY_TYPE)).build(); + .put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), DiscoveryModule.ZEN_DISCOVERY_TYPE) + .put(IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), false) + ).build(); private static Settings ZEN2_SETTINGS = Settings.builder() .put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), DiscoveryModule.ZEN2_DISCOVERY_TYPE) diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetaDataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetaDataTests.java index 54a97871ec696..8f9f883a7cc8d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetaDataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetaDataTests.java @@ -71,7 +71,7 @@ protected NamedWriteableRegistry writableRegistry() { @Override protected NamedXContentRegistry xContentRegistry() { - return new NamedXContentRegistry(INDICES_MODULE.getNamedXContents()); + return new NamedXContentRegistry(IndicesModule.getNamedXContents()); } public void testIndexMetaDataSerialization() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentIT.java b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentIT.java index daddd74ed909c..9154804e0b78d 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentIT.java +++ b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentIT.java @@ -21,7 +21,10 @@ import org.elasticsearch.Version; import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.io.PathUtils; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.gateway.PersistedClusterStateService; +import org.elasticsearch.indices.IndicesService; import org.elasticsearch.node.Node; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster; @@ -38,11 +41,13 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class NodeEnvironmentIT extends ESIntegTestCase { - public void testStartFailureOnDataForNonDataNode() { + public void testStartFailureOnDataForNonDataNode() throws Exception { final String indexName = "test-fail-on-data"; logger.info("--> starting one node"); - String node = internalCluster().startNode(); + final boolean writeDanglingIndices = randomBoolean(); + String node = internalCluster().startNode(Settings.builder() + .put(IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), writeDanglingIndices).build()); Settings dataPathSettings = internalCluster().dataPathSettings(node); 
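+        // writeDanglingIndices is randomized above: with it enabled the restart below trips
+        // over the persisted index metadata, with it disabled only the shard data is found,
+        // so the expected error message differs between the two cases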
logger.info("--> creating index"); @@ -51,6 +56,10 @@ public void testStartFailureOnDataForNonDataNode() { .put("index.number_of_replicas", 0) ).get(); final String indexUUID = resolveIndex(indexName).getUUID(); + if (writeDanglingIndices) { + assertBusy(() -> internalCluster().getInstances(IndicesService.class).forEach( + indicesService -> assertTrue(indicesService.allPendingDanglingIndicesWritten()))); + } logger.info("--> restarting the node with node.data=false and node.master=false"); IllegalStateException ex = expectThrows(IllegalStateException.class, @@ -65,13 +74,19 @@ public Settings onNodeStopped(String nodeName) { .build(); } })); - assertThat(ex.getMessage(), containsString(indexUUID)); - assertThat(ex.getMessage(), - startsWith("Node is started with " - + Node.NODE_DATA_SETTING.getKey() - + "=false and " - + Node.NODE_MASTER_SETTING.getKey() - + "=false, but has index metadata")); + if (writeDanglingIndices) { + assertThat(ex.getMessage(), + startsWith("Node is started with " + + Node.NODE_DATA_SETTING.getKey() + + "=false and " + + Node.NODE_MASTER_SETTING.getKey() + + "=false, but has index metadata")); + } else { + assertThat(ex.getMessage(), + startsWith("Node is started with " + + Node.NODE_DATA_SETTING.getKey() + + "=false, but has shard data")); + } logger.info("--> start the node again with node.data=true and node.master=true"); internalCluster().startNode(dataPathSettings); @@ -115,14 +130,14 @@ public Settings onNodeStopped(String nodeName) { public void testFailsToStartIfDowngraded() { final IllegalStateException illegalStateException = expectThrowsOnRestart(dataPaths -> - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(randomAlphaOfLength(10), NodeMetaDataTests.tooNewVersion()), dataPaths)); + PersistedClusterStateService.overrideVersion(NodeMetaDataTests.tooNewVersion(), dataPaths)); assertThat(illegalStateException.getMessage(), allOf(startsWith("cannot downgrade a node from version ["), endsWith("] to version [" + Version.CURRENT + "]"))); } public void testFailsToStartIfUpgradedTooFar() { final IllegalStateException illegalStateException = expectThrowsOnRestart(dataPaths -> - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(randomAlphaOfLength(10), NodeMetaDataTests.tooOldVersion()), dataPaths)); + PersistedClusterStateService.overrideVersion(NodeMetaDataTests.tooOldVersion(), dataPaths)); assertThat(illegalStateException.getMessage(), allOf(startsWith("cannot upgrade a node from version ["), endsWith("] directly to version [" + Version.CURRENT + "]"))); } @@ -140,10 +155,16 @@ public void testFailsToStartOnDataPathsFromMultipleNodes() throws IOException { internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodes.get(1))); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodes.get(0))); - final IllegalStateException illegalStateException = expectThrows(IllegalStateException.class, + IllegalStateException illegalStateException = expectThrows(IllegalStateException.class, + () -> PersistedClusterStateService.nodeMetaData(allDataPaths.stream().map(PathUtils::get) + .map(path -> NodeEnvironment.resolveNodePath(path, 0)).toArray(Path[]::new))); + + assertThat(illegalStateException.getMessage(), containsString("unexpected node ID in metadata")); + + illegalStateException = expectThrows(IllegalStateException.class, () -> internalCluster().startNode(Settings.builder().putList(Environment.PATH_DATA_SETTING.getKey(), allDataPaths))); - assertThat(illegalStateException.getMessage(), containsString("belong to multiple nodes with IDs")); + 
assertThat(illegalStateException.getMessage(), containsString("unexpected node ID in metadata")); final List node0DataPathsPlusOne = new ArrayList<>(node0DataPaths); node0DataPathsPlusOne.add(createTempDir().toString()); diff --git a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java index 12efa68680ae1..b0cd6cb8d2c64 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java +++ b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java @@ -419,7 +419,7 @@ public void testCustomDataPaths() throws Exception { env.close(); } - public void testPersistentNodeId() throws IOException { + public void testNodeIdNotPersistedAtInitialization() throws IOException { NodeEnvironment env = newNodeEnvironment(new String[0], Settings.builder() .put("node.local_storage", false) .put("node.master", false) @@ -433,7 +433,7 @@ public void testPersistentNodeId() throws IOException { nodeID = env.nodeId(); env.close(); env = newNodeEnvironment(paths, Settings.EMPTY); - assertThat(env.nodeId(), equalTo(nodeID)); + assertThat(env.nodeId(), not(equalTo(nodeID))); env.close(); env = newNodeEnvironment(Settings.EMPTY); assertThat(env.nodeId(), not(equalTo(nodeID))); diff --git a/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java b/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java index a6229b16c3055..f1088469a90d8 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java +++ b/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java @@ -21,14 +21,13 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.NoShardAvailableActionException; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.indices.IndicesService; import org.elasticsearch.node.Node; import org.elasticsearch.test.ESIntegTestCase; import org.hamcrest.Matcher; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.not; -import static org.mockito.Matchers.contains; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class NodeRepurposeCommandIT extends ESIntegTestCase { @@ -38,14 +37,14 @@ public void testRepurpose() throws Exception { logger.info("--> starting two nodes"); final String masterNode = internalCluster().startMasterOnlyNode(); - final String dataNode = internalCluster().startDataOnlyNode(); + final String dataNode = internalCluster().startDataOnlyNode( + Settings.builder().put(IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), false).build()); logger.info("--> creating index"); prepareCreate(indexName, Settings.builder() .put("index.number_of_shards", 1) .put("index.number_of_replicas", 0) ).get(); - final String indexUUID = resolveIndex(indexName).getUUID(); logger.info("--> indexing a simple document"); client().prepareIndex(indexName, "type1", "1").setSource("field1", "value1").get(); @@ -82,10 +81,10 @@ public void testRepurpose() throws Exception { ); logger.info("--> Repurposing node 1"); - executeRepurposeCommand(noMasterNoDataSettingsForDataNode, indexUUID, 1); + executeRepurposeCommand(noMasterNoDataSettingsForDataNode, 1, 1); ElasticsearchException lockedException = expectThrows(ElasticsearchException.class, - () -> executeRepurposeCommand(noMasterNoDataSettingsForMasterNode, indexUUID, 1) + () -> 
diff --git a/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java b/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java index a6229b16c3055..f1088469a90d8 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java +++ b/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandIT.java @@ -21,14 +21,13 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.NoShardAvailableActionException; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.indices.IndicesService; import org.elasticsearch.node.Node; import org.elasticsearch.test.ESIntegTestCase; import org.hamcrest.Matcher; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.not; -import static org.mockito.Matchers.contains; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class NodeRepurposeCommandIT extends ESIntegTestCase { @@ -38,14 +37,14 @@ public void testRepurpose() throws Exception { logger.info("--> starting two nodes"); final String masterNode = internalCluster().startMasterOnlyNode(); - final String dataNode = internalCluster().startDataOnlyNode(); + final String dataNode = internalCluster().startDataOnlyNode( + Settings.builder().put(IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), false).build()); logger.info("--> creating index"); prepareCreate(indexName, Settings.builder() .put("index.number_of_shards", 1) .put("index.number_of_replicas", 0) ).get(); - final String indexUUID = resolveIndex(indexName).getUUID(); logger.info("--> indexing a simple document"); client().prepareIndex(indexName, "type1", "1").setSource("field1", "value1").get(); @@ -82,10 +81,10 @@ public void testRepurpose() throws Exception { ); logger.info("--> Repurposing node 1"); - executeRepurposeCommand(noMasterNoDataSettingsForDataNode, indexUUID, 1); + executeRepurposeCommand(noMasterNoDataSettingsForDataNode, 1, 1); ElasticsearchException lockedException = expectThrows(ElasticsearchException.class, - () -> executeRepurposeCommand(noMasterNoDataSettingsForMasterNode, indexUUID, 1) + () -> executeRepurposeCommand(noMasterNoDataSettingsForMasterNode, 1, 1) ); assertThat(lockedException.getMessage(), containsString(NodeRepurposeCommand.FAILED_TO_OBTAIN_NODE_LOCK_MSG)); @@ -101,7 +100,7 @@ public void testRepurpose() throws Exception { internalCluster().stopRandomNode(s -> true); internalCluster().stopRandomNode(s -> true); - executeRepurposeCommand(noMasterNoDataSettingsForMasterNode, indexUUID, 0); + executeRepurposeCommand(noMasterNoDataSettingsForMasterNode, 1, 0); // by restarting as master and data node, we can check that the index definition was really deleted and also that the tool // does not mess things up so much that the nodes cannot boot as master or data node any longer. @@ -114,14 +113,13 @@ public void testRepurpose() throws Exception { assertFalse(indexExists(indexName)); } - private void executeRepurposeCommand(Settings settings, String indexUUID, int expectedShardCount) throws Exception { + private void executeRepurposeCommand(Settings settings, int expectedIndexCount, + int expectedShardCount) throws Exception { boolean verbose = randomBoolean(); Settings settingsWithPath = Settings.builder().put(internalCluster().getDefaultSettings()).put(settings).build(); - int expectedIndexCount = TestEnvironment.newEnvironment(settingsWithPath).dataFiles().length; Matcher<String> matcher = allOf( - containsString(NodeRepurposeCommand.noMasterMessage(1, expectedShardCount, expectedIndexCount)), - not(contains(NodeRepurposeCommand.PRE_V7_MESSAGE)), - NodeRepurposeCommandTests.conditionalNot(containsString(indexUUID), verbose == false)); + containsString(NodeRepurposeCommand.noMasterMessage(expectedIndexCount, expectedShardCount, 0)), + NodeRepurposeCommandTests.conditionalNot(containsString("test-repurpose"), verbose == false)); NodeRepurposeCommandTests.verifySuccess(settingsWithPath, matcher, verbose); } diff --git a/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandTests.java b/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandTests.java index 8f713e57bf4da..c88a3f36da7a8 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandTests.java +++ b/server/src/test/java/org/elasticsearch/env/NodeRepurposeCommandTests.java @@ -23,13 +23,16 @@ import org.elasticsearch.Version; import org.elasticsearch.cli.MockTerminal; import org.elasticsearch.cli.Terminal; -import org.elasticsearch.cluster.ClusterModule; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand; import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.cluster.metadata.Manifest; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.CheckedRunnable; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.gateway.PersistedClusterStateService; import org.elasticsearch.index.Index; import org.elasticsearch.node.Node; import org.elasticsearch.test.ESTestCase; @@ -40,16 +43,13 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; -import java.util.Collections; import java.util.stream.Stream; import static org.elasticsearch.env.NodeRepurposeCommand.NO_CLEANUP; import static org.elasticsearch.env.NodeRepurposeCommand.NO_DATA_TO_CLEAN_UP_FOUND; import static
org.elasticsearch.env.NodeRepurposeCommand.NO_SHARD_DATA_TO_CLEAN_UP_FOUND; -import static org.elasticsearch.env.NodeRepurposeCommand.PRE_V7_MESSAGE; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.not; public class NodeRepurposeCommandTests extends ESTestCase { @@ -68,6 +68,11 @@ public void createNodePaths() throws IOException { environment = TestEnvironment.newEnvironment(dataMasterSettings); try (NodeEnvironment nodeEnvironment = new NodeEnvironment(dataMasterSettings, environment)) { nodePaths = nodeEnvironment.nodeDataPaths(); + final String nodeId = randomAlphaOfLength(10); + try (PersistedClusterStateService.Writer writer = new PersistedClusterStateService(nodePaths, nodeId, + xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE, true).createWriter()) { + writer.writeFullStateAndCommit(1L, ClusterState.EMPTY_STATE); + } } dataNoMasterSettings = Settings.builder() .put(dataMasterSettings) @@ -86,27 +91,32 @@ public void createNodePaths() throws IOException { } public void testEarlyExitNoCleanup() throws Exception { - createIndexDataFiles(dataMasterSettings, randomInt(10)); + createIndexDataFiles(dataMasterSettings, randomInt(10), randomBoolean()); verifyNoQuestions(dataMasterSettings, containsString(NO_CLEANUP)); verifyNoQuestions(dataNoMasterSettings, containsString(NO_CLEANUP)); } public void testNothingToCleanup() throws Exception { - verifyNoQuestions(noDataNoMasterSettings, allOf(containsString(NO_DATA_TO_CLEAN_UP_FOUND), not(containsString(PRE_V7_MESSAGE)))); - verifyNoQuestions(noDataMasterSettings, - allOf(containsString(NO_SHARD_DATA_TO_CLEAN_UP_FOUND), not(containsString(PRE_V7_MESSAGE)))); - - createManifest(null); + verifyNoQuestions(noDataNoMasterSettings, containsString(NO_DATA_TO_CLEAN_UP_FOUND)); + verifyNoQuestions(noDataMasterSettings, containsString(NO_SHARD_DATA_TO_CLEAN_UP_FOUND)); + + Environment environment = TestEnvironment.newEnvironment(noDataMasterSettings); + if (randomBoolean()) { + try (NodeEnvironment env = new NodeEnvironment(noDataMasterSettings, environment)) { + try (PersistedClusterStateService.Writer writer = + ElasticsearchNodeCommand.createPersistedClusterStateService(env.nodeDataPaths()).createWriter()) { + writer.writeFullStateAndCommit(1L, ClusterState.EMPTY_STATE); + } + } + } - verifyNoQuestions(noDataNoMasterSettings, allOf(containsString(NO_DATA_TO_CLEAN_UP_FOUND), not(containsString(PRE_V7_MESSAGE)))); - verifyNoQuestions(noDataMasterSettings, - allOf(containsString(NO_SHARD_DATA_TO_CLEAN_UP_FOUND), not(containsString(PRE_V7_MESSAGE)))); + verifyNoQuestions(noDataNoMasterSettings, containsString(NO_DATA_TO_CLEAN_UP_FOUND)); + verifyNoQuestions(noDataMasterSettings, containsString(NO_SHARD_DATA_TO_CLEAN_UP_FOUND)); - createIndexDataFiles(dataMasterSettings, 0); + createIndexDataFiles(dataMasterSettings, 0, randomBoolean()); - verifyNoQuestions(noDataMasterSettings, - allOf(containsString(NO_SHARD_DATA_TO_CLEAN_UP_FOUND), not(containsString(PRE_V7_MESSAGE)))); + verifyNoQuestions(noDataMasterSettings, containsString(NO_SHARD_DATA_TO_CLEAN_UP_FOUND)); } @@ -119,33 +129,20 @@ public void testLocked() throws IOException { } public void testCleanupAll() throws Exception { - Manifest oldManifest = createManifest(INDEX); - checkCleanupAll(not(containsString(PRE_V7_MESSAGE))); - - Manifest newManifest = loadManifest(); - assertThat(newManifest.getIndexGenerations().entrySet(), hasSize(0)); - 
assertManifestIdenticalExceptIndices(oldManifest, newManifest); - } - - public void testCleanupAllPreV7() throws Exception { - checkCleanupAll(containsString(PRE_V7_MESSAGE)); - } - - private void checkCleanupAll(Matcher<String> additionalOutputMatcher) throws Exception { - int shardCount = randomInt(10); + int shardCount = randomIntBetween(1, 10); boolean verbose = randomBoolean(); - createIndexDataFiles(dataMasterSettings, shardCount); + boolean hasClusterState = randomBoolean(); + createIndexDataFiles(dataMasterSettings, shardCount, hasClusterState); String messageText = NodeRepurposeCommand.noMasterMessage( 1, environment.dataFiles().length*shardCount, - environment.dataFiles().length); + 0); Matcher<String> outputMatcher = allOf( containsString(messageText), - additionalOutputMatcher, - conditionalNot(containsString("testUUID"), verbose == false), - conditionalNot(containsString("testIndex"), verbose == false) + conditionalNot(containsString("testIndex"), verbose == false || hasClusterState == false), + conditionalNot(containsString("no name for uuid: testUUID"), verbose == false || hasClusterState) ); verifyUnchangedOnAbort(noDataNoMasterSettings, outputMatcher, verbose); @@ -162,18 +159,17 @@ private void checkCleanupAll(Matcher<String> additionalOutputMatcher) throws Exc public void testCleanupShardData() throws Exception { int shardCount = randomIntBetween(1, 10); boolean verbose = randomBoolean(); - Manifest manifest = randomBoolean() ? createManifest(INDEX) : null; - - createIndexDataFiles(dataMasterSettings, shardCount); + boolean hasClusterState = randomBoolean(); + createIndexDataFiles(dataMasterSettings, shardCount, hasClusterState); Matcher<String> matcher = allOf( containsString(NodeRepurposeCommand.shardMessage(environment.dataFiles().length * shardCount, 1)), conditionalNot(containsString("testUUID"), verbose == false), - conditionalNot(containsString("testIndex"), verbose == false) + conditionalNot(containsString("testIndex"), verbose == false || hasClusterState == false), + conditionalNot(containsString("no name for uuid: testUUID"), verbose == false || hasClusterState) ); - verifyUnchangedOnAbort(noDataMasterSettings, - matcher, verbose); + verifyUnchangedOnAbort(noDataMasterSettings, matcher, verbose); // verify test setup expectThrows(IllegalStateException.class, () -> new NodeEnvironment(noDataMasterSettings, environment).close()); @@ -182,12 +178,6 @@ public void testCleanupShardData() throws Exception { //verify clean. new NodeEnvironment(noDataMasterSettings, environment).close(); - - if (manifest != null) { - Manifest newManifest = loadManifest(); - assertThat(newManifest.getIndexGenerations().entrySet(), hasSize(1)); - assertManifestIdenticalExceptIndices(manifest, newManifest); - } } static void verifySuccess(Settings settings, Matcher<String> outputMatcher, boolean verbose) throws Exception { @@ -237,31 +227,22 @@ private static void executeRepurposeCommand(MockTerminal terminal, Settings sett nodeRepurposeCommand.testExecute(terminal, options, env); } - private Manifest createManifest(Index index) throws org.elasticsearch.gateway.WriteStateException { - Manifest manifest = new Manifest(randomIntBetween(1,100), randomIntBetween(1,100), randomIntBetween(1,100), - index != null ?
Collections.singletonMap(index, randomLongBetween(1,100)) : Collections.emptyMap()); - Manifest.FORMAT.writeAndCleanup(manifest, nodePaths); - return manifest; - } - - private Manifest loadManifest() throws IOException { - return Manifest.FORMAT.loadLatestState(logger, new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), nodePaths); - } - - private void assertManifestIdenticalExceptIndices(Manifest oldManifest, Manifest newManifest) { - assertEquals(oldManifest.getGlobalGeneration(), newManifest.getGlobalGeneration()); - assertEquals(oldManifest.getClusterStateVersion(), newManifest.getClusterStateVersion()); - assertEquals(oldManifest.getCurrentTerm(), newManifest.getCurrentTerm()); - } - - private void createIndexDataFiles(Settings settings, int shardCount) throws IOException { + private void createIndexDataFiles(Settings settings, int shardCount, boolean writeClusterState) throws IOException { int shardDataDirNumber = randomInt(10); - try (NodeEnvironment env = new NodeEnvironment(settings, TestEnvironment.newEnvironment(settings))) { - IndexMetaData.FORMAT.write(IndexMetaData.builder(INDEX.getName()) - .settings(Settings.builder().put("index.version.created", Version.CURRENT)) - .numberOfShards(1) - .numberOfReplicas(1) - .build(), env.indexPaths(INDEX)); + Environment environment = TestEnvironment.newEnvironment(settings); + try (NodeEnvironment env = new NodeEnvironment(settings, environment)) { + if (writeClusterState) { + try (PersistedClusterStateService.Writer writer = + ElasticsearchNodeCommand.createPersistedClusterStateService(env.nodeDataPaths()).createWriter()) { + writer.writeFullStateAndCommit(1L, ClusterState.builder(ClusterName.DEFAULT) + .metaData(MetaData.builder().put(IndexMetaData.builder(INDEX.getName()) + .settings(Settings.builder().put("index.version.created", Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, INDEX.getUUID())) + .numberOfShards(1) + .numberOfReplicas(1)).build()) + .build()); + } + } for (Path path : env.indexPaths(INDEX)) { for (int i = 0; i < shardCount; ++i) { Files.createDirectories(path.resolve(Integer.toString(shardDataDirNumber))); diff --git a/server/src/test/java/org/elasticsearch/env/OverrideNodeVersionCommandTests.java b/server/src/test/java/org/elasticsearch/env/OverrideNodeVersionCommandTests.java index d8b86d3d51eb7..09c947d86de64 100644 --- a/server/src/test/java/org/elasticsearch/env/OverrideNodeVersionCommandTests.java +++ b/server/src/test/java/org/elasticsearch/env/OverrideNodeVersionCommandTests.java @@ -18,31 +18,34 @@ */ package org.elasticsearch.env; +import joptsimple.OptionParser; +import joptsimple.OptionSet; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.cli.MockTerminal; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.gateway.MetaDataStateFormat; -import org.elasticsearch.gateway.WriteStateException; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.gateway.PersistedClusterStateService; import org.elasticsearch.test.ESTestCase; +import org.junit.After; import org.junit.Before; import java.io.IOException; import java.nio.file.Path; -import static org.elasticsearch.env.NodeMetaData.NODE_ID_KEY; -import static 
org.elasticsearch.env.NodeMetaData.NODE_VERSION_KEY; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasToString; public class OverrideNodeVersionCommandTests extends ESTestCase { private Environment environment; private Path[] nodePaths; + private String nodeId; + private final OptionSet noOptions = new OptionParser().parse(); @Before public void createNodePaths() throws IOException { @@ -50,24 +53,38 @@ public void createNodePaths() throws IOException { environment = TestEnvironment.newEnvironment(settings); try (NodeEnvironment nodeEnvironment = new NodeEnvironment(settings, environment)) { nodePaths = nodeEnvironment.nodeDataPaths(); + nodeId = nodeEnvironment.nodeId(); + + try (PersistedClusterStateService.Writer writer = new PersistedClusterStateService(nodePaths, nodeId, + xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE, true).createWriter()) { + writer.writeFullStateAndCommit(1L, ClusterState.builder(ClusterName.DEFAULT).metaData(MetaData.builder() + .persistentSettings(Settings.builder().put(MetaData.SETTING_READ_ONLY_SETTING.getKey(), true).build()).build()) + .build()); + } } } + @After + public void checkClusterStateIntact() throws IOException { + assertTrue(MetaData.SETTING_READ_ONLY_SETTING.get(new PersistedClusterStateService(nodePaths, nodeId, + xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE, true).loadBestOnDiskState().metaData.persistentSettings())); + } + public void testFailsOnEmptyPath() { final Path emptyPath = createTempDir(); final MockTerminal mockTerminal = new MockTerminal(); final ElasticsearchException elasticsearchException = expectThrows(ElasticsearchException.class, () -> - new OverrideNodeVersionCommand().processNodePaths(mockTerminal, new Path[]{emptyPath}, environment)); + new OverrideNodeVersionCommand().processNodePaths(mockTerminal, new Path[]{emptyPath}, 0, noOptions, environment)); assertThat(elasticsearchException.getMessage(), equalTo(OverrideNodeVersionCommand.NO_METADATA_MESSAGE)); expectThrows(IllegalStateException.class, () -> mockTerminal.readText("")); } - public void testFailsIfUnnecessary() throws WriteStateException { + public void testFailsIfUnnecessary() throws IOException { final Version nodeVersion = Version.fromId(between(Version.CURRENT.minimumIndexCompatibilityVersion().id, Version.CURRENT.id)); - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(randomAlphaOfLength(10), nodeVersion), nodePaths); + PersistedClusterStateService.overrideVersion(nodeVersion, nodePaths); final MockTerminal mockTerminal = new MockTerminal(); final ElasticsearchException elasticsearchException = expectThrows(ElasticsearchException.class, () -> - new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, environment)); + new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, 0, noOptions, environment)); assertThat(elasticsearchException.getMessage(), allOf( containsString("compatible with current version"), containsString(Version.CURRENT.toString()), @@ -76,13 +93,12 @@ public void testFailsIfUnnecessary() throws WriteStateException { } public void testWarnsIfTooOld() throws Exception { - final String nodeId = randomAlphaOfLength(10); final Version nodeVersion = NodeMetaDataTests.tooOldVersion(); - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(nodeId, nodeVersion), nodePaths); + PersistedClusterStateService.overrideVersion(nodeVersion, nodePaths); final MockTerminal 
mockTerminal = new MockTerminal(); mockTerminal.addTextInput("n\n"); final ElasticsearchException elasticsearchException = expectThrows(ElasticsearchException.class, () -> - new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, environment)); + new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, 0, noOptions, environment)); assertThat(elasticsearchException.getMessage(), equalTo("aborted by user")); assertThat(mockTerminal.getOutput(), allOf( containsString("too old"), @@ -92,19 +108,17 @@ public void testWarnsIfTooOld() throws Exception { containsString(nodeVersion.toString()))); expectThrows(IllegalStateException.class, () -> mockTerminal.readText("")); - final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, xContentRegistry(), nodePaths); - assertThat(nodeMetaData.nodeId(), equalTo(nodeId)); + final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePaths); assertThat(nodeMetaData.nodeVersion(), equalTo(nodeVersion)); } public void testWarnsIfTooNew() throws Exception { - final String nodeId = randomAlphaOfLength(10); final Version nodeVersion = NodeMetaDataTests.tooNewVersion(); - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(nodeId, nodeVersion), nodePaths); + PersistedClusterStateService.overrideVersion(nodeVersion, nodePaths); final MockTerminal mockTerminal = new MockTerminal(); mockTerminal.addTextInput(randomFrom("yy", "Yy", "n", "yes", "true", "N", "no")); final ElasticsearchException elasticsearchException = expectThrows(ElasticsearchException.class, () -> - new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, environment)); + new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, 0, noOptions, environment)); assertThat(elasticsearchException.getMessage(), equalTo("aborted by user")); assertThat(mockTerminal.getOutput(), allOf( containsString("data loss"), @@ -113,18 +127,16 @@ public void testWarnsIfTooNew() throws Exception { containsString(nodeVersion.toString()))); expectThrows(IllegalStateException.class, () -> mockTerminal.readText("")); - final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, xContentRegistry(), nodePaths); - assertThat(nodeMetaData.nodeId(), equalTo(nodeId)); + final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePaths); assertThat(nodeMetaData.nodeVersion(), equalTo(nodeVersion)); } public void testOverwritesIfTooOld() throws Exception { - final String nodeId = randomAlphaOfLength(10); final Version nodeVersion = NodeMetaDataTests.tooOldVersion(); - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(nodeId, nodeVersion), nodePaths); + PersistedClusterStateService.overrideVersion(nodeVersion, nodePaths); final MockTerminal mockTerminal = new MockTerminal(); mockTerminal.addTextInput(randomFrom("y", "Y")); - new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, environment); + new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, 0, noOptions, environment); assertThat(mockTerminal.getOutput(), allOf( containsString("too old"), containsString("data loss"), @@ -134,18 +146,16 @@ public void testOverwritesIfTooOld() throws Exception { containsString(OverrideNodeVersionCommand.SUCCESS_MESSAGE))); expectThrows(IllegalStateException.class, () -> mockTerminal.readText("")); - final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, xContentRegistry(), nodePaths); - assertThat(nodeMetaData.nodeId(), 
equalTo(nodeId)); + final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePaths); assertThat(nodeMetaData.nodeVersion(), equalTo(Version.CURRENT)); } public void testOverwritesIfTooNew() throws Exception { - final String nodeId = randomAlphaOfLength(10); final Version nodeVersion = NodeMetaDataTests.tooNewVersion(); - NodeMetaData.FORMAT.writeAndCleanup(new NodeMetaData(nodeId, nodeVersion), nodePaths); + PersistedClusterStateService.overrideVersion(nodeVersion, nodePaths); final MockTerminal mockTerminal = new MockTerminal(); mockTerminal.addTextInput(randomFrom("y", "Y")); - new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, environment); + new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, 0, noOptions, environment); assertThat(mockTerminal.getOutput(), allOf( containsString("data loss"), containsString("You should not use this tool"), @@ -154,59 +164,7 @@ public void testOverwritesIfTooNew() throws Exception { containsString(OverrideNodeVersionCommand.SUCCESS_MESSAGE))); expectThrows(IllegalStateException.class, () -> mockTerminal.readText("")); - final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, xContentRegistry(), nodePaths); - assertThat(nodeMetaData.nodeId(), equalTo(nodeId)); + final NodeMetaData nodeMetaData = PersistedClusterStateService.nodeMetaData(nodePaths); assertThat(nodeMetaData.nodeVersion(), equalTo(Version.CURRENT)); } - - public void testLenientlyIgnoresExtraFields() throws Exception { - final String nodeId = randomAlphaOfLength(10); - final Version nodeVersion = NodeMetaDataTests.tooNewVersion(); - FutureNodeMetaData.FORMAT.writeAndCleanup(new FutureNodeMetaData(nodeId, nodeVersion, randomLong()), nodePaths); - assertThat(expectThrows(ElasticsearchException.class, - () -> NodeMetaData.FORMAT.loadLatestState(logger, xContentRegistry(), nodePaths)), - hasToString(containsString("unknown field [future_field]"))); - - final MockTerminal mockTerminal = new MockTerminal(); - mockTerminal.addTextInput(randomFrom("y", "Y")); - new OverrideNodeVersionCommand().processNodePaths(mockTerminal, nodePaths, environment); - assertThat(mockTerminal.getOutput(), allOf( - containsString("data loss"), - containsString("You should not use this tool"), - containsString(Version.CURRENT.toString()), - containsString(nodeVersion.toString()), - containsString(OverrideNodeVersionCommand.SUCCESS_MESSAGE))); - expectThrows(IllegalStateException.class, () -> mockTerminal.readText("")); - - final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, xContentRegistry(), nodePaths); - assertThat(nodeMetaData.nodeId(), equalTo(nodeId)); - assertThat(nodeMetaData.nodeVersion(), equalTo(Version.CURRENT)); - } - - private static class FutureNodeMetaData { - private final String nodeId; - private final Version nodeVersion; - private final long futureValue; - - FutureNodeMetaData(String nodeId, Version nodeVersion, long futureValue) { - this.nodeId = nodeId; - this.nodeVersion = nodeVersion; - this.futureValue = futureValue; - } - - static final MetaDataStateFormat FORMAT - = new MetaDataStateFormat(NodeMetaData.FORMAT.getPrefix()) { - @Override - public void toXContent(XContentBuilder builder, FutureNodeMetaData state) throws IOException { - builder.field(NODE_ID_KEY, state.nodeId); - builder.field(NODE_VERSION_KEY, state.nodeVersion.id); - builder.field("future_field", state.futureValue); - } - - @Override - public FutureNodeMetaData fromXContent(XContentParser parser) { - throw new 
AssertionError("shouldn't be loading a FutureNodeMetaData"); - } - }; - } } diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java index 6d505006984e0..51396e4e12175 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java +++ b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java @@ -30,10 +30,8 @@ import org.elasticsearch.client.Client; import org.elasticsearch.client.Requests; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.coordination.CoordinationMetaData; import org.elasticsearch.cluster.metadata.IndexGraveyard; import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.cluster.metadata.Manifest; import org.elasticsearch.cluster.metadata.MappingMetaData; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.routing.IndexRoutingTable; @@ -42,12 +40,10 @@ import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.Priority; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.indices.IndexClosedException; @@ -58,7 +54,6 @@ import org.elasticsearch.test.InternalTestCluster.RestartCallback; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.List; import java.util.Map; @@ -379,14 +374,13 @@ public void testRecoverBrokenIndexMetadata() throws Exception { ClusterState state = client().admin().cluster().prepareState().get().getState(); final IndexMetaData metaData = state.getMetaData().index("test"); - final IndexMetaData brokenMeta = IndexMetaData.builder(metaData).settings(Settings.builder().put(metaData.getSettings()) + final IndexMetaData.Builder brokenMeta = IndexMetaData.builder(metaData).settings(Settings.builder().put(metaData.getSettings()) .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT.minimumIndexCompatibilityVersion().id) // this is invalid but should be archived .put("index.similarity.BM25.type", "classic") // this one is not validated ahead of time and breaks allocation - .put("index.analysis.filter.myCollator.type", "icu_collation") - ).build(); - writeBrokenMeta(metaStateService -> metaStateService.writeIndexAndUpdateManifest("broken metadata", brokenMeta)); + .put("index.analysis.filter.myCollator.type", "icu_collation")); + restartNodesOnBrokenClusterState(ClusterState.builder(state).metaData(MetaData.builder(state.getMetaData()).put(brokenMeta))); // check that the cluster does not keep reallocating shards assertBusy(() -> { @@ -451,9 +445,9 @@ public void testRecoverMissingAnalyzer() throws Exception { ClusterState state = client().admin().cluster().prepareState().get().getState(); final IndexMetaData metaData = state.getMetaData().index("test"); - final IndexMetaData brokenMeta = IndexMetaData.builder(metaData).settings(metaData.getSettings() - .filter((s) -> "index.analysis.analyzer.test.tokenizer".equals(s) == false)).build(); - writeBrokenMeta(metaStateService -> 
metaStateService.writeIndexAndUpdateManifest("broken metadata", brokenMeta)); + final IndexMetaData.Builder brokenMeta = IndexMetaData.builder(metaData).settings(metaData.getSettings() + .filter((s) -> "index.analysis.analyzer.test.tokenizer".equals(s) == false)); + restartNodesOnBrokenClusterState(ClusterState.builder(state).metaData(MetaData.builder(state.getMetaData()).put(brokenMeta))); // check that the cluster does not keep reallocating shards @@ -498,7 +492,7 @@ public void testArchiveBrokenClusterSettings() throws Exception { final MetaData brokenMeta = MetaData.builder(metaData).persistentSettings(Settings.builder() .put(metaData.persistentSettings()).put("this.is.unknown", true) .put(MetaData.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), "broken").build()).build(); - writeBrokenMeta(metaStateService -> metaStateService.writeGlobalStateAndUpdateManifest("broken metadata", brokenMeta)); + restartNodesOnBrokenClusterState(ClusterState.builder(state).metaData(brokenMeta)); ensureYellow("test"); // wait for state recovery state = client().admin().cluster().prepareState().get().getState(); @@ -516,6 +510,8 @@ assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L); } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/48701") + // This test relates to loading a broken state that was written by a 6.x node, but for now we do not load state from old nodes. public void testHalfDeletedIndexImport() throws Exception { // It's possible for a 6.x node to add a tombstone for an index but not actually delete the index metadata from disk since that // deletion is slightly deferred and may race against the node being shut down; if you upgrade to 7.x when in this state then the @@ -530,36 +526,40 @@ public void testHalfDeletedIndexImport() throws Exception { final MetaData metaData = internalCluster().getInstance(ClusterService.class).state().metaData(); final Path[] paths = internalCluster().getInstance(NodeEnvironment.class).nodeDataPaths(); - writeBrokenMeta(metaStateService -> { - metaStateService.writeGlobalState("test", MetaData.builder(metaData) - // we remove the manifest file, resetting the term and making this look like an upgrade from 6.x, so must also reset the - // term in the coordination metadata - .coordinationMetaData(CoordinationMetaData.builder(metaData.coordinationMetaData()).term(0L).build()) - // add a tombstone but do not delete the index metadata from disk - .putCustom(IndexGraveyard.TYPE, IndexGraveyard.builder().addTombstone(metaData.index("test").getIndex()).build()).build()); - for (final Path path : paths) { - try (Stream<Path> stateFiles = Files.list(path.resolve(MetaDataStateFormat.STATE_DIR_NAME))) { - for (final Path manifestPath : stateFiles - .filter(p -> p.getFileName().toString().startsWith(Manifest.FORMAT.getPrefix())).collect(Collectors.toList())) { - IOUtils.rm(manifestPath); - } - } - } - }); +// writeBrokenMeta(metaStateService -> { +// metaStateService.writeGlobalState("test", MetaData.builder(metaData) +// // we remove the manifest file, resetting the term and making this look like an upgrade from 6.x, so must also reset the +// // term in the coordination metadata +// .coordinationMetaData(CoordinationMetaData.builder(metaData.coordinationMetaData()).term(0L).build()) +// // add a tombstone but do not delete the index metadata from disk +// .putCustom(IndexGraveyard.TYPE,
IndexGraveyard.builder().addTombstone(metaData.index("test").getIndex()).build()).build()); +// for (final Path path : paths) { +// try (Stream<Path> stateFiles = Files.list(path.resolve(MetaDataStateFormat.STATE_DIR_NAME))) { +// for (final Path manifestPath : stateFiles +// .filter(p -> p.getFileName().toString().startsWith(Manifest.FORMAT.getPrefix())).collect(Collectors.toList())) { +// IOUtils.rm(manifestPath); +// } +// } +// } +// }); ensureGreen(); assertBusy(() -> assertThat(internalCluster().getInstance(NodeEnvironment.class).availableIndexFolders(), empty())); } - private void writeBrokenMeta(CheckedConsumer<MetaStateService, IOException> writer) throws Exception { - Map<String, MetaStateService> metaStateServices = Stream.of(internalCluster().getNodeNames()) - .collect(Collectors.toMap(Function.identity(), nodeName -> internalCluster().getInstance(MetaStateService.class, nodeName))); + private void restartNodesOnBrokenClusterState(ClusterState.Builder clusterStateBuilder) throws Exception { + Map<String, PersistedClusterStateService> lucenePersistedStateFactories = Stream.of(internalCluster().getNodeNames()) + .collect(Collectors.toMap(Function.identity(), + nodeName -> internalCluster().getInstance(PersistedClusterStateService.class, nodeName))); + final ClusterState clusterState = clusterStateBuilder.build(); internalCluster().fullRestart(new RestartCallback(){ @Override public Settings onNodeStopped(String nodeName) throws Exception { - final MetaStateService metaStateService = metaStateServices.get(nodeName); - writer.accept(metaStateService); + final PersistedClusterStateService lucenePersistedStateFactory = lucenePersistedStateFactories.get(nodeName); + try (PersistedClusterStateService.Writer writer = lucenePersistedStateFactory.createWriter()) { + writer.writeFullStateAndCommit(clusterState.term(), clusterState); + } return super.onNodeStopped(nodeName); } }); }
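The helper replacement above is the pattern this change uses wherever a test needs to plant broken metadata: the manifest files are gone, so the whole cluster state is rewritten through a `PersistedClusterStateService.Writer` while the node is shut down. A condensed sketch of that write path, assuming `persistedClusterStateService` and a doctored `clusterState` are in scope as they are in `restartNodesOnBrokenClusterState`:

[source,java]
----
// Sketch only: mirrors the onNodeStopped callback above; assumes both variables are in scope.
try (PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter()) {
    // Replaces the node's on-disk state wholesale; the term is committed together with it.
    writer.writeFullStateAndCommit(clusterState.term(), clusterState);
}
----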
diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java index e723d08d7352c..4b39abba67b3c 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -19,29 +19,50 @@ package org.elasticsearch.gateway; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.coordination.CoordinationMetaData; import org.elasticsearch.cluster.coordination.CoordinationMetaData.VotingConfigExclusion; import org.elasticsearch.cluster.coordination.CoordinationState; -import org.elasticsearch.cluster.coordination.InMemoryPersistedState; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.Manifest; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.node.Node; import org.elasticsearch.test.ESTestCase; - +import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.io.IOError; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.not; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class GatewayMetaStatePersistedStateTests extends ESTestCase { private NodeEnvironment nodeEnvironment; @@ -53,7 +74,7 @@ public class GatewayMetaStatePersistedStateTests extends ESTestCase { public void setUp() throws Exception { nodeEnvironment = newNodeEnvironment(); localNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), Collections.emptyMap(), - Sets.newHashSet(DiscoveryNodeRole.MASTER_ROLE), Version.CURRENT); + Sets.newHashSet(DiscoveryNodeRole.MASTER_ROLE), Version.CURRENT); clusterName = new ClusterName(randomAlphaOfLength(10)); settings = Settings.builder().put(ClusterName.CLUSTER_NAME_SETTING.getKey(), clusterName.value()).build(); super.setUp(); @@ -69,57 +90,68 @@ private CoordinationState.PersistedState newGatewayPersistedState() { final MockGatewayMetaState gateway = new MockGatewayMetaState(localNode); gateway.start(settings, nodeEnvironment, xContentRegistry()); final CoordinationState.PersistedState persistedState = gateway.getPersistedState(); - assertThat(persistedState, not(instanceOf(InMemoryPersistedState.class))); + assertThat(persistedState, instanceOf(GatewayMetaState.LucenePersistedState.class)); return persistedState; } - private CoordinationState.PersistedState maybeNew(CoordinationState.PersistedState persistedState) { + private CoordinationState.PersistedState maybeNew(CoordinationState.PersistedState persistedState) throws IOException { if (randomBoolean()) { + persistedState.close(); return newGatewayPersistedState(); } return persistedState; } - public void testInitialState() { - CoordinationState.PersistedState gateway = newGatewayPersistedState(); - ClusterState state = gateway.getLastAcceptedState(); - assertThat(state.getClusterName(), equalTo(clusterName)); - assertTrue(MetaData.isGlobalStateEquals(state.metaData(), MetaData.EMPTY_META_DATA)); - assertThat(state.getVersion(), equalTo(Manifest.empty().getClusterStateVersion())); - assertThat(state.getNodes().getLocalNode(), equalTo(localNode)); - - long currentTerm = gateway.getCurrentTerm(); - assertThat(currentTerm, equalTo(Manifest.empty().getCurrentTerm())); + public void testInitialState() throws IOException { + CoordinationState.PersistedState gateway = null; + try { + gateway = newGatewayPersistedState(); + ClusterState state = gateway.getLastAcceptedState(); + assertThat(state.getClusterName(), equalTo(clusterName)); + assertTrue(MetaData.isGlobalStateEquals(state.metaData(), MetaData.EMPTY_META_DATA)); + assertThat(state.getVersion(), equalTo(Manifest.empty().getClusterStateVersion())); + assertThat(state.getNodes().getLocalNode(), equalTo(localNode)); + + long currentTerm = gateway.getCurrentTerm(); + assertThat(currentTerm, equalTo(Manifest.empty().getCurrentTerm())); + } finally {
IOUtils.close(gateway); + } } - public void testSetCurrentTerm() { - CoordinationState.PersistedState gateway = newGatewayPersistedState(); - - for (int i = 0; i < randomIntBetween(1, 5); i++) { - final long currentTerm = randomNonNegativeLong(); - gateway.setCurrentTerm(currentTerm); - gateway = maybeNew(gateway); - assertThat(gateway.getCurrentTerm(), equalTo(currentTerm)); + public void testSetCurrentTerm() throws IOException { + CoordinationState.PersistedState gateway = null; + try { + gateway = newGatewayPersistedState(); + + for (int i = 0; i < randomIntBetween(1, 5); i++) { + final long currentTerm = randomNonNegativeLong(); + gateway.setCurrentTerm(currentTerm); + gateway = maybeNew(gateway); + assertThat(gateway.getCurrentTerm(), equalTo(currentTerm)); + } + } finally { + IOUtils.close(gateway); } } private ClusterState createClusterState(long version, MetaData metaData) { return ClusterState.builder(clusterName). - nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).build()). - version(version). - metaData(metaData). - build(); + nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).build()). + version(version). + metaData(metaData). + build(); } private CoordinationMetaData createCoordinationMetaData(long term) { CoordinationMetaData.Builder builder = CoordinationMetaData.builder(); builder.term(term); builder.lastAcceptedConfiguration( - new CoordinationMetaData.VotingConfiguration( - Sets.newHashSet(generateRandomStringArray(10, 10, false)))); + new CoordinationMetaData.VotingConfiguration( + Sets.newHashSet(generateRandomStringArray(10, 10, false)))); builder.lastCommittedConfiguration( - new CoordinationMetaData.VotingConfiguration( - Sets.newHashSet(generateRandomStringArray(10, 10, false)))); + new CoordinationMetaData.VotingConfiguration( + Sets.newHashSet(generateRandomStringArray(10, 10, false)))); for (int i = 0; i < randomIntBetween(0, 5); i++) { builder.addVotingConfigExclusion(new VotingConfigExclusion(randomAlphaOfLength(10), randomAlphaOfLength(10))); } @@ -129,12 +161,12 @@ private CoordinationMetaData createCoordinationMetaData(long term) { private IndexMetaData createIndexMetaData(String indexName, int numberOfShards, long version) { return IndexMetaData.builder(indexName).settings( - Settings.builder() - .put(IndexMetaData.SETTING_INDEX_UUID, indexName) - .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numberOfShards) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .build() + Settings.builder() + .put(IndexMetaData.SETTING_INDEX_UUID, indexName) + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numberOfShards) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .build() ).version(version).build(); } @@ -146,70 +178,171 @@ private void assertClusterStateEqual(ClusterState expected, ClusterState actual) } } - public void testSetLastAcceptedState() { - CoordinationState.PersistedState gateway = newGatewayPersistedState(); - final long term = randomNonNegativeLong(); - - for (int i = 0; i < randomIntBetween(1, 5); i++) { - final long version = randomNonNegativeLong(); - final String indexName = randomAlphaOfLength(10); - final IndexMetaData indexMetaData = createIndexMetaData(indexName, randomIntBetween(1,5), randomNonNegativeLong()); - final MetaData metaData = MetaData.builder(). 
+ public void testSetLastAcceptedState() throws IOException { + CoordinationState.PersistedState gateway = null; + try { + gateway = newGatewayPersistedState(); + final long term = randomNonNegativeLong(); + + for (int i = 0; i < randomIntBetween(1, 5); i++) { + final long version = randomNonNegativeLong(); + final String indexName = randomAlphaOfLength(10); + final IndexMetaData indexMetaData = createIndexMetaData(indexName, randomIntBetween(1, 5), randomNonNegativeLong()); + final MetaData metaData = MetaData.builder(). persistentSettings(Settings.builder().put(randomAlphaOfLength(10), randomAlphaOfLength(10)).build()). coordinationMetaData(createCoordinationMetaData(term)). put(indexMetaData, false). build(); - ClusterState state = createClusterState(version, metaData); + ClusterState state = createClusterState(version, metaData); - gateway.setLastAcceptedState(state); - gateway = maybeNew(gateway); + gateway.setLastAcceptedState(state); + gateway = maybeNew(gateway); - ClusterState lastAcceptedState = gateway.getLastAcceptedState(); - assertClusterStateEqual(state, lastAcceptedState); + ClusterState lastAcceptedState = gateway.getLastAcceptedState(); + assertClusterStateEqual(state, lastAcceptedState); + } + } finally { + IOUtils.close(gateway); } } - public void testSetLastAcceptedStateTermChanged() { - CoordinationState.PersistedState gateway = newGatewayPersistedState(); + public void testSetLastAcceptedStateTermChanged() throws IOException { + CoordinationState.PersistedState gateway = null; + try { + gateway = newGatewayPersistedState(); - final String indexName = randomAlphaOfLength(10); - final int numberOfShards = randomIntBetween(1, 5); - final long version = randomNonNegativeLong(); - final long term = randomNonNegativeLong(); - final IndexMetaData indexMetaData = createIndexMetaData(indexName, numberOfShards, version); - final ClusterState state = createClusterState(randomNonNegativeLong(), + final String indexName = randomAlphaOfLength(10); + final int numberOfShards = randomIntBetween(1, 5); + final long version = randomNonNegativeLong(); + final long term = randomValueOtherThan(Long.MAX_VALUE, ESTestCase::randomNonNegativeLong); + final IndexMetaData indexMetaData = createIndexMetaData(indexName, numberOfShards, version); + final ClusterState state = createClusterState(randomNonNegativeLong(), MetaData.builder().coordinationMetaData(createCoordinationMetaData(term)).put(indexMetaData, false).build()); - gateway.setLastAcceptedState(state); + gateway.setLastAcceptedState(state); - gateway = maybeNew(gateway); - final long newTerm = randomValueOtherThan(term, ESTestCase::randomNonNegativeLong); - final int newNumberOfShards = randomValueOtherThan(numberOfShards, () -> randomIntBetween(1,5)); - final IndexMetaData newIndexMetaData = createIndexMetaData(indexName, newNumberOfShards, version); - final ClusterState newClusterState = createClusterState(randomNonNegativeLong(), + gateway = maybeNew(gateway); + final long newTerm = randomLongBetween(term + 1, Long.MAX_VALUE); + final int newNumberOfShards = randomValueOtherThan(numberOfShards, () -> randomIntBetween(1, 5)); + final IndexMetaData newIndexMetaData = createIndexMetaData(indexName, newNumberOfShards, version); + final ClusterState newClusterState = createClusterState(randomNonNegativeLong(), MetaData.builder().coordinationMetaData(createCoordinationMetaData(newTerm)).put(newIndexMetaData, false).build()); - gateway.setLastAcceptedState(newClusterState); + gateway.setLastAcceptedState(newClusterState); - gateway = 
maybeNew(gateway); - assertThat(gateway.getLastAcceptedState().metaData().index(indexName), equalTo(newIndexMetaData)); + gateway = maybeNew(gateway); + assertThat(gateway.getLastAcceptedState().metaData().index(indexName), equalTo(newIndexMetaData)); + } finally { + IOUtils.close(gateway); + } } - public void testCurrentTermAndTermAreDifferent() { - CoordinationState.PersistedState gateway = newGatewayPersistedState(); + public void testCurrentTermAndTermAreDifferent() throws IOException { + CoordinationState.PersistedState gateway = null; + try { + gateway = newGatewayPersistedState(); - long currentTerm = randomNonNegativeLong(); - long term = randomValueOtherThan(currentTerm, ESTestCase::randomNonNegativeLong); + long currentTerm = randomNonNegativeLong(); + long term = randomValueOtherThan(currentTerm, ESTestCase::randomNonNegativeLong); - gateway.setCurrentTerm(currentTerm); - gateway.setLastAcceptedState(createClusterState(randomNonNegativeLong(), + gateway.setCurrentTerm(currentTerm); + gateway.setLastAcceptedState(createClusterState(randomNonNegativeLong(), MetaData.builder().coordinationMetaData(CoordinationMetaData.builder().term(term).build()).build())); - gateway = maybeNew(gateway); - assertThat(gateway.getCurrentTerm(), equalTo(currentTerm)); - assertThat(gateway.getLastAcceptedState().coordinationMetaData().term(), equalTo(term)); + gateway = maybeNew(gateway); + assertThat(gateway.getCurrentTerm(), equalTo(currentTerm)); + assertThat(gateway.getLastAcceptedState().coordinationMetaData().term(), equalTo(term)); + } finally { + IOUtils.close(gateway); + } + } + + public void testMarkAcceptedConfigAsCommitted() throws IOException { + CoordinationState.PersistedState gateway = null; + try { + gateway = newGatewayPersistedState(); + + // generate random coordinationMetaData with different lastAcceptedConfiguration and lastCommittedConfiguration + CoordinationMetaData coordinationMetaData; + do { + coordinationMetaData = createCoordinationMetaData(randomNonNegativeLong()); + } while (coordinationMetaData.getLastAcceptedConfiguration().equals(coordinationMetaData.getLastCommittedConfiguration())); + + ClusterState state = createClusterState(randomNonNegativeLong(), + MetaData.builder().coordinationMetaData(coordinationMetaData) + .clusterUUID(randomAlphaOfLength(10)).build()); + gateway.setLastAcceptedState(state); + + gateway = maybeNew(gateway); + assertThat(gateway.getLastAcceptedState().getLastAcceptedConfiguration(), + not(equalTo(gateway.getLastAcceptedState().getLastCommittedConfiguration()))); + gateway.markLastAcceptedStateAsCommitted(); + + CoordinationMetaData expectedCoordinationMetaData = CoordinationMetaData.builder(coordinationMetaData) + .lastCommittedConfiguration(coordinationMetaData.getLastAcceptedConfiguration()).build(); + ClusterState expectedClusterState = + ClusterState.builder(state).metaData(MetaData.builder().coordinationMetaData(expectedCoordinationMetaData) + .clusterUUID(state.metaData().clusterUUID()).clusterUUIDCommitted(true).build()).build(); + + gateway = maybeNew(gateway); + assertClusterStateEqual(expectedClusterState, gateway.getLastAcceptedState()); + gateway.markLastAcceptedStateAsCommitted(); + + gateway = maybeNew(gateway); + assertClusterStateEqual(expectedClusterState, gateway.getLastAcceptedState()); + } finally { + IOUtils.close(gateway); + } } - public void testMarkAcceptedConfigAsCommitted() { - CoordinationState.PersistedState gateway = newGatewayPersistedState(); + public void testStatePersistedOnLoad() throws IOException { + // 
open LucenePersistedState to make sure that cluster state is written out to each data path + final PersistedClusterStateService persistedClusterStateService = + new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE); + final ClusterState state = createClusterState(randomNonNegativeLong(), + MetaData.builder().clusterUUID(randomAlphaOfLength(10)).build()); + try (GatewayMetaState.LucenePersistedState ignored = new GatewayMetaState.LucenePersistedState( + persistedClusterStateService, 42L, state)) { + + } + + nodeEnvironment.close(); + + // verify that the freshest state was rewritten to each data path + for (Path path : nodeEnvironment.nodeDataPaths()) { + Settings settings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath()) + .put(Environment.PATH_DATA_SETTING.getKey(), path.getParent().getParent().toString()).build(); + try (NodeEnvironment nodeEnvironment = new NodeEnvironment(settings, TestEnvironment.newEnvironment(settings))) { + final PersistedClusterStateService newPersistedClusterStateService = + new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE); + final PersistedClusterStateService.OnDiskState onDiskState = newPersistedClusterStateService.loadBestOnDiskState(); + assertFalse(onDiskState.empty()); + assertThat(onDiskState.currentTerm, equalTo(42L)); + assertClusterStateEqual(state, + ClusterState.builder(ClusterName.DEFAULT) + .version(onDiskState.lastAcceptedVersion) + .metaData(onDiskState.metaData).build()); + } + } + } + + public void testDataOnlyNodePersistence() throws Exception { + DiscoveryNode localNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), Collections.emptyMap(), + Sets.newHashSet(DiscoveryNodeRole.DATA_ROLE), Version.CURRENT); + Settings settings = Settings.builder().put(ClusterName.CLUSTER_NAME_SETTING.getKey(), clusterName.value()).put( + Node.NODE_MASTER_SETTING.getKey(), false).put(Node.NODE_NAME_SETTING.getKey(), "test").build(); + final MockGatewayMetaState gateway = new MockGatewayMetaState(localNode); + final TransportService transportService = mock(TransportService.class); + TestThreadPool threadPool = new TestThreadPool("testMarkAcceptedConfigAsCommittedOnDataOnlyNode"); + when(transportService.getThreadPool()).thenReturn(threadPool); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); + final PersistedClusterStateService persistedClusterStateService = + new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE); + gateway.start(settings, transportService, clusterService, + new MetaStateService(nodeEnvironment, xContentRegistry()), null, null, persistedClusterStateService); + final CoordinationState.PersistedState persistedState = gateway.getPersistedState(); + assertThat(persistedState, instanceOf(GatewayMetaState.AsyncLucenePersistedState.class)); //generate random coordinationMetaData with different lastAcceptedConfiguration and lastCommittedConfiguration CoordinationMetaData coordinationMetaData; @@ -218,26 +351,161 @@ public void testMarkAcceptedConfigAsCommitted() { } while (coordinationMetaData.getLastAcceptedConfiguration().equals(coordinationMetaData.getLastCommittedConfiguration())); ClusterState state = createClusterState(randomNonNegativeLong(), - 
MetaData.builder().coordinationMetaData(coordinationMetaData) - .clusterUUID(randomAlphaOfLength(10)).build()); - gateway.setLastAcceptedState(state); - - gateway = maybeNew(gateway); - assertThat(gateway.getLastAcceptedState().getLastAcceptedConfiguration(), - not(equalTo(gateway.getLastAcceptedState().getLastCommittedConfiguration()))); - gateway.markLastAcceptedStateAsCommitted(); + MetaData.builder().coordinationMetaData(coordinationMetaData) + .clusterUUID(randomAlphaOfLength(10)).build()); + persistedState.setLastAcceptedState(state); + assertBusy(() -> assertTrue(gateway.allPendingAsyncStatesWritten())); + + assertThat(persistedState.getLastAcceptedState().getLastAcceptedConfiguration(), + not(equalTo(persistedState.getLastAcceptedState().getLastCommittedConfiguration()))); + CoordinationMetaData persistedCoordinationMetaData = + persistedClusterStateService.loadBestOnDiskState().metaData.coordinationMetaData(); + assertThat(persistedCoordinationMetaData.getLastAcceptedConfiguration(), + equalTo(GatewayMetaState.AsyncLucenePersistedState.staleStateConfiguration)); + assertThat(persistedCoordinationMetaData.getLastCommittedConfiguration(), + equalTo(GatewayMetaState.AsyncLucenePersistedState.staleStateConfiguration)); + + persistedState.markLastAcceptedStateAsCommitted(); + assertBusy(() -> assertTrue(gateway.allPendingAsyncStatesWritten())); CoordinationMetaData expectedCoordinationMetaData = CoordinationMetaData.builder(coordinationMetaData) - .lastCommittedConfiguration(coordinationMetaData.getLastAcceptedConfiguration()).build(); + .lastCommittedConfiguration(coordinationMetaData.getLastAcceptedConfiguration()).build(); ClusterState expectedClusterState = - ClusterState.builder(state).metaData(MetaData.builder().coordinationMetaData(expectedCoordinationMetaData) - .clusterUUID(state.metaData().clusterUUID()).clusterUUIDCommitted(true).build()).build(); + ClusterState.builder(state).metaData(MetaData.builder().coordinationMetaData(expectedCoordinationMetaData) + .clusterUUID(state.metaData().clusterUUID()).clusterUUIDCommitted(true).build()).build(); + + assertClusterStateEqual(expectedClusterState, persistedState.getLastAcceptedState()); + persistedCoordinationMetaData = persistedClusterStateService.loadBestOnDiskState().metaData.coordinationMetaData(); + assertThat(persistedCoordinationMetaData.getLastAcceptedConfiguration(), + equalTo(GatewayMetaState.AsyncLucenePersistedState.staleStateConfiguration)); + assertThat(persistedCoordinationMetaData.getLastCommittedConfiguration(), + equalTo(GatewayMetaState.AsyncLucenePersistedState.staleStateConfiguration)); + assertTrue(persistedClusterStateService.loadBestOnDiskState().metaData.clusterUUIDCommitted()); + + // generate a series of updates and check if batching works + final String indexName = randomAlphaOfLength(10); + long currentTerm = state.term(); + for (int i = 0; i < 1000; i++) { + if (rarely()) { + // bump term + currentTerm = currentTerm + (rarely() ? randomIntBetween(1, 5) : 0L); + persistedState.setCurrentTerm(currentTerm); + } else { + // update cluster state + final int numberOfShards = randomIntBetween(1, 5); + final long term = Math.min(state.term() + (rarely() ? 
randomIntBetween(1, 5) : 0L), currentTerm); + final IndexMetaData indexMetaData = createIndexMetaData(indexName, numberOfShards, i); + state = createClusterState(state.version() + 1, + MetaData.builder().coordinationMetaData(createCoordinationMetaData(term)).put(indexMetaData, false).build()); + persistedState.setLastAcceptedState(state); + } + } + assertEquals(currentTerm, persistedState.getCurrentTerm()); + assertClusterStateEqual(state, persistedState.getLastAcceptedState()); + assertBusy(() -> assertTrue(gateway.allPendingAsyncStatesWritten())); + + gateway.close(); + + try (CoordinationState.PersistedState reloadedPersistedState = newGatewayPersistedState()) { + assertEquals(currentTerm, reloadedPersistedState.getCurrentTerm()); + assertClusterStateEqual(GatewayMetaState.AsyncLucenePersistedState.resetVotingConfiguration(state), + reloadedPersistedState.getLastAcceptedState()); + assertNotNull(reloadedPersistedState.getLastAcceptedState().metaData().index(indexName)); + } + + ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS); + } - public void testStatePersistenceWithIOIssues() throws IOException { + final AtomicReference<Double> ioExceptionRate = new AtomicReference<>(0.01d); + final List<MockDirectoryWrapper> list = new ArrayList<>(); + final PersistedClusterStateService persistedClusterStateService = + new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE) { + @Override + Directory createDirectory(Path path) { + final MockDirectoryWrapper wrapper = newMockFSDirectory(path); + wrapper.setAllowRandomFileNotFoundException(randomBoolean()); + wrapper.setRandomIOExceptionRate(ioExceptionRate.get()); + wrapper.setRandomIOExceptionRateOnOpen(ioExceptionRate.get()); + list.add(wrapper); + return wrapper; + } + }; + ClusterState state = createClusterState(randomNonNegativeLong(), + MetaData.builder().clusterUUID(randomAlphaOfLength(10)).build()); + long currentTerm = 42L; + try (GatewayMetaState.LucenePersistedState persistedState = new GatewayMetaState.LucenePersistedState( + persistedClusterStateService, currentTerm, state)) { + + try { + if (randomBoolean()) { + final ClusterState newState = createClusterState(randomNonNegativeLong(), + MetaData.builder().clusterUUID(randomAlphaOfLength(10)).build()); + persistedState.setLastAcceptedState(newState); + state = newState; + } else { + final long newTerm = currentTerm + 1; + persistedState.setCurrentTerm(newTerm); + currentTerm = newTerm; + } + } catch (IOError | Exception e) { + assertNotNull(ExceptionsHelper.unwrap(e, IOException.class)); + } + + ioExceptionRate.set(0.0d); + for (MockDirectoryWrapper wrapper : list) { + wrapper.setRandomIOExceptionRate(ioExceptionRate.get()); + wrapper.setRandomIOExceptionRateOnOpen(ioExceptionRate.get()); + } + + for (int i = 0; i < randomIntBetween(1, 5); i++) { + if (randomBoolean()) { + final long version = randomNonNegativeLong(); + final String indexName = randomAlphaOfLength(10); + final IndexMetaData indexMetaData = createIndexMetaData(indexName, randomIntBetween(1, 5), randomNonNegativeLong()); + final MetaData metaData = MetaData.builder(). + persistentSettings(Settings.builder().put(randomAlphaOfLength(10), randomAlphaOfLength(10)).build()). + coordinationMetaData(createCoordinationMetaData(1L)). + put(indexMetaData, false).
+    public void testStatePersistenceWithIOIssues() throws IOException {
+        final AtomicReference<Double> ioExceptionRate = new AtomicReference<>(0.01d);
+        final List<MockDirectoryWrapper> list = new ArrayList<>();
+        final PersistedClusterStateService persistedClusterStateService =
+            new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE) {
+                @Override
+                Directory createDirectory(Path path) {
+                    final MockDirectoryWrapper wrapper = newMockFSDirectory(path);
+                    wrapper.setAllowRandomFileNotFoundException(randomBoolean());
+                    wrapper.setRandomIOExceptionRate(ioExceptionRate.get());
+                    wrapper.setRandomIOExceptionRateOnOpen(ioExceptionRate.get());
+                    list.add(wrapper);
+                    return wrapper;
+                }
+            };
+        ClusterState state = createClusterState(randomNonNegativeLong(),
+            MetaData.builder().clusterUUID(randomAlphaOfLength(10)).build());
+        long currentTerm = 42L;
+        try (GatewayMetaState.LucenePersistedState persistedState = new GatewayMetaState.LucenePersistedState(
+            persistedClusterStateService, currentTerm, state)) {
+
+            try {
+                if (randomBoolean()) {
+                    final ClusterState newState = createClusterState(randomNonNegativeLong(),
+                        MetaData.builder().clusterUUID(randomAlphaOfLength(10)).build());
+                    persistedState.setLastAcceptedState(newState);
+                    state = newState;
+                } else {
+                    final long newTerm = currentTerm + 1;
+                    persistedState.setCurrentTerm(newTerm);
+                    currentTerm = newTerm;
+                }
+            } catch (IOError | Exception e) {
+                assertNotNull(ExceptionsHelper.unwrap(e, IOException.class));
+            }
+
+            ioExceptionRate.set(0.0d);
+            for (MockDirectoryWrapper wrapper : list) {
+                wrapper.setRandomIOExceptionRate(ioExceptionRate.get());
+                wrapper.setRandomIOExceptionRateOnOpen(ioExceptionRate.get());
+            }
+
+            for (int i = 0; i < randomIntBetween(1, 5); i++) {
+                if (randomBoolean()) {
+                    final long version = randomNonNegativeLong();
+                    final String indexName = randomAlphaOfLength(10);
+                    final IndexMetaData indexMetaData = createIndexMetaData(indexName, randomIntBetween(1, 5), randomNonNegativeLong());
+                    final MetaData metaData = MetaData.builder().
+                        persistentSettings(Settings.builder().put(randomAlphaOfLength(10), randomAlphaOfLength(10)).build()).
+                        coordinationMetaData(createCoordinationMetaData(1L)).
+                        put(indexMetaData, false).
+                        build();
+                    state = createClusterState(version, metaData);
+                    persistedState.setLastAcceptedState(state);
+                } else {
+                    currentTerm += 1;
+                    persistedState.setCurrentTerm(currentTerm);
+                }
+            }
+
+            assertEquals(state, persistedState.getLastAcceptedState());
+            assertEquals(currentTerm, persistedState.getCurrentTerm());
+
+        } catch (IOError | Exception e) {
+            if (ioExceptionRate.get() == 0.0d) {
+                throw e;
+            }
+            assertNotNull(ExceptionsHelper.unwrap(e, IOException.class));
+            return;
+        }
-        gateway = maybeNew(gateway);
-        assertClusterStateEqual(expectedClusterState, gateway.getLastAcceptedState());
+        nodeEnvironment.close();
+
+        // verify that the freshest state was rewritten to each data path
+        for (Path path : nodeEnvironment.nodeDataPaths()) {
+            Settings settings = Settings.builder()
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath())
+                .put(Environment.PATH_DATA_SETTING.getKey(), path.getParent().getParent().toString()).build();
+            try (NodeEnvironment nodeEnvironment = new NodeEnvironment(settings, TestEnvironment.newEnvironment(settings))) {
+                final PersistedClusterStateService newPersistedClusterStateService =
+                    new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE);
+                final PersistedClusterStateService.OnDiskState onDiskState = newPersistedClusterStateService.loadBestOnDiskState();
+                assertFalse(onDiskState.empty());
+                assertThat(onDiskState.currentTerm, equalTo(currentTerm));
+                assertClusterStateEqual(state,
+                    ClusterState.builder(ClusterName.DEFAULT)
+                        .version(onDiskState.lastAcceptedVersion)
+                        .metaData(onDiskState.metaData).build());
+            }
+        }
     }
+    }
diff --git a/server/src/test/java/org/elasticsearch/gateway/IncrementalClusterStateWriterTests.java b/server/src/test/java/org/elasticsearch/gateway/IncrementalClusterStateWriterTests.java
index 65432466c61a1..86fe94587093a 100644
--- a/server/src/test/java/org/elasticsearch/gateway/IncrementalClusterStateWriterTests.java
+++ b/server/src/test/java/org/elasticsearch/gateway/IncrementalClusterStateWriterTests.java
@@ -173,7 +173,7 @@ private IndexMetaData createIndexMetaData(String name) {
     public void testGetRelevantIndicesWithUnassignedShardsOnMasterEligibleNode() {
         IndexMetaData indexMetaData = createIndexMetaData("test");
         Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(clusterStateWithUnassignedIndex(indexMetaData, true));
-        assertThat(indices.size(), equalTo(1));
+        assertThat(indices.size(), equalTo(0));
     }
 
     public void testGetRelevantIndicesWithUnassignedShardsOnDataOnlyNode() {
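// A minimal sketch of the fault-injection hooks testStatePersistenceWithIOIssues
// relies on, assuming a LuceneTestCase subclass (newMockFSDirectory and the rate
// setters come from Lucene's test framework; the 1% rate mirrors the test above):
//
//     MockDirectoryWrapper dir = newMockFSDirectory(createTempDir());
//     dir.setRandomIOExceptionRate(0.01);       // ~1% of I/O operations throw IOException
//     dir.setRandomIOExceptionRateOnOpen(0.01); // opening files can fail too
//
// Resetting both rates to 0.0d, as the test does before its final writes, switches
// the injection off so the closing assertions can run deterministically.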
diff --git a/server/src/test/java/org/elasticsearch/gateway/MetaDataWriteDataNodesIT.java b/server/src/test/java/org/elasticsearch/gateway/MetaDataWriteDataNodesIT.java
index 93bd3a44de919..6d318a77a50f6 100644
--- a/server/src/test/java/org/elasticsearch/gateway/MetaDataWriteDataNodesIT.java
+++ b/server/src/test/java/org/elasticsearch/gateway/MetaDataWriteDataNodesIT.java
@@ -21,10 +21,11 @@
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
+import org.elasticsearch.cluster.coordination.Coordinator;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
-import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.common.collect.ImmutableOpenMap;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.discovery.Discovery;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.test.ESIntegTestCase;
@@ -55,8 +56,8 @@ public void testMetaWrittenAlsoOnDataNode() throws Exception {
         assertIndexInMetaState(masterNode, "test");
     }
 
-    public void testMetaIsRemovedIfAllShardsFromIndexRemoved() throws Exception {
-        // this test checks that the index state is removed from a data only node once all shards have been allocated away from it
+    public void testIndexFilesAreRemovedIfAllShardsFromIndexRemoved() throws Exception {
+        // this test checks that the index data is removed from a data-only node once all shards have been allocated away from it
         String masterNode = internalCluster().startMasterOnlyNode(Settings.EMPTY);
         List<String> nodeNames = internalCluster().startDataOnlyNodes(2);
         String node1 = nodeNames.get(0);
@@ -69,8 +70,10 @@ public void testMetaIsRemovedIfAllShardsFromIndexRemoved() throws Exception {
         ensureGreen();
         assertIndexInMetaState(node1, index);
         Index resolveIndex = resolveIndex(index);
+        assertIndexDirectoryExists(node1, resolveIndex);
         assertIndexDirectoryDeleted(node2, resolveIndex);
         assertIndexInMetaState(masterNode, index);
+        assertIndexDirectoryDeleted(masterNode, resolveIndex);
 
         logger.debug("relocating index...");
         client().admin().indices().prepareUpdateSettings(index).setSettings(Settings.builder()
@@ -79,7 +82,13 @@
         ensureGreen();
         assertIndexDirectoryDeleted(node1, resolveIndex);
         assertIndexInMetaState(node2, index);
+        assertIndexDirectoryExists(node2, resolveIndex);
         assertIndexInMetaState(masterNode, index);
+        assertIndexDirectoryDeleted(masterNode, resolveIndex);
+
+        client().admin().indices().prepareDelete(index).get();
+        assertIndexDirectoryDeleted(node1, resolveIndex);
+        assertIndexDirectoryDeleted(node2, resolveIndex);
     }
 
     @SuppressWarnings("unchecked")
@@ -156,17 +165,19 @@ public void testMetaWrittenWhenIndexIsClosedAndMetaUpdated() throws Exception {
     }
 
     protected void assertIndexDirectoryDeleted(final String nodeName, final Index index) throws Exception {
-        assertBusy(() -> {
-                logger.info("checking if index directory exists...");
-                assertFalse("Expecting index directory of " + index + " to be deleted from node " + nodeName,
-                    indexDirectoryExists(nodeName, index));
-            }
+        assertBusy(() -> assertFalse("Expecting index directory of " + index + " to be deleted from node " + nodeName,
+            indexDirectoryExists(nodeName, index))
+        );
+    }
+
+    protected void assertIndexDirectoryExists(final String nodeName, final Index index) throws Exception {
+        assertBusy(() -> assertTrue("Expecting index directory of " + index + " to exist on node " + nodeName,
+            indexDirectoryExists(nodeName, index))
         );
     }
 
     protected void assertIndexInMetaState(final String nodeName, final String indexName) throws Exception {
         assertBusy(() -> {
-            logger.info("checking if meta state exists...");
             try {
                 assertTrue("Expecting meta state of index " + indexName + " to be on node " + nodeName,
                     getIndicesMetaDataOnNode(nodeName).containsKey(indexName));
@@ -190,8 +201,7 @@ private boolean indexDirectoryExists(String nodeName, Index index) {
     }
 
     private ImmutableOpenMap<String, IndexMetaData> getIndicesMetaDataOnNode(String nodeName) {
-        GatewayMetaState nodeMetaState = ((InternalTestCluster) cluster()).getInstance(GatewayMetaState.class, nodeName);
-        MetaData nodeMetaData = nodeMetaState.getMetaData();
-        return nodeMetaData.getIndices();
+        final Coordinator coordinator = (Coordinator) internalCluster().getInstance(Discovery.class, nodeName);
+        return coordinator.getApplierState().getMetaData().getIndices();
     }
 }
diff --git
a/server/src/test/java/org/elasticsearch/gateway/MetaStateServiceTests.java b/server/src/test/java/org/elasticsearch/gateway/MetaStateServiceTests.java index 2f34cc4300d2d..97b29327b9d3b 100644 --- a/server/src/test/java/org/elasticsearch/gateway/MetaStateServiceTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/MetaStateServiceTests.java @@ -155,7 +155,7 @@ public void testLoadFullStateMissingGlobalMetaData() throws IOException { assertThat(loadedMetaData.index("test1"), equalTo(index)); } - public void testLoadFullStateAndUpdate() throws IOException { + public void testLoadFullStateAndUpdateAndClean() throws IOException { IndexMetaData index = indexMetaData("test1"); MetaData metaData = MetaData.builder() .persistentSettings(Settings.builder().put("test1", "value1").build()) @@ -201,5 +201,15 @@ public void testLoadFullStateAndUpdate() throws IOException { assertThat(loadedMetaData.persistentSettings(), equalTo(newMetaData.persistentSettings())); assertThat(loadedMetaData.hasIndex("test1"), equalTo(true)); assertThat(loadedMetaData.index("test1"), equalTo(index)); + + if (randomBoolean()) { + metaStateService.unreferenceAll(); + } else { + metaStateService.deleteAll(); + } + manifestAndMetaData = metaStateService.loadFullState(); + assertTrue(manifestAndMetaData.v1().isEmpty()); + metaData = manifestAndMetaData.v2(); + assertTrue(MetaData.isGlobalStateEquals(metaData, MetaData.EMPTY_META_DATA)); } } diff --git a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java new file mode 100644 index 0000000000000..f3a0cf25f6bb3 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java @@ -0,0 +1,793 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.gateway; + +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.SimpleFSDirectory; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.coordination.CoordinationMetaData; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.common.UUIDs; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.common.util.MockPageCacheRecycler; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.env.NodeMetaData; +import org.elasticsearch.gateway.PersistedClusterStateService.Writer; +import org.elasticsearch.index.Index; +import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOError; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +public class PersistedClusterStateServiceTests extends ESTestCase { + + private PersistedClusterStateService newPersistedClusterStateService(NodeEnvironment nodeEnvironment) { + return new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), + usually() + ? 
BigArrays.NON_RECYCLING_INSTANCE + : new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService())); + } + + public void testPersistsAndReloadsTerm() throws IOException { + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) { + final PersistedClusterStateService persistedClusterStateService = newPersistedClusterStateService(nodeEnvironment); + final long newTerm = randomNonNegativeLong(); + + assertThat(persistedClusterStateService.loadBestOnDiskState().currentTerm, equalTo(0L)); + try (Writer writer = persistedClusterStateService.createWriter()) { + writer.writeFullStateAndCommit(newTerm, ClusterState.EMPTY_STATE); + assertThat(persistedClusterStateService.loadBestOnDiskState().currentTerm, equalTo(newTerm)); + } + + assertThat(persistedClusterStateService.loadBestOnDiskState().currentTerm, equalTo(newTerm)); + } + } + + public void testPersistsAndReloadsGlobalMetadata() throws IOException { + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) { + final PersistedClusterStateService persistedClusterStateService = newPersistedClusterStateService(nodeEnvironment); + final String clusterUUID = UUIDs.randomBase64UUID(random()); + final long version = randomLongBetween(1L, Long.MAX_VALUE); + + ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService); + try (Writer writer = persistedClusterStateService.createWriter()) { + writer.writeFullStateAndCommit(0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .clusterUUID(clusterUUID) + .clusterUUIDCommitted(true) + .version(version)) + .incrementVersion().build()); + clusterState = loadPersistedClusterState(persistedClusterStateService); + assertThat(clusterState.metaData().clusterUUID(), equalTo(clusterUUID)); + assertTrue(clusterState.metaData().clusterUUIDCommitted()); + assertThat(clusterState.metaData().version(), equalTo(version)); + } + + try (Writer writer = persistedClusterStateService.createWriter()) { + writer.writeFullStateAndCommit(0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .clusterUUID(clusterUUID) + .clusterUUIDCommitted(true) + .version(version + 1)) + .incrementVersion().build()); + } + + clusterState = loadPersistedClusterState(persistedClusterStateService); + assertThat(clusterState.metaData().clusterUUID(), equalTo(clusterUUID)); + assertTrue(clusterState.metaData().clusterUUIDCommitted()); + assertThat(clusterState.metaData().version(), equalTo(version + 1)); + } + } + + private static void writeState(Writer writer, long currentTerm, ClusterState clusterState, + ClusterState previousState) throws IOException { + if (randomBoolean() || clusterState.term() != previousState.term() || writer.fullStateWritten == false) { + writer.writeFullStateAndCommit(currentTerm, clusterState); + } else { + writer.writeIncrementalStateAndCommit(currentTerm, previousState, clusterState); + } + } + + public void testLoadsFreshestState() throws IOException { + final Path[] dataPaths = createDataPaths(); + final long freshTerm = randomLongBetween(1L, Long.MAX_VALUE); + final long staleTerm = randomBoolean() ? freshTerm : randomLongBetween(1L, freshTerm); + final long freshVersion = randomLongBetween(2L, Long.MAX_VALUE); + final long staleVersion = staleTerm == freshTerm ? 
randomLongBetween(1L, freshVersion - 1) : randomLongBetween(1L, Long.MAX_VALUE);
+
+        final HashSet<Path> unimportantPaths = Arrays.stream(dataPaths).collect(Collectors.toCollection(HashSet::new));
+
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths)) {
+            final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment));
+            try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) {
+                writeState(writer, staleTerm,
+                    ClusterState.builder(clusterState).version(staleVersion)
+                        .metaData(MetaData.builder(clusterState.metaData()).coordinationMetaData(
+                            CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(staleTerm).build())).build(),
+                    clusterState);
+            }
+        }
+
+        final Path freshPath = randomFrom(dataPaths);
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(new Path[]{freshPath})) {
+            unimportantPaths.remove(freshPath);
+            try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) {
+                final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment));
+                writeState(writer, freshTerm,
+                    ClusterState.builder(clusterState).version(freshVersion)
+                        .metaData(MetaData.builder(clusterState.metaData()).coordinationMetaData(
+                            CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(freshTerm).build())).build(),
+                    clusterState);
+            }
+        }
+
+        if (randomBoolean() && unimportantPaths.isEmpty() == false) {
+            IOUtils.rm(randomFrom(unimportantPaths));
+        }
+
+        // verify that the freshest state is chosen
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths)) {
+            final PersistedClusterStateService.OnDiskState onDiskState = newPersistedClusterStateService(nodeEnvironment)
+                .loadBestOnDiskState();
+            final ClusterState clusterState = clusterStateFromMetadata(onDiskState.lastAcceptedVersion, onDiskState.metaData);
+            assertThat(clusterState.term(), equalTo(freshTerm));
+            assertThat(clusterState.version(), equalTo(freshVersion));
+        }
+    }
+
+    public void testFailsOnMismatchedNodeIds() throws IOException {
+        final Path[] dataPaths1 = createDataPaths();
+        final Path[] dataPaths2 = createDataPaths();
+
+        final String[] nodeIds = new String[2];
+
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths1)) {
+            nodeIds[0] = nodeEnvironment.nodeId();
+            try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) {
+                final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment));
+                writer.writeFullStateAndCommit(0L,
+                    ClusterState.builder(clusterState).version(randomLongBetween(1L, Long.MAX_VALUE)).build());
+            }
+        }
+
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths2)) {
+            nodeIds[1] = nodeEnvironment.nodeId();
+            try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) {
+                final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment));
+                writer.writeFullStateAndCommit(0L,
+                    ClusterState.builder(clusterState).version(randomLongBetween(1L, Long.MAX_VALUE)).build());
+            }
+        }
+
+        NodeMetaData.FORMAT.cleanupOldFiles(Long.MAX_VALUE, dataPaths2);
+
+        final Path[] combinedPaths = Stream.concat(Arrays.stream(dataPaths1), Arrays.stream(dataPaths2)).toArray(Path[]::new);
+
+        final String failure = expectThrows(IllegalStateException.class, () -> newNodeEnvironment(combinedPaths)).getMessage();
+        assertThat(failure,
+            allOf(containsString("unexpected
node ID in metadata"), containsString(nodeIds[0]), containsString(nodeIds[1]))); + assertTrue("[" + failure + "] should match " + Arrays.toString(dataPaths2), + Arrays.stream(dataPaths2).anyMatch(p -> failure.contains(p.toString()))); + + // verify that loadBestOnDiskState has same check + final String message = expectThrows(IllegalStateException.class, + () -> new PersistedClusterStateService(Stream.of(combinedPaths).map(path -> NodeEnvironment.resolveNodePath(path, 0)) + .toArray(Path[]::new), nodeIds[0], xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE, + randomBoolean()).loadBestOnDiskState()).getMessage(); + assertThat(message, + allOf(containsString("unexpected node ID in metadata"), containsString(nodeIds[0]), containsString(nodeIds[1]))); + assertTrue("[" + message + "] should match " + Arrays.toString(dataPaths2), + Arrays.stream(dataPaths2).anyMatch(p -> message.contains(p.toString()))); + } + + public void testFailsOnMismatchedCommittedClusterUUIDs() throws IOException { + final Path[] dataPaths1 = createDataPaths(); + final Path[] dataPaths2 = createDataPaths(); + final Path[] combinedPaths = Stream.concat(Arrays.stream(dataPaths1), Arrays.stream(dataPaths2)).toArray(Path[]::new); + + final String clusterUUID1 = UUIDs.randomBase64UUID(random()); + final String clusterUUID2 = UUIDs.randomBase64UUID(random()); + + // first establish consistent node IDs and write initial metadata + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(combinedPaths)) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + assertFalse(clusterState.metaData().clusterUUIDCommitted()); + writer.writeFullStateAndCommit(0L, clusterState); + } + } + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths1)) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + assertFalse(clusterState.metaData().clusterUUIDCommitted()); + writer.writeFullStateAndCommit(0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .clusterUUID(clusterUUID1) + .clusterUUIDCommitted(true) + .version(1)) + .incrementVersion().build()); + } + } + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths2)) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + assertFalse(clusterState.metaData().clusterUUIDCommitted()); + writer.writeFullStateAndCommit(0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .clusterUUID(clusterUUID2) + .clusterUUIDCommitted(true) + .version(1)) + .incrementVersion().build()); + } + } + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(combinedPaths)) { + final String message = expectThrows(IllegalStateException.class, + () -> newPersistedClusterStateService(nodeEnvironment).loadBestOnDiskState()).getMessage(); + assertThat(message, + allOf(containsString("mismatched cluster UUIDs in metadata"), containsString(clusterUUID1), containsString(clusterUUID2))); + assertTrue("[" + message + "] should match " + Arrays.toString(dataPaths1), + Arrays.stream(dataPaths1).anyMatch(p -> message.contains(p.toString()))); + assertTrue("[" + 
message + "] should match " + Arrays.toString(dataPaths2), + Arrays.stream(dataPaths2).anyMatch(p -> message.contains(p.toString()))); + } + } + + public void testFailsIfFreshestStateIsInStaleTerm() throws IOException { + final Path[] dataPaths1 = createDataPaths(); + final Path[] dataPaths2 = createDataPaths(); + final Path[] combinedPaths = Stream.concat(Arrays.stream(dataPaths1), Arrays.stream(dataPaths2)).toArray(Path[]::new); + + final long staleCurrentTerm = randomLongBetween(1L, Long.MAX_VALUE - 1); + final long freshCurrentTerm = randomLongBetween(staleCurrentTerm + 1, Long.MAX_VALUE); + + final long freshTerm = randomLongBetween(1L, Long.MAX_VALUE); + final long staleTerm = randomBoolean() ? freshTerm : randomLongBetween(1L, freshTerm); + final long freshVersion = randomLongBetween(2L, Long.MAX_VALUE); + final long staleVersion = staleTerm == freshTerm ? randomLongBetween(1L, freshVersion - 1) : randomLongBetween(1L, Long.MAX_VALUE); + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(combinedPaths)) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + assertFalse(clusterState.metaData().clusterUUIDCommitted()); + writeState(writer, staleCurrentTerm, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()).version(1) + .coordinationMetaData(CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(staleTerm).build())) + .version(staleVersion) + .build(), + clusterState); + } + } + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths1)) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + writeState(writer, freshCurrentTerm, clusterState, clusterState); + } + } + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(dataPaths2)) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final PersistedClusterStateService.OnDiskState onDiskState = newPersistedClusterStateService(nodeEnvironment) + .loadBestOnDiskState(); + final ClusterState clusterState = clusterStateFromMetadata(onDiskState.lastAcceptedVersion, onDiskState.metaData); + writeState(writer, onDiskState.currentTerm, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()).version(2) + .coordinationMetaData(CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(freshTerm).build())) + .version(freshVersion) + .build(), clusterState); + } + } + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(combinedPaths)) { + final String message = expectThrows(IllegalStateException.class, + () -> newPersistedClusterStateService(nodeEnvironment).loadBestOnDiskState()).getMessage(); + assertThat(message, allOf( + containsString("inconsistent terms found"), + containsString(Long.toString(staleCurrentTerm)), + containsString(Long.toString(freshCurrentTerm)))); + assertTrue("[" + message + "] should match " + Arrays.toString(dataPaths1), + Arrays.stream(dataPaths1).anyMatch(p -> message.contains(p.toString()))); + assertTrue("[" + message + "] should match " + Arrays.toString(dataPaths2), + Arrays.stream(dataPaths2).anyMatch(p -> message.contains(p.toString()))); + } + } + + public void testFailsGracefullyOnExceptionDuringFlush() throws IOException { + final 
AtomicBoolean throwException = new AtomicBoolean();
+
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) {
+            final PersistedClusterStateService persistedClusterStateService
+                = new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE) {
+                @Override
+                Directory createDirectory(Path path) throws IOException {
+                    return new FilterDirectory(super.createDirectory(path)) {
+                        @Override
+                        public IndexOutput createOutput(String name, IOContext context) throws IOException {
+                            if (throwException.get()) {
+                                throw new IOException("simulated");
+                            }
+                            return super.createOutput(name, context);
+                        }
+                    };
+                }
+            };
+
+            try (Writer writer = persistedClusterStateService.createWriter()) {
+                final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService);
+                final long newTerm = randomNonNegativeLong();
+                final ClusterState newState = ClusterState.builder(clusterState)
+                    .metaData(MetaData.builder(clusterState.metaData())
+                        .clusterUUID(UUIDs.randomBase64UUID(random()))
+                        .clusterUUIDCommitted(true)
+                        .version(randomLongBetween(1L, Long.MAX_VALUE)))
+                    .incrementVersion().build();
+                throwException.set(true);
+                assertThat(expectThrows(IOException.class, () ->
+                    writeState(writer, newTerm, newState, clusterState)).getMessage(),
+                    containsString("simulated"));
+            }
+        }
+    }
+
+    public void testClosesWriterOnFatalError() throws IOException {
+        final AtomicBoolean throwException = new AtomicBoolean();
+
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) {
+            final PersistedClusterStateService persistedClusterStateService
+                = new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE) {
+                @Override
+                Directory createDirectory(Path path) throws IOException {
+                    return new FilterDirectory(super.createDirectory(path)) {
+                        @Override
+                        public void sync(Collection<String> names) {
+                            throw new OutOfMemoryError("simulated");
+                        }
+                    };
+                }
+            };
+
+            try (Writer writer = persistedClusterStateService.createWriter()) {
+                final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService);
+                final long newTerm = randomNonNegativeLong();
+                final ClusterState newState = ClusterState.builder(clusterState)
+                    .metaData(MetaData.builder(clusterState.metaData())
+                        .clusterUUID(UUIDs.randomBase64UUID(random()))
+                        .clusterUUIDCommitted(true)
+                        .version(randomLongBetween(1L, Long.MAX_VALUE)))
+                    .incrementVersion().build();
+                throwException.set(true);
+                assertThat(expectThrows(OutOfMemoryError.class, () -> {
+                    if (randomBoolean()) {
+                        writeState(writer, newTerm, newState, clusterState);
+                    } else {
+                        writer.commit(newTerm, newState.version());
+                    }
+                }).getMessage(),
+                    containsString("simulated"));
+                assertFalse(writer.isOpen());
+            }
+
+            // check if we can open writer again
+            try (Writer ignored = persistedClusterStateService.createWriter()) {
+
+            }
+        }
+    }
+
+    public void testCrashesWithIOErrorOnCommitFailure() throws IOException {
+        final AtomicBoolean throwException = new AtomicBoolean();
+
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) {
+            final PersistedClusterStateService persistedClusterStateService
+                = new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE) {
+                @Override
+                Directory createDirectory(Path path) throws IOException {
+                    return new FilterDirectory(super.createDirectory(path)) {
+                        @Override
+                        public void rename(String source, String dest) throws IOException {
+                            if
(throwException.get() && dest.startsWith("segments")) { + throw new IOException("simulated"); + } + } + }; + } + }; + + try (Writer writer = persistedClusterStateService.createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService); + final long newTerm = randomNonNegativeLong(); + final ClusterState newState = ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .clusterUUID(UUIDs.randomBase64UUID(random())) + .clusterUUIDCommitted(true) + .version(randomLongBetween(1L, Long.MAX_VALUE))) + .incrementVersion().build(); + throwException.set(true); + assertThat(expectThrows(IOError.class, () -> { + if (randomBoolean()) { + writeState(writer, newTerm, newState, clusterState); + } else { + writer.commit(newTerm, newState.version()); + } + }).getMessage(), + containsString("simulated")); + assertFalse(writer.isOpen()); + } + + // check if we can open writer again + try (Writer ignored = persistedClusterStateService.createWriter()) { + + } + } + } + + public void testFailsIfGlobalMetadataIsMissing() throws IOException { + // if someone attempted surgery on the metadata index by hand, e.g. deleting broken segments, then maybe the global metadata + // isn't there any more + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + writeState(writer, 0L, ClusterState.builder(clusterState).version(randomLongBetween(1L, Long.MAX_VALUE)).build(), + clusterState); + } + + final Path brokenPath = randomFrom(nodeEnvironment.nodeDataPaths()); + try (Directory directory = new SimpleFSDirectory(brokenPath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME))) { + final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(); + indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); + try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) { + indexWriter.commit(); + } + } + + final String message = expectThrows(IllegalStateException.class, + () -> newPersistedClusterStateService(nodeEnvironment).loadBestOnDiskState()).getMessage(); + assertThat(message, allOf(containsString("no global metadata found"), containsString(brokenPath.toString()))); + } + } + + public void testFailsIfGlobalMetadataIsDuplicated() throws IOException { + // if someone attempted surgery on the metadata index by hand, e.g. 
deleting broken segments, then maybe the global metadata + // is duplicated + + final Path[] dataPaths1 = createDataPaths(); + final Path[] dataPaths2 = createDataPaths(); + final Path[] combinedPaths = Stream.concat(Arrays.stream(dataPaths1), Arrays.stream(dataPaths2)).toArray(Path[]::new); + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(combinedPaths)) { + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + writeState(writer, 0L, ClusterState.builder(clusterState).version(randomLongBetween(1L, Long.MAX_VALUE)).build(), + clusterState); + } + + final Path brokenPath = randomFrom(nodeEnvironment.nodeDataPaths()); + final Path dupPath = randomValueOtherThan(brokenPath, () -> randomFrom(nodeEnvironment.nodeDataPaths())); + try (Directory directory = new SimpleFSDirectory(brokenPath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME)); + Directory dupDirectory = new SimpleFSDirectory(dupPath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME))) { + try (IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) { + indexWriter.addIndexes(dupDirectory); + indexWriter.commit(); + } + } + + final String message = expectThrows(IllegalStateException.class, + () -> newPersistedClusterStateService(nodeEnvironment).loadBestOnDiskState()).getMessage(); + assertThat(message, allOf(containsString("duplicate global metadata found"), containsString(brokenPath.toString()))); + } + } + + public void testFailsIfIndexMetadataIsDuplicated() throws IOException { + // if someone attempted surgery on the metadata index by hand, e.g. deleting broken segments, then maybe some index metadata + // is duplicated + + final Path[] dataPaths1 = createDataPaths(); + final Path[] dataPaths2 = createDataPaths(); + final Path[] combinedPaths = Stream.concat(Arrays.stream(dataPaths1), Arrays.stream(dataPaths2)).toArray(Path[]::new); + + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(combinedPaths)) { + final String indexUUID = UUIDs.randomBase64UUID(random()); + final String indexName = randomAlphaOfLength(10); + + try (Writer writer = newPersistedClusterStateService(nodeEnvironment).createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(newPersistedClusterStateService(nodeEnvironment)); + writeState(writer, 0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .version(1L) + .coordinationMetaData(CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(1L).build()) + .put(IndexMetaData.builder(indexName) + .version(1L) + .settings(Settings.builder() + .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) + .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, indexUUID)))) + .incrementVersion().build(), + clusterState); + } + + final Path brokenPath = randomFrom(nodeEnvironment.nodeDataPaths()); + final Path dupPath = randomValueOtherThan(brokenPath, () -> randomFrom(nodeEnvironment.nodeDataPaths())); + try (Directory directory = new SimpleFSDirectory(brokenPath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME)); + Directory dupDirectory = new SimpleFSDirectory(dupPath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME))) { + try (IndexWriter indexWriter = new 
IndexWriter(directory, new IndexWriterConfig())) { + indexWriter.deleteDocuments(new Term("type", "global")); // do not duplicate global metadata + indexWriter.addIndexes(dupDirectory); + indexWriter.commit(); + } + } + + final String message = expectThrows(IllegalStateException.class, + () -> newPersistedClusterStateService(nodeEnvironment).loadBestOnDiskState()).getMessage(); + assertThat(message, allOf( + containsString("duplicate metadata found"), + containsString(brokenPath.toString()), + containsString(indexName), + containsString(indexUUID))); + } + } + + public void testPersistsAndReloadsIndexMetadataIffVersionOrTermChanges() throws IOException { + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) { + final PersistedClusterStateService persistedClusterStateService = newPersistedClusterStateService(nodeEnvironment); + final long globalVersion = randomLongBetween(1L, Long.MAX_VALUE); + final String indexUUID = UUIDs.randomBase64UUID(random()); + final long indexMetaDataVersion = randomLongBetween(1L, Long.MAX_VALUE); + + final long oldTerm = randomLongBetween(1L, Long.MAX_VALUE - 1); + final long newTerm = randomLongBetween(oldTerm + 1, Long.MAX_VALUE); + + try (Writer writer = persistedClusterStateService.createWriter()) { + ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService); + writeState(writer, 0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .version(globalVersion) + .coordinationMetaData(CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(oldTerm).build()) + .put(IndexMetaData.builder("test") + .version(indexMetaDataVersion - 1) // -1 because it's incremented in .put() + .settings(Settings.builder() + .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) + .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, indexUUID)))) + .incrementVersion().build(), + clusterState); + + + clusterState = loadPersistedClusterState(persistedClusterStateService); + IndexMetaData indexMetaData = clusterState.metaData().index("test"); + assertThat(indexMetaData.getIndexUUID(), equalTo(indexUUID)); + assertThat(indexMetaData.getVersion(), equalTo(indexMetaDataVersion)); + assertThat(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.get(indexMetaData.getSettings()), equalTo(0)); + // ensure we do not wastefully persist the same index metadata version by making a bad update with the same version + writer.writeIncrementalStateAndCommit(0L, clusterState, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .put(IndexMetaData.builder(indexMetaData).settings(Settings.builder() + .put(indexMetaData.getSettings()) + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)).build(), false)) + .incrementVersion().build()); + + clusterState = loadPersistedClusterState(persistedClusterStateService); + indexMetaData = clusterState.metaData().index("test"); + assertThat(indexMetaData.getIndexUUID(), equalTo(indexUUID)); + assertThat(indexMetaData.getVersion(), equalTo(indexMetaDataVersion)); + assertThat(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.get(indexMetaData.getSettings()), equalTo(0)); + // ensure that we do persist the same index metadata version by making an update with a higher version + writeState(writer, 0L, ClusterState.builder(clusterState) + 
.metaData(MetaData.builder(clusterState.metaData()) + .put(IndexMetaData.builder(indexMetaData).settings(Settings.builder() + .put(indexMetaData.getSettings()) + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 2)).build(), true)) + .incrementVersion().build(), + clusterState); + + clusterState = loadPersistedClusterState(persistedClusterStateService); + indexMetaData = clusterState.metaData().index("test"); + assertThat(indexMetaData.getVersion(), equalTo(indexMetaDataVersion + 1)); + assertThat(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.get(indexMetaData.getSettings()), equalTo(2)); + // ensure that we also persist the index metadata when the term changes + writeState(writer, 0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .coordinationMetaData(CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(newTerm).build()) + .put(IndexMetaData.builder(indexMetaData).settings(Settings.builder() + .put(indexMetaData.getSettings()) + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 3)).build(), false)) + .incrementVersion().build(), + clusterState); + } + + final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService); + final IndexMetaData indexMetaData = clusterState.metaData().index("test"); + assertThat(indexMetaData.getIndexUUID(), equalTo(indexUUID)); + assertThat(indexMetaData.getVersion(), equalTo(indexMetaDataVersion + 1)); + assertThat(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.get(indexMetaData.getSettings()), equalTo(3)); + } + } + + public void testPersistsAndReloadsIndexMetadataForMultipleIndices() throws IOException { + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) { + final PersistedClusterStateService persistedClusterStateService = newPersistedClusterStateService(nodeEnvironment); + + final long term = randomLongBetween(1L, Long.MAX_VALUE); + final String addedIndexUuid = UUIDs.randomBase64UUID(random()); + final String updatedIndexUuid = UUIDs.randomBase64UUID(random()); + final String deletedIndexUuid = UUIDs.randomBase64UUID(random()); + + try (Writer writer = persistedClusterStateService.createWriter()) { + final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService); + writeState(writer, 0L, ClusterState.builder(clusterState) + .metaData(MetaData.builder(clusterState.metaData()) + .version(clusterState.metaData().version() + 1) + .coordinationMetaData(CoordinationMetaData.builder(clusterState.coordinationMetaData()).term(term).build()) + .put(IndexMetaData.builder("updated") + .version(randomLongBetween(0L, Long.MAX_VALUE - 1) - 1) // -1 because it's incremented in .put() + .settings(Settings.builder() + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1) + .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, updatedIndexUuid))) + .put(IndexMetaData.builder("deleted") + .version(randomLongBetween(0L, Long.MAX_VALUE - 1) - 1) // -1 because it's incremented in .put() + .settings(Settings.builder() + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1) + .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, deletedIndexUuid)))) + .incrementVersion().build(), + clusterState); + } + + try (Writer writer = 
persistedClusterStateService.createWriter()) {
+                final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService);
+
+                assertThat(clusterState.metaData().indices().size(), equalTo(2));
+                assertThat(clusterState.metaData().index("updated").getIndexUUID(), equalTo(updatedIndexUuid));
+                assertThat(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.get(clusterState.metaData().index("updated").getSettings()),
+                    equalTo(1));
+                assertThat(clusterState.metaData().index("deleted").getIndexUUID(), equalTo(deletedIndexUuid));
+
+                writeState(writer, 0L, ClusterState.builder(clusterState)
+                    .metaData(MetaData.builder(clusterState.metaData())
+                        .version(clusterState.metaData().version() + 1)
+                        .remove("deleted")
+                        .put(IndexMetaData.builder("updated")
+                            .settings(Settings.builder()
+                                .put(clusterState.metaData().index("updated").getSettings())
+                                .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 2)))
+                        .put(IndexMetaData.builder("added")
+                            .version(randomLongBetween(0L, Long.MAX_VALUE - 1) - 1) // -1 because it's incremented in .put()
+                            .settings(Settings.builder()
+                                .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)
+                                .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
+                                .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT)
+                                .put(IndexMetaData.SETTING_INDEX_UUID, addedIndexUuid))))
+                    .incrementVersion().build(),
+                    clusterState);
+            }
+
+            final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService);
+
+            assertThat(clusterState.metaData().indices().size(), equalTo(2));
+            assertThat(clusterState.metaData().index("updated").getIndexUUID(), equalTo(updatedIndexUuid));
+            assertThat(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.get(clusterState.metaData().index("updated").getSettings()),
+                equalTo(2));
+            assertThat(clusterState.metaData().index("added").getIndexUUID(), equalTo(addedIndexUuid));
+            assertThat(clusterState.metaData().index("deleted"), nullValue());
+        }
+    }
+
+    public void testReloadsMetadataAcrossMultipleSegments() throws IOException {
+        try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createDataPaths())) {
+            final PersistedClusterStateService persistedClusterStateService = newPersistedClusterStateService(nodeEnvironment);
+
+            final int writes = between(5, 20);
+            final List<Index> indices = new ArrayList<>(writes);
+
+            try (Writer writer = persistedClusterStateService.createWriter()) {
+                for (int i = 0; i < writes; i++) {
+                    final Index index = new Index("test-" + i, UUIDs.randomBase64UUID(random()));
+                    indices.add(index);
+                    final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService);
+                    writeState(writer, 0L, ClusterState.builder(clusterState)
+                        .metaData(MetaData.builder(clusterState.metaData())
+                            .version(i + 2)
+                            .put(IndexMetaData.builder(index.getName())
+                                .settings(Settings.builder()
+                                    .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
+                                    .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0)
+                                    .put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT)
+                                    .put(IndexMetaData.SETTING_INDEX_UUID, index.getUUID()))))
+                        .incrementVersion().build(),
+                        clusterState);
+                }
+            }
+
+            final ClusterState clusterState = loadPersistedClusterState(persistedClusterStateService);
+            for (Index index : indices) {
+                final IndexMetaData indexMetaData = clusterState.metaData().index(index.getName());
+                assertThat(indexMetaData.getIndexUUID(), equalTo(index.getUUID()));
+            }
+        }
+    }
+
+    @Override
+    public Settings buildEnvSettings(Settings
settings) { + assertTrue(settings.hasValue(Environment.PATH_DATA_SETTING.getKey())); + return Settings.builder() + .put(settings) + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath()).build(); + } + + public static Path[] createDataPaths() { + final Path[] dataPaths = new Path[randomIntBetween(1, 4)]; + for (int i = 0; i < dataPaths.length; i++) { + dataPaths[i] = createTempDir(); + } + return dataPaths; + } + + private NodeEnvironment newNodeEnvironment(Path[] dataPaths) throws IOException { + return newNodeEnvironment(Settings.builder() + .putList(Environment.PATH_DATA_SETTING.getKey(), Arrays.stream(dataPaths).map(Path::toString).collect(Collectors.toList())) + .build()); + } + + private static ClusterState loadPersistedClusterState(PersistedClusterStateService persistedClusterStateService) throws IOException { + final PersistedClusterStateService.OnDiskState onDiskState = persistedClusterStateService.loadBestOnDiskState(); + return clusterStateFromMetadata(onDiskState.lastAcceptedVersion, onDiskState.metaData); + } + + private static ClusterState clusterStateFromMetadata(long version, MetaData metaData) { + return ClusterState.builder(ClusterName.DEFAULT).version(version).metaData(metaData).build(); + } + + +} diff --git a/server/src/test/java/org/elasticsearch/index/seqno/PeerRecoveryRetentionLeaseCreationIT.java b/server/src/test/java/org/elasticsearch/index/seqno/PeerRecoveryRetentionLeaseCreationIT.java index 0f543f9a4d051..32957b0f9e42c 100644 --- a/server/src/test/java/org/elasticsearch/index/seqno/PeerRecoveryRetentionLeaseCreationIT.java +++ b/server/src/test/java/org/elasticsearch/index/seqno/PeerRecoveryRetentionLeaseCreationIT.java @@ -42,6 +42,7 @@ protected boolean forbidPrivateIndexSettings() { return false; } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/48701") public void testCanRecoverFromStoreWithoutPeerRecoveryRetentionLease() throws Exception { /* * In a full cluster restart from a version without peer-recovery retention leases, the leases on disk will not include a lease for diff --git a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java index 5cd5a1328695a..f9c3b5f4b4516 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java +++ b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java @@ -56,6 +56,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.gateway.GatewayMetaState; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.MergePolicyConfig; @@ -85,6 +86,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import static org.elasticsearch.common.util.CollectionUtils.iterableAsArrayList; @@ -155,8 +157,8 @@ public void testCorruptIndex() throws Exception { fail("expected the command to fail as node is locked"); } catch (Exception e) { assertThat(e.getMessage(), - allOf(containsString("Failed to lock node's directory"), - containsString("is Elasticsearch still running ?"))); + allOf(containsString("failed to lock node's directory"), + containsString("is Elasticsearch still running?"))); } final Path 
indexDir = getPathToShardData(indexName, ShardPath.INDEX_FOLDER_NAME); @@ -478,6 +480,9 @@ public void testCorruptTranslogTruncationOfReplica() throws Exception { final Settings node1PathSettings = internalCluster().dataPathSettings(node1); final Settings node2PathSettings = internalCluster().dataPathSettings(node2); + assertBusy(() -> internalCluster().getInstances(GatewayMetaState.class) + .forEach(gw -> assertTrue(gw.allPendingAsyncStatesWritten()))); + // stop data nodes internalCluster().stopRandomDataNode(); internalCluster().stopRandomDataNode(); @@ -574,7 +579,8 @@ public void testResolvePath() throws Exception { final Path indexPath = indexPathByNodeName.get(nodeName); final OptionSet options = parser.parse("--dir", indexPath.toAbsolutePath().toString()); command.findAndProcessShardPath(options, environmentByNodeName.get(nodeName), - shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath))); + Stream.of(environmentByNodeName.get(nodeName).dataFiles()).map(path -> NodeEnvironment.resolveNodePath(path, 0)) + .toArray(Path[]::new), 0, state, shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath))); } } diff --git a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandTests.java b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandTests.java index 847c4543f63b0..af0244c00c2c7 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandTests.java @@ -25,17 +25,23 @@ import org.elasticsearch.Version; import org.elasticsearch.cli.MockTerminal; import org.elasticsearch.cli.Terminal; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingHelper; import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.cluster.routing.TestShardRouting; import org.elasticsearch.common.CheckedFunction; +import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.gateway.PersistedClusterStateService; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.MergePolicyConfig; import org.elasticsearch.index.engine.EngineException; @@ -52,6 +58,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; +import java.util.Objects; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -72,7 +79,9 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase { private Environment environment; private ShardPath shardPath; private IndexMetaData indexMetaData; + private ClusterState clusterState; private IndexShard indexShard; + private Path[] dataPaths; private Path translogPath; private Path indexPath; @@ -81,7 +90,7 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase { @Before public void setup() throws IOException { - shardId = new ShardId("index0", "_na_", 0); + shardId = new ShardId("index0", 
UUIDs.randomBase64UUID(), 0); final String nodeId = randomAlphaOfLength(10); routing = TestShardRouting.newShardRouting(shardId, nodeId, true, ShardRoutingState.INITIALIZING, RecoverySource.EmptyStoreRecoverySource.INSTANCE); @@ -96,11 +105,13 @@ public void setup() throws IOException { // create same directory structure as prod does final Path path = NodeEnvironment.resolveNodePath(dataDir, 0); Files.createDirectories(path); + dataPaths = new Path[] {path}; final Settings settings = Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false) .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetaData.SETTING_INDEX_UUID, shardId.getIndex().getUUID()) .build(); final NodeEnvironment.NodePath nodePath = new NodeEnvironment.NodePath(path); @@ -111,6 +122,16 @@ public void setup() throws IOException { .putMapping("_doc", "{ \"properties\": {} }"); indexMetaData = metaData.build(); + clusterState = ClusterState.builder(ClusterName.DEFAULT).metaData(MetaData.builder().put(indexMetaData, false).build()).build(); + + try (NodeEnvironment.NodeLock lock = new NodeEnvironment.NodeLock(0, logger, environment, Files::exists)) { + final Path[] dataPaths = Arrays.stream(lock.getNodePaths()).filter(Objects::nonNull).map(p -> p.path).toArray(Path[]::new); + try (PersistedClusterStateService.Writer writer = new PersistedClusterStateService(dataPaths, nodeId, + xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE, true).createWriter()) { + writer.writeFullStateAndCommit(1L, clusterState); + } + } + indexShard = newStartedShard(p -> newShard(routing, shardPath, indexMetaData, null, null, new InternalEngineFactory(), () -> { }, RetentionLeaseSyncer.EMPTY, EMPTY_EVENT_LISTENER), true); @@ -331,7 +352,6 @@ public void testResolveIndexDirectory() throws Exception { // index a single doc to have files on a disk indexDoc(indexShard, "_doc", "0", "{}"); flushShard(indexShard, true); - writeIndexState(); // close shard closeShards(indexShard); @@ -343,11 +363,11 @@ public void testResolveIndexDirectory() throws Exception { final OptionSet options = parser.parse("--index", shardId.getIndex().getName(), "--shard-id", Integer.toString(shardId.id())); - command.findAndProcessShardPath(options, environment, + command.findAndProcessShardPath(options, environment, dataPaths, 0, clusterState, shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath))); final OptionSet options2 = parser.parse("--dir", indexPath.toAbsolutePath().toString()); - command.findAndProcessShardPath(options2, environment, + command.findAndProcessShardPath(options2, environment, dataPaths, 0, clusterState, shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath))); } @@ -485,17 +505,7 @@ private int indexDocs(IndexShard indexShard, boolean flushLast) throws IOExcepti logger.info("--> indexed {} docs, {} to keep", numDocs, numDocsToKeep); - writeIndexState(); return numDocsToKeep; } - private void writeIndexState() throws IOException { - // create _state of IndexMetaData - try(NodeEnvironment nodeEnvironment = new NodeEnvironment(environment.settings(), environment)) { - final Path[] paths = nodeEnvironment.indexPaths(indexMetaData.getIndex()); - IndexMetaData.FORMAT.writeAndCleanup(indexMetaData, paths); - logger.info("--> index metadata persisted to {} ", Arrays.toString(paths)); - } - } - } diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerSingleNodeTests.java 
b/server/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerSingleNodeTests.java index 436126930e3d1..b3fdd53ee8f15 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerSingleNodeTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerSingleNodeTests.java @@ -122,7 +122,7 @@ public void afterIndexRemoved(Index index, IndexSettings indexSettings, IndexRem }; indicesService.removeIndex(idx, DELETED, "simon says"); try { - IndexService index = indicesService.createIndex(metaData, Arrays.asList(countingListener)); + IndexService index = indicesService.createIndex(metaData, Arrays.asList(countingListener), false); assertEquals(3, counter.get()); idx = index.index(); ShardRouting newRouting = shardRouting; diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java b/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java index 8dd3eff6e9152..472ccef42d95c 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java @@ -223,13 +223,11 @@ public void testDeleteIndexStore() throws Exception { ClusterService clusterService = getInstanceFromNode(ClusterService.class); IndexMetaData firstMetaData = clusterService.state().metaData().index("test"); assertTrue(test.hasShard(0)); + ShardPath firstPath = ShardPath.loadShardPath(logger, getNodeEnvironment(), new ShardId(test.index(), 0), + test.getIndexSettings().customDataPath()); - try { - indicesService.deleteIndexStore("boom", firstMetaData, clusterService.state()); - fail(); - } catch (IllegalStateException ex) { - // all good - } + expectThrows(IllegalStateException.class, () -> indicesService.deleteIndexStore("boom", firstMetaData)); + assertTrue(firstPath.exists()); GatewayMetaState gwMetaState = getInstanceFromNode(GatewayMetaState.class); MetaData meta = gwMetaState.getMetaData(); @@ -237,37 +235,25 @@ public void testDeleteIndexStore() throws Exception { assertNotNull(meta.index("test")); assertAcked(client().admin().indices().prepareDelete("test")); + assertFalse(firstPath.exists()); + meta = gwMetaState.getMetaData(); assertNotNull(meta); assertNull(meta.index("test")); - test = createIndex("test"); client().prepareIndex("test", "type", "1").setSource("field", "value").setRefreshPolicy(IMMEDIATE).get(); client().admin().indices().prepareFlush("test").get(); assertHitCount(client().prepareSearch("test").get(), 1); IndexMetaData secondMetaData = clusterService.state().metaData().index("test"); - assertAcked(client().admin().indices().prepareClose("test")); - ShardPath path = ShardPath.loadShardPath(logger, getNodeEnvironment(), new ShardId(test.index(), 0), + assertAcked(client().admin().indices().prepareClose("test").setWaitForActiveShards(1)); + ShardPath secondPath = ShardPath.loadShardPath(logger, getNodeEnvironment(), new ShardId(test.index(), 0), test.getIndexSettings().customDataPath()); - assertTrue(path.exists()); + assertTrue(secondPath.exists()); - try { - indicesService.deleteIndexStore("boom", secondMetaData, clusterService.state()); - fail(); - } catch (IllegalStateException ex) { - // all good - } - - assertTrue(path.exists()); + expectThrows(IllegalStateException.class, () -> indicesService.deleteIndexStore("boom", secondMetaData)); + assertTrue(secondPath.exists()); - // now delete the old one and make sure we resolve against the name - try { - indicesService.deleteIndexStore("boom", firstMetaData, 
-            indicesService.deleteIndexStore("boom", firstMetaData, clusterService.state());
-            fail();
-        } catch (IllegalStateException ex) {
-            // all good
-        }
 
         assertAcked(client().admin().indices().prepareOpen("test"));
         ensureGreen("test");
     }
@@ -563,7 +549,7 @@ public void testGetEngineFactory() throws IOException {
                     .numberOfShards(1)
                     .numberOfReplicas(0)
                     .build();
-            final IndexService indexService = indicesService.createIndex(indexMetaData, Collections.emptyList());
+            final IndexService indexService = indicesService.createIndex(indexMetaData, Collections.emptyList(), false);
             if (value != null && value) {
                 assertThat(indexService.getEngineFactory(), instanceOf(FooEnginePlugin.FooEngineFactory.class));
             } else {
@@ -589,7 +575,7 @@ public void testConflictingEngineFactories() {
         final IndicesService indicesService = getIndicesService();
         final IllegalStateException e =
-            expectThrows(IllegalStateException.class, () -> indicesService.createIndex(indexMetaData, Collections.emptyList()));
+            expectThrows(IllegalStateException.class, () -> indicesService.createIndex(indexMetaData, Collections.emptyList(), false));
         final String pattern =
             ".*multiple engine factories provided for \\[foobar/.*\\]: \\[.*FooEngineFactory\\],\\[.*BarEngineFactory\\].*";
         assertThat(e, hasToString(new RegexMatcher(pattern)));
@@ -675,7 +661,7 @@ public void testOptimizeAutoGeneratedIdsSettingRemoval() throws Exception {
             .numberOfShards(1)
             .numberOfReplicas(0)
             .build();
-        IndexService indexService = indicesService.createIndex(indexMetaData, Collections.emptyList());
+        IndexService indexService = indicesService.createIndex(indexMetaData, Collections.emptyList(), false);
         assertNotNull(indexService);
 
         final Index index2 = new Index("bar-index", UUIDs.randomBase64UUID());
@@ -689,7 +675,7 @@ public void testOptimizeAutoGeneratedIdsSettingRemoval() throws Exception {
             .numberOfReplicas(0)
             .build();
         IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
-            () -> indicesService.createIndex(indexMetaData2, Collections.emptyList()));
+            () -> indicesService.createIndex(indexMetaData2, Collections.emptyList(), false));
         assertEquals("Setting [" + EngineConfig.INDEX_OPTIMIZE_AUTO_GENERATED_IDS.getKey() + "] was removed in version 7.0.0",
             ex.getMessage());
@@ -703,7 +689,7 @@ public void testOptimizeAutoGeneratedIdsSettingRemoval() throws Exception {
             .numberOfShards(1)
             .numberOfReplicas(0)
             .build();
-        IndexService indexService2 = indicesService.createIndex(indexMetaData3, Collections.emptyList());
+        IndexService indexService2 = indicesService.createIndex(indexMetaData3, Collections.emptyList(), false);
         assertNotNull(indexService2);
     }
 
diff --git a/server/src/test/java/org/elasticsearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java b/server/src/test/java/org/elasticsearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java
index 46b4d7fd035ca..54b4b5aa09bb7 100644
--- a/server/src/test/java/org/elasticsearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java
+++ b/server/src/test/java/org/elasticsearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java
@@ -195,7 +195,8 @@ protected class MockIndicesService implements AllocatedIndices<MockIndexShard, MockIndexService> {
 
         @Override
         public MockIndexService createIndex(IndexMetaData indexMetaData,
-                                            List<IndexEventListener> buildInIndexListener) throws IOException {
+                                            List<IndexEventListener> buildInIndexListener,
+                                            boolean writeDanglingIndices) throws IOException {
             MockIndexService indexService = new MockIndexService(new IndexSettings(indexMetaData, Settings.EMPTY));
             indices = newMapBuilder(indices).put(indexMetaData.getIndexUUID(), indexService).immutableMap();
             return indexService;
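Besides threading the new `boolean` through `createIndex`, the `IndicesServiceTests` changes above fold the old try/fail/catch blocks into `expectThrows`, which both asserts the exception type and hands the exception back for inspection. A small self-contained example of the idiom, using plain JUnit 4.13 (`parsePositive` is a made-up method for illustration; the tests above get an equivalent `expectThrows` from their test base class):

[source,java]
----
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.expectThrows;

import org.junit.Test;

public class ExpectThrowsExample {

    private static int parsePositive(String s) {
        int value = Integer.parseInt(s);
        if (value <= 0) {
            throw new IllegalStateException("expected a positive number but got " + value);
        }
        return value;
    }

    @Test
    public void rejectsNonPositiveInput() {
        // expectThrows fails unless exactly this exception type is thrown, and
        // returns it so the message can be checked -- unlike try/fail/catch,
        // which passes silently when the wrong code path throws.
        IllegalStateException e = expectThrows(IllegalStateException.class, () -> parsePositive("-1"));
        assertEquals("expected a positive number but got -1", e.getMessage());
    }
}
----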
diff --git a/server/src/test/java/org/elasticsearch/indices/cluster/ClusterStateChanges.java b/server/src/test/java/org/elasticsearch/indices/cluster/ClusterStateChanges.java
index 627a08960bdf0..e0500860819cb 100644
--- a/server/src/test/java/org/elasticsearch/indices/cluster/ClusterStateChanges.java
+++ b/server/src/test/java/org/elasticsearch/indices/cluster/ClusterStateChanges.java
@@ -108,6 +108,7 @@
 import static org.hamcrest.Matchers.notNullValue;
 import static org.junit.Assert.assertThat;
 import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyBoolean;
 import static org.mockito.Matchers.anyList;
 import static org.mockito.Matchers.anyString;
 import static org.mockito.Mockito.doAnswer;
@@ -161,7 +162,7 @@ public ClusterStateChanges(NamedXContentRegistry xContentRegistry, ThreadPool th
         // MetaDataCreateIndexService creates indices using its IndicesService instance to check mappings -> fake it here
         try {
             @SuppressWarnings("unchecked") final List<IndexEventListener> listeners = anyList();
-            when(indicesService.createIndex(any(IndexMetaData.class), listeners))
+            when(indicesService.createIndex(any(IndexMetaData.class), listeners, anyBoolean()))
                 .then(invocationOnMock -> {
                     IndexService indexService = mock(IndexService.class);
                     IndexMetaData indexMetaData = (IndexMetaData)invocationOnMock.getArguments()[0];
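`ClusterStateChanges` stubs `IndicesService.createIndex` with Mockito matchers, so the new `boolean` parameter needs its own `anyBoolean()`: once any argument is a matcher, Mockito requires every argument to be one. A sketch against a hypothetical interface mirroring the three-argument shape (modern `ArgumentMatchers` shown; the test above uses the older `org.mockito.Matchers` aliases):

[source,java]
----
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.List;

public class MatcherStubExample {

    // Hypothetical service mirroring createIndex(metadata, listeners, writeDanglingIndices).
    interface IndexCreator {
        String createIndex(String name, List<String> listeners, boolean writeDanglingIndices);
    }

    public static void main(String[] args) {
        IndexCreator creator = mock(IndexCreator.class);

        // Every argument is a matcher; mixing matchers with literal values
        // (e.g. anyString(), listeners, true) raises InvalidUseOfMatchersException.
        when(creator.createIndex(anyString(), anyList(), anyBoolean()))
            .then(invocation -> "created:" + invocation.getArgument(0));

        System.out.println(creator.createIndex("foo", List.of(), true)); // prints created:foo
    }
}
----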
diff --git a/server/src/test/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java b/server/src/test/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java
index ba77877f7f67d..5dbb34df24a54 100644
--- a/server/src/test/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java
+++ b/server/src/test/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java
@@ -20,6 +20,7 @@
 package org.elasticsearch.indices.recovery;
 
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
 import org.elasticsearch.test.InternalTestCluster;
@@ -29,16 +30,18 @@
 import static org.elasticsearch.cluster.metadata.IndexGraveyard.SETTING_MAX_TOMBSTONES;
 import static org.elasticsearch.gateway.DanglingIndicesState.AUTO_IMPORT_DANGLING_INDICES_SETTING;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.hamcrest.Matchers.equalTo;
 
 @ClusterScope(numDataNodes = 0, scope = ESIntegTestCase.Scope.TEST)
 public class DanglingIndicesIT extends ESIntegTestCase {
     private static final String INDEX_NAME = "test-idx-1";
 
-    private Settings buildSettings(boolean importDanglingIndices) {
+    private Settings buildSettings(boolean writeDanglingIndices, boolean importDanglingIndices) {
         return Settings.builder()
             // Don't keep any indices in the graveyard, so that when we delete an index,
             // it's definitely considered to be dangling.
             .put(SETTING_MAX_TOMBSTONES.getKey(), 0)
+            .put(IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), writeDanglingIndices)
             .put(AUTO_IMPORT_DANGLING_INDICES_SETTING.getKey(), importDanglingIndices)
             .build();
     }
@@ -48,10 +51,21 @@ private Settings buildSettings(boolean importDanglingIndices) {
      * the cluster, so long as the recovery setting is enabled.
      */
     public void testDanglingIndicesAreRecoveredWhenSettingIsEnabled() throws Exception {
-        final Settings settings = buildSettings(true);
+        final Settings settings = buildSettings(true, true);
         internalCluster().startNodes(3, settings);
 
         createIndex(INDEX_NAME, Settings.builder().put("number_of_replicas", 2).build());
+        ensureGreen(INDEX_NAME);
+        assertBusy(() -> internalCluster().getInstances(IndicesService.class).forEach(
+            indicesService -> assertTrue(indicesService.allPendingDanglingIndicesWritten())));
+
+        boolean refreshIntervalChanged = randomBoolean();
+        if (refreshIntervalChanged) {
+            client().admin().indices().prepareUpdateSettings(INDEX_NAME).setSettings(
+                Settings.builder().put("index.refresh_interval", "42s").build()).get();
+            assertBusy(() -> internalCluster().getInstances(IndicesService.class).forEach(
+                indicesService -> assertTrue(indicesService.allPendingDanglingIndicesWritten())));
+        }
 
         if (randomBoolean()) {
             client().admin().indices().prepareClose(INDEX_NAME).get();
@@ -63,12 +77,17 @@ public void testDanglingIndicesAreRecoveredWhenSettingIsEnabled() throws Excepti
 
             @Override
             public Settings onNodeStopped(String nodeName) throws Exception {
+                ensureClusterSizeConsistency();
                 assertAcked(client().admin().indices().prepareDelete(INDEX_NAME));
                 return super.onNodeStopped(nodeName);
             }
         });
 
         assertBusy(() -> assertTrue("Expected dangling index " + INDEX_NAME + " to be recovered",
             indexExists(INDEX_NAME)));
+        if (refreshIntervalChanged) {
+            assertThat(client().admin().indices().prepareGetSettings(INDEX_NAME).get().getSetting(INDEX_NAME, "index.refresh_interval"),
+                equalTo("42s"));
+        }
         ensureGreen(INDEX_NAME);
     }
 
@@ -77,15 +96,49 @@ public Settings onNodeStopped(String nodeName) throws Exception {
      * the cluster when the recovery setting is disabled.
      */
     public void testDanglingIndicesAreNotRecoveredWhenSettingIsDisabled() throws Exception {
-        internalCluster().startNodes(3, buildSettings(false));
+        internalCluster().startNodes(3, buildSettings(true, false));
+
+        createIndex(INDEX_NAME, Settings.builder().put("number_of_replicas", 2).build());
+        ensureGreen(INDEX_NAME);
+        assertBusy(() -> internalCluster().getInstances(IndicesService.class).forEach(
+            indicesService -> assertTrue(indicesService.allPendingDanglingIndicesWritten())));
+
+        // Restart node, deleting the index in its absence, so that there is a dangling index to recover
+        internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
+
+            @Override
+            public Settings onNodeStopped(String nodeName) throws Exception {
+                ensureClusterSizeConsistency();
+                assertAcked(client().admin().indices().prepareDelete(INDEX_NAME));
+                return super.onNodeStopped(nodeName);
+            }
+        });
+
+        // Since index recovery is async, we can't prove index recovery will never occur, just that it doesn't occur within some reasonable
+        // amount of time
+        assertFalse(
+            "Did not expect dangling index " + INDEX_NAME + " to be recovered",
+            waitUntil(() -> indexExists(INDEX_NAME), 1, TimeUnit.SECONDS)
+        );
+    }
+
+    /**
+     * Check that when dangling indices are not written, then they cannot be recovered into the cluster.
+     */
+    public void testDanglingIndicesAreNotRecoveredWhenNotWritten() throws Exception {
+        internalCluster().startNodes(3, buildSettings(false, true));
 
         createIndex(INDEX_NAME, Settings.builder().put("number_of_replicas", 2).build());
+        ensureGreen(INDEX_NAME);
+        internalCluster().getInstances(IndicesService.class).forEach(
+            indicesService -> assertTrue(indicesService.allPendingDanglingIndicesWritten()));
 
         // Restart node, deleting the index in its absence, so that there is a dangling index to recover
         internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
 
             @Override
             public Settings onNodeStopped(String nodeName) throws Exception {
+                ensureClusterSizeConsistency();
                 assertAcked(client().admin().indices().prepareDelete(INDEX_NAME));
                 return super.onNodeStopped(nodeName);
             }
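The comment in `testDanglingIndicesAreNotRecoveredWhenSettingIsDisabled` captures the asymmetry these tests work around: a positive expectation can be retried until it holds (`assertBusy`), while a negative one can only be bounded in time, so `waitUntil` polls for a fixed window and the test asserts the condition never became true inside it. A simplified stand-in for the polling helper, not the `ESTestCase` implementation:

[source,java]
----
import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

public final class BoundedWaitSketch {

    /** Polls the condition until it holds or the timeout elapses; returns whether it was ever observed. */
    static boolean waitUntil(BooleanSupplier condition, long timeout, TimeUnit unit) throws InterruptedException {
        final long deadlineNanos = System.nanoTime() + unit.toNanos(timeout);
        while (System.nanoTime() < deadlineNanos) {
            if (condition.getAsBoolean()) {
                return true;
            }
            Thread.sleep(50); // poll interval
        }
        return condition.getAsBoolean(); // one last check at the deadline
    }

    public static void main(String[] args) throws InterruptedException {
        // A negative expectation: the condition (always false here) was never
        // observed within the window. This bounds the claim in time; it cannot
        // prove the event will never happen later.
        boolean recovered = waitUntil(() -> false, 1, TimeUnit.SECONDS);
        System.out.println("recovered within window: " + recovered); // false
    }
}
----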
diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java
index e5b64b2c4034f..1563b5fc32df8 100644
--- a/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java
@@ -35,7 +35,6 @@
 import org.elasticsearch.cluster.coordination.CoordinationMetaData.VotingConfiguration;
 import org.elasticsearch.cluster.coordination.LinearizabilityChecker.History;
 import org.elasticsearch.cluster.coordination.LinearizabilityChecker.SequentialSpec;
-import org.elasticsearch.cluster.metadata.Manifest;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodeRole;
@@ -57,14 +56,15 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.transport.TransportAddress;
 import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor;
 import org.elasticsearch.discovery.DiscoveryModule;
 import org.elasticsearch.discovery.SeedHostsProvider;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.gateway.ClusterStateUpdaters;
 import org.elasticsearch.gateway.GatewayService;
-import org.elasticsearch.gateway.MetaStateService;
 import org.elasticsearch.gateway.MockGatewayMetaState;
+import org.elasticsearch.gateway.PersistedClusterStateService;
 import org.elasticsearch.node.Node;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.disruption.DisruptableMockTransport;
@@ -741,17 +741,15 @@ class MockPersistedState implements CoordinationState.PersistedState {
             try {
                 if (oldState.nodeEnvironment != null) {
                     nodeEnvironment = oldState.nodeEnvironment;
-                    final MetaStateService metaStateService = new MetaStateService(nodeEnvironment, xContentRegistry());
                     final MetaData updatedMetaData = adaptGlobalMetaData.apply(oldState.getLastAcceptedState().metaData());
-                    if (updatedMetaData != oldState.getLastAcceptedState().metaData()) {
-                        metaStateService.writeGlobalStateAndUpdateManifest("update global state", updatedMetaData);
-                    }
                     final long updatedTerm = adaptCurrentTerm.apply(oldState.getCurrentTerm());
-                    if (updatedTerm != oldState.getCurrentTerm()) {
-                        final Manifest manifest = metaStateService.loadManifestOrEmpty();
-                        metaStateService.writeManifestAndCleanup("update term",
-                            new Manifest(updatedTerm, manifest.getClusterStateVersion(), manifest.getGlobalGeneration(),
-                                manifest.getIndexGenerations()));
+                    if (updatedMetaData != oldState.getLastAcceptedState().metaData() || updatedTerm != oldState.getCurrentTerm()) {
+                        try (PersistedClusterStateService.Writer writer =
+                            new PersistedClusterStateService(nodeEnvironment, xContentRegistry(), BigArrays.NON_RECYCLING_INSTANCE)
+                                .createWriter()) {
+                            writer.writeFullStateAndCommit(updatedTerm,
+                                ClusterState.builder(oldState.getLastAcceptedState()).metaData(updatedMetaData).build());
+                        }
                     }
                     final MockGatewayMetaState gatewayMetaState = new MockGatewayMetaState(newLocalNode);
                     gatewayMetaState.start(Settings.EMPTY, nodeEnvironment, xContentRegistry());
@@ -854,6 +852,11 @@ public void setLastAcceptedState(ClusterState clusterState) {
 
             @Override
             public void close() {
                 assertTrue(openPersistedStates.remove(this));
+                try {
+                    delegate.close();
+                } catch (IOException e) {
+                    throw new AssertionError("unexpected", e);
+                }
             }
         }
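The `close()` override added above delegates to a persisted state whose `close()` declares a checked `IOException`, while the override itself does not; the test wrapper therefore converts the checked exception into an `AssertionError` rather than swallowing it. A minimal sketch of that pattern in plain Java:

[source,java]
----
import java.io.Closeable;
import java.io.IOException;

// Sketch of the close() pattern above: an override may narrow the throws
// clause of Closeable.close(), so the delegate's checked IOException must be
// translated into an unchecked failure instead of being swallowed.
public class ClosingWrapperSketch implements Closeable {

    private final Closeable delegate;

    public ClosingWrapperSketch(Closeable delegate) {
        this.delegate = delegate;
    }

    @Override
    public void close() { // narrower than Closeable.close() throws IOException, which is legal
        try {
            delegate.close();
        } catch (IOException e) {
            throw new AssertionError("unexpected", e); // surface the failure
        }
    }
}
----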
diff --git a/test/framework/src/main/java/org/elasticsearch/gateway/MockGatewayMetaState.java b/test/framework/src/main/java/org/elasticsearch/gateway/MockGatewayMetaState.java
index b73a90b428485..25f43e28ca88c 100644
--- a/test/framework/src/main/java/org/elasticsearch/gateway/MockGatewayMetaState.java
+++ b/test/framework/src/main/java/org/elasticsearch/gateway/MockGatewayMetaState.java
@@ -20,17 +20,23 @@
 package org.elasticsearch.gateway;
 
 import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.Manifest;
+import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.metadata.MetaDataIndexUpgradeService;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.settings.ClusterSettings;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.plugins.MetaDataUpgrader;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 
+import java.io.IOException;
+
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
@@ -48,9 +54,10 @@ public MockGatewayMetaState(DiscoveryNode localNode) {
     }
 
     @Override
-    void upgradeMetaData(Settings settings, MetaStateService metaStateService, MetaDataIndexUpgradeService metaDataIndexUpgradeService,
-                         MetaDataUpgrader metaDataUpgrader) {
+    MetaData upgradeMetaDataForNode(MetaData metaData, MetaDataIndexUpgradeService metaDataIndexUpgradeService,
+                                    MetaDataUpgrader metaDataUpgrader) {
         // MetaData upgrade is tested in GatewayMetaStateTests, we override this method to NOP to make mocking easier
+        return metaData;
     }
 
     @Override
@@ -65,7 +72,13 @@ public void start(Settings settings, NodeEnvironment nodeEnvironment, NamedXCont
         final ClusterService clusterService = mock(ClusterService.class);
         when(clusterService.getClusterSettings())
             .thenReturn(new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS));
-        start(settings, transportService, clusterService, new MetaStateService(nodeEnvironment, xContentRegistry),
-            null, null);
+        final MetaStateService metaStateService = mock(MetaStateService.class);
+        try {
+            when(metaStateService.loadFullState()).thenReturn(new Tuple<>(Manifest.empty(), MetaData.builder().build()));
+        } catch (IOException e) {
+            throw new AssertionError(e);
+        }
+        start(settings, transportService, clusterService, metaStateService,
+            null, null, new PersistedClusterStateService(nodeEnvironment, xContentRegistry, BigArrays.NON_RECYCLING_INSTANCE));
     }
 }
diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java
index 9841f060de8b2..f4d022ee7f427 100644
--- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java
+++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java
@@ -73,6 +73,7 @@
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.PageCacheRecycler;
 import org.elasticsearch.common.util.concurrent.EsExecutors;
+import org.elasticsearch.common.util.concurrent.FutureUtils;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
 import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.core.internal.io.IOUtils;
@@ -1701,7 +1702,9 @@ private synchronized void startAndPublishNodesAndClients(List<NodeAndClient> nod
         } catch (InterruptedException e) {
             throw new AssertionError("interrupted while starting nodes", e);
         } catch (ExecutionException e) {
-            throw new RuntimeException("failed to start nodes", e);
+            RuntimeException re = FutureUtils.rethrowExecutionException(e);
+            re.addSuppressed(new RuntimeException("failed to start nodes"));
+            throw re;
         }
         nodeAndClients.forEach(this::publishNode);
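Finally, the `InternalTestCluster` change replaces blanket wrapping of `ExecutionException` with unwrap-and-suppress: the task's own exception becomes the primary failure and the call-site context rides along as a suppressed exception. `FutureUtils.rethrowExecutionException` is Elasticsearch-internal; the sketch below shows the idea with a hypothetical stand-in.

[source,java]
----
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;

public class RethrowSketch {

    // Hypothetical stand-in for FutureUtils.rethrowExecutionException: surface
    // the cause directly when it is unchecked, instead of burying it in a wrapper.
    static RuntimeException rethrowExecutionException(ExecutionException e) {
        final Throwable cause = e.getCause();
        return cause instanceof RuntimeException ? (RuntimeException) cause : new RuntimeException(cause);
    }

    public static void main(String[] args) throws InterruptedException {
        CompletableFuture<Void> startup = CompletableFuture.failedFuture(new IllegalStateException("node failed"));
        try {
            startup.get();
        } catch (ExecutionException e) {
            RuntimeException re = rethrowExecutionException(e);
            re.addSuppressed(new RuntimeException("failed to start nodes")); // context, not the headline
            throw re; // primary failure: IllegalStateException("node failed")
        }
    }
}
----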