Skip to content

Commit

Permalink
Add extra logging for investigation into #52000 (#52472)
Browse files Browse the repository at this point in the history
It looks like #52000 is caused by a slowdown in cluster state application
(possibly due to #50907), but I would like to understand the details, to make
sure nothing else is going on, before simply increasing the timeout.
This commit enables some relevant `DEBUG` loggers and also captures stack
traces from all threads rather than just the three hottest ones.
  • Loading branch information
DaveCTurner authored Feb 18, 2020
1 parent 306d7a0 commit 34f302b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import org.elasticsearch.test.ESIntegTestCase.Scope;
import org.elasticsearch.test.InternalTestCluster;
import org.elasticsearch.test.MockLogAppender;
import org.elasticsearch.test.junit.annotations.TestLogging;

import java.nio.file.Path;
import java.util.Arrays;
Expand Down Expand Up @@ -187,6 +188,8 @@ public void testRerouteWithAllocateLocalGateway_enableAllocationSettings() throw
rerouteWithAllocateLocalGateway(commonSettings);
}

@TestLogging(reason = "https://github.com/elastic/elasticsearch/issues/52000",
value = "org.elasticsearch.gateway.PersistedClusterStateService:DEBUG,org.elasticsearch.cluster.service.MasterService:DEBUG")
public void testDelayWithALargeAmountOfShards() throws Exception {
Settings commonSettings = Settings.builder()
.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -873,8 +873,8 @@ private ClusterHealthStatus ensureColor(ClusterHealthStatus clusterHealthStatus,

ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
if (actionGet.isTimedOut()) {
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setThreads(99999).setIgnoreIdleThreads(false)
.get().getNodes().stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
method,
client().admin().cluster().prepareState().get().getState(),
Expand Down

0 comments on commit 34f302b

Please sign in to comment.