Skip to content

Commit

Permalink
[#4992] Add application.conf setting to dump output of cluster_health.py
Browse files Browse the repository at this point in the history
Summary:
Currently, the output of cluster_health.py is not written to the terminal.

This diff adds the flag "yb.health.logOutput", which can be set through the runtime config API and determines whether the output of the cluster_health.py script is logged.

To change the value of "yb.health.logOutput", send a PUT request to "[Platform URL]/api/customers/[customer UUID]/runtime_config/[universe UUID]/key/yb.health.logOutput". The body should contain either "true" or "false" in plain text. Please note that the runtime_config API also requires the "Content-Type" header of the request to be set to "text/plain".

Test Plan:
I set and reset the flag manually and verified that the output of cluster_health.py is dumped to the terminal when the flag is true and is not dumped when the flag is false.

Reviewers: sanketh

Reviewed By: sanketh

Subscribers: jenkins-bot, yugaware

Differential Revision: https://phabricator.dev.yugabyte.com/D11787
  • Loading branch information
shahrooz1997 committed Jul 30, 2021
1 parent 7eec117 commit 36a9ed7
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -732,10 +732,20 @@ public void checkSingleUniverse(CheckSingleUniverseParams params) {
Provider mainProvider =
Provider.get(UUID.fromString(details.getPrimaryCluster().userIntent.provider));

// Check if it should log the output of the command.
Boolean shouldLogOutput = false; // Default value.
if (runtimeConfigFactory.forUniverse(params.universe).hasPath("yb.health.logOutput")) {
shouldLogOutput =
runtimeConfigFactory.forUniverse(params.universe).getBoolean("yb.health.logOutput");
}

// Call devops and process response.
ShellResponse response =
healthManager.runCommand(
mainProvider, new ArrayList<>(clusterMetadata.values()), potentialStartTime);
mainProvider,
new ArrayList<>(clusterMetadata.values()),
potentialStartTime,
shouldLogOutput);

long durationMs = System.currentTimeMillis() - startMs;
boolean sendMailAlways = (params.shouldSendStatusUpdate || lastCheckHadErrors);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ public static class ClusterInfo {
}

public ShellResponse runCommand(
Provider provider, List<ClusterInfo> clusters, Long potentialStartTimeMs) {
Provider provider,
List<ClusterInfo> clusters,
Long potentialStartTimeMs,
Boolean shouldLogOutput) {
List<String> commandArgs = new ArrayList<>();

commandArgs.add(PY_WRAPPER);
Expand All @@ -68,7 +71,7 @@ public ShellResponse runCommand(
HashMap<String, String> extraEnvVars =
provider == null ? new HashMap<>() : new HashMap<>(provider.getConfig());

return shellProcessHandler.run(commandArgs, extraEnvVars, false /*logCmdOutput*/, description);
return shellProcessHandler.run(commandArgs, extraEnvVars, shouldLogOutput, description);
}

@Override
Expand Down
2 changes: 2 additions & 0 deletions managed/src/main/resources/application.common.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ play.http.errorHandler = "com.yugabyte.yw.common.YWErrorHandler"
play.modules.enabled += "play.modules.swagger.SwaggerModule"

api.version = "v1"

yb.health.logOutput = false
2 changes: 2 additions & 0 deletions managed/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ yb {
smtp_connection_timeout_ms = 30000
# Default timeout for sending the mail messages, in msec.
smtp_timeout_ms = 60000
logOutput = false
}

security {
Expand All @@ -140,6 +141,7 @@ runtime_config {
"yb.proxy_endpoint_timeout"
"yb.cloud.enabled" # should be excluded for cloud deployments
"yb.universe_boot_script"
"yb.health.logOutput"
]
excluded_paths = [
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.yugabyte.yw.common.PlacementInfoUtil;
import com.yugabyte.yw.common.ShellResponse;
import com.yugabyte.yw.common.alerts.MetricService;
import com.yugabyte.yw.common.config.RuntimeConfigFactory;
import com.yugabyte.yw.common.config.impl.RuntimeConfig;
import com.yugabyte.yw.forms.CustomerRegisterFormData.AlertingData;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams;
Expand Down Expand Up @@ -96,6 +97,9 @@ public class HealthCheckerTest extends FakeDBApplication {

@Mock Config mockRuntimeConfig;

@Mock RuntimeConfigFactory mockruntimeConfigFactory;
@Mock Config mockConfigUniverseScope;

@Before
public void setUp() {
defaultCustomer = ModelFactory.testCustomer();
Expand All @@ -114,13 +118,16 @@ public void setUp() {
+ "'' } ] }")
.replace("''", "\""));

when(mockHealthManager.runCommand(any(), any(), any())).thenReturn(dummyShellResponse);
when(mockHealthManager.runCommand(any(), any(), any(), any())).thenReturn(dummyShellResponse);

testRegistry = new CollectorRegistry();
report = spy(new HealthCheckerReport());

when(mockRuntimeConfig.getInt("yb.health.max_num_parallel_checks")).thenReturn(11);

when(mockruntimeConfigFactory.forUniverse(any())).thenReturn(mockConfigUniverseScope);
when(mockConfigUniverseScope.hasPath("yb.health.logOutput")).thenReturn(false);

// Finally setup the mocked instance.
healthChecker =
new HealthChecker(
Expand All @@ -132,7 +139,7 @@ public void setUp() {
report,
mockEmailHelper,
metricService,
null,
mockruntimeConfigFactory,
null) {
@Override
RuntimeConfig<Model> getRuntimeConfig() {
Expand Down Expand Up @@ -207,13 +214,13 @@ private Universe setupDisabledAlertsConfig(String email, long disabledUntilSecs)

private void verifyHealthManager(int invocationsCount) {
verify(mockHealthManager, times(invocationsCount))
.runCommand(eq(defaultProvider), any(), eq(0L));
.runCommand(eq(defaultProvider), any(), eq(0L), any());
}

private void verifyK8sHealthManager() {
ArgumentCaptor<List> expectedClusters = ArgumentCaptor.forClass(List.class);
verify(mockHealthManager, times(1))
.runCommand(eq(kubernetesProvider), expectedClusters.capture(), eq(0L));
.runCommand(eq(kubernetesProvider), expectedClusters.capture(), eq(0L), any());
HealthManager.ClusterInfo cluster =
(HealthManager.ClusterInfo) expectedClusters.getValue().get(0);
assertEquals(cluster.namespaceToConfig.get("univ1"), "foo");
Expand Down Expand Up @@ -253,7 +260,7 @@ private void testSingleK8sUniverse(Universe u) {
private void validateNoDevopsCall() {
healthChecker.checkCustomer(defaultCustomer);

verify(mockHealthManager, times(0)).runCommand(any(), any(), any());
verify(mockHealthManager, times(0)).runCommand(any(), any(), any(), any());
}

@Test
Expand Down Expand Up @@ -286,7 +293,7 @@ public void testReportOnlyErrors() {
healthChecker.checkSingleUniverse(
new HealthChecker.CheckSingleUniverseParams(
u, defaultCustomer, false, true, YB_ALERT_TEST_EMAIL));
verify(mockHealthManager, times(1)).runCommand(eq(defaultProvider), any(), eq(0L));
verify(mockHealthManager, times(1)).runCommand(eq(defaultProvider), any(), eq(0L), any());

// Erase stored into DB data to avoid DuplicateKeyException.
HealthCheck.keepOnlyLast(u.universeUUID, 0);
Expand All @@ -296,7 +303,7 @@ public void testReportOnlyErrors() {
healthChecker.checkSingleUniverse(
new HealthChecker.CheckSingleUniverseParams(
u, defaultCustomer, false, false, YB_ALERT_TEST_EMAIL));
verify(mockHealthManager, times(2)).runCommand(eq(defaultProvider), any(), eq(0L));
verify(mockHealthManager, times(2)).runCommand(eq(defaultProvider), any(), eq(0L), any());
}

@Test
Expand Down Expand Up @@ -460,15 +467,15 @@ public void testTimingLogic() {
while (!healthChecker.runningHealthChecks.get(u.universeUUID).isDone()) {}
} catch (Exception ignored) {
}
verify(mockHealthManager, times(1)).runCommand(any(), any(), any());
verify(mockHealthManager, times(1)).runCommand(any(), any(), any(), any());
// If we run right afterwards, none of the timers should be hit again, so total hit with any
// args should still be 1.
healthChecker.checkCustomer(defaultCustomer);
try {
while (!healthChecker.runningHealthChecks.get(u.universeUUID).isDone()) {}
} catch (Exception ignored) {
}
verify(mockHealthManager, times(1)).runCommand(any(), any(), any());
verify(mockHealthManager, times(1)).runCommand(any(), any(), any(), any());
try {
Thread.sleep(waitMs);
} catch (InterruptedException e) {
Expand All @@ -480,7 +487,7 @@ public void testTimingLogic() {
while (!healthChecker.runningHealthChecks.get(u.universeUUID).isDone()) {}
} catch (Exception ignored) {
}
verify(mockHealthManager, times(2)).runCommand(any(), any(), any());
verify(mockHealthManager, times(2)).runCommand(any(), any(), any(), any());
// Another cycle later, we should be running yet another test, but now with status update.
try {
Thread.sleep(waitMs);
Expand All @@ -493,14 +500,15 @@ public void testTimingLogic() {
while (!healthChecker.runningHealthChecks.get(u.universeUUID).isDone()) {}
} catch (Exception ignored) {
}
verify(mockHealthManager, times(3)).runCommand(any(), any(), any());
verify(mockHealthManager, times(3)).runCommand(any(), any(), any(), any());
}

@Test
public void testScriptFailure() {
ShellResponse dummyShellResponseFail = ShellResponse.create(1, "Should error");

when(mockHealthManager.runCommand(any(), any(), any())).thenReturn(dummyShellResponseFail);
when(mockHealthManager.runCommand(any(), any(), any(), any()))
.thenReturn(dummyShellResponseFail);
Universe u = setupUniverse("univ1");
setupAlertingData(null, false, false);
testSingleUniverse(u, null, true, 1);
Expand Down Expand Up @@ -535,7 +543,7 @@ private void testSingleUniverseWithYedisState(boolean enabledYEDIS) {
healthChecker.checkSingleUniverse(
new HealthChecker.CheckSingleUniverseParams(u, defaultCustomer, true, false, null));
ArgumentCaptor<List> expectedClusters = ArgumentCaptor.forClass(List.class);
verify(mockHealthManager, times(1)).runCommand(any(), expectedClusters.capture(), any());
verify(mockHealthManager, times(1)).runCommand(any(), expectedClusters.capture(), any(), any());

HealthManager.ClusterInfo clusterInfo = (ClusterInfo) expectedClusters.getValue().get(0);
assertEquals(enabledYEDIS, clusterInfo.redisPort == 1234);
Expand Down Expand Up @@ -605,7 +613,7 @@ private void mockGoodHealthResponse() {
+ dummyCheck
+ "'' } ] }")
.replace("''", "\""));
when(mockHealthManager.runCommand(any(), any(), any())).thenReturn(dummyShellResponse);
when(mockHealthManager.runCommand(any(), any(), any(), any())).thenReturn(dummyShellResponse);
}

@Test
Expand Down Expand Up @@ -672,7 +680,7 @@ public void testSingleUniverseWithUnprovisionedNodeAlertSent() {

healthChecker.checkSingleUniverse(
new HealthChecker.CheckSingleUniverseParams(u, defaultCustomer, true, false, null));
verify(mockHealthManager, never()).runCommand(any(), any(), any());
verify(mockHealthManager, never()).runCommand(any(), any(), any(), any());

Metric metric =
AssertHelper.assertMetricValue(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public void testHealthManager() {
startTime,
sendStatus,
reportOnlyErrors);
healthManager.runCommand(provider, ImmutableList.of(cluster), startTime);
healthManager.runCommand(provider, ImmutableList.of(cluster), startTime, false);
HashMap extraEnvVars = new HashMap<>(provider.getConfig());
verify(shellProcessHandler, times(1))
.run(eq(expectedCommand), eq(extraEnvVars), eq(false), anyString());
Expand Down

0 comments on commit 36a9ed7

Please sign in to comment.