Skip to content

Commit

Permalink
[#414] Show uptime in tserver status page
Browse files Browse the repository at this point in the history
Summary:
Show the uptime of each tablet server on the tserver status page. Each
tserver reports its uptime to the master as part of the tserver metrics
sent in its heartbeat.

Test Plan: localhost:7000/tablet-servers

Reviewers: kannan, mikhail, timur, bogdan, sergei

Reviewed By: sergei

Subscribers: sergei, bogdan, ybase, bharat

Differential Revision: https://phabricator.dev.yugabyte.com/D5383
  • Loading branch information
rven1 committed Sep 7, 2018
1 parent c6538a8 commit 2e5579a
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 9 deletions.
28 changes: 27 additions & 1 deletion src/yb/master/master-path-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,31 @@ inline void MasterPathHandlers::TServerTable(std::stringstream* output) {
<< " </tr>\n";
}

namespace {

// Unit-conversion factors used when breaking an uptime down into days, hours and minutes.
constexpr int kHoursPerDay = 24;
constexpr int kSecondsPerMinute = 60;
constexpr int kMinutesPerHour = 60;
constexpr int kSecondsPerHour = kSecondsPerMinute * kMinutesPerHour;
constexpr int kMinutesPerDay = kMinutesPerHour * kHoursPerDay;
constexpr int kSecondsPerDay = kSecondsPerHour * kHoursPerDay;

// Formats an uptime, given in seconds, as " Uptime: [<D>days, ]<H>:<MM>".
//
// The day count is printed only when it is non-zero. Hours are printed without padding and
// minutes are zero-padded to two digits. Leftover seconds are dropped (truncated, not rounded).
//
// All arithmetic is done in uint64_t. Storing the intermediate values in int would narrow the
// day count and could overflow in the days-to-minutes product for very large inputs, which
// would silently drop or corrupt the "days" part of the string.
std::string UptimeString(uint64_t seconds) {
  const uint64_t days = seconds / kSecondsPerDay;
  // Whole hours left over once the full days have been removed.
  const uint64_t hours = (seconds / kSecondsPerHour) - (days * kHoursPerDay);
  // Whole minutes left over once the full days and hours have been removed.
  const uint64_t mins =
      (seconds / kSecondsPerMinute) - (days * kMinutesPerDay) - (hours * kMinutesPerHour);

  std::ostringstream uptime_string_stream;
  uptime_string_stream << " Uptime: ";
  if (days > 0) {
    uptime_string_stream << days << "days, ";
  }
  // setw applies only to the next insertion, so only the minutes are padded.
  uptime_string_stream << hours << ":" << std::setw(2) << std::setfill('0') << mins;
  return uptime_string_stream.str();
}

} // anonymous namespace

void MasterPathHandlers::TServerDisplay(const std::string& current_uuid,
std::vector<std::shared_ptr<TSDescriptor>>* descs,
std::stringstream* output) {
Expand All @@ -190,7 +215,8 @@ void MasterPathHandlers::TServerDisplay(const std::string& current_uuid,
*output << " <td>" << RegistrationToHtml(reg.common(), host_port) << "</td>";
*output << " <td>" << time_since_hb << "</td>";
if (master_->ts_manager()->IsTSLive(desc)) {
*output << " <td style=\"color:Green\">" << kTserverAlive << "</td>";
*output << " <td style=\"color:Green\">" << kTserverAlive << ":" <<
UptimeString(desc->uptime_seconds()) << "</td>";
} else {
*output << " <td style=\"color:Red\">" << kTserverDead << "</td>";
}
Expand Down
1 change: 1 addition & 0 deletions src/yb/master/master.proto
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ message TServerMetricsPB {
optional double read_ops_per_sec = 3;
optional double write_ops_per_sec = 4;
optional int64 uncompressed_sst_file_size = 5;
optional uint64 uptime_seconds = 6;
}

// Heartbeat sent from the tablet-server to the master
Expand Down
8 changes: 1 addition & 7 deletions src/yb/master/master_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,7 @@ void MasterServiceImpl::TSHeartbeat(const TSHeartbeatRequestPB* req,

// Set the TServer metrics in TS Descriptor.
if (req->has_metrics()) {
if (req->metrics().has_total_ram_usage()) {
ts_desc->set_total_memory_usage(req->metrics().total_ram_usage());
}
ts_desc->set_total_sst_file_size(req->metrics().total_sst_file_size());
ts_desc->set_uncompressed_sst_file_size(req->metrics().uncompressed_sst_file_size());
ts_desc->set_write_ops_per_sec(req->metrics().write_ops_per_sec());
ts_desc->set_read_ops_per_sec(req->metrics().read_ops_per_sec());
ts_desc->UpdateMetrics(req->metrics());
}

if (req->has_tablet_report()) {
Expand Down
10 changes: 10 additions & 0 deletions src/yb/master/ts_descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,16 @@ bool TSDescriptor::IsAcceptingLeaderLoad(const ReplicationInfoPB& replication_in
return true;
}

// Copies the values carried by a TServerMetricsPB into this descriptor's cached metrics
// (tsMetrics_). The whole update happens while holding lock_. No has_*() checks are made:
// every field is stored exactly as returned by its accessor on `metrics`.
void TSDescriptor::UpdateMetrics(const TServerMetricsPB& metrics) {
  std::lock_guard<simple_spinlock> guard(lock_);
  auto& cached = tsMetrics_;

  // Memory and on-disk size figures.
  cached.total_memory_usage = metrics.total_ram_usage();
  cached.total_sst_file_size = metrics.total_sst_file_size();
  cached.uncompressed_sst_file_size = metrics.uncompressed_sst_file_size();

  // Throughput figures.
  cached.read_ops_per_sec = metrics.read_ops_per_sec();
  cached.write_ops_per_sec = metrics.write_ops_per_sec();

  // Uptime in seconds, as sent in the metrics message.
  cached.uptime_seconds = metrics.uptime_seconds();
}

bool TSDescriptor::HasTabletDeletePending() const {
std::lock_guard<simple_spinlock> l(lock_);
return !tablets_pending_delete_.empty();
Expand Down
12 changes: 12 additions & 0 deletions src/yb/master/ts_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <string>

#include "yb/gutil/gscoped_ptr.h"
#include "yb/master/master.pb.h"
#include "yb/tserver/tserver_service.proxy.h"
#include "yb/util/locks.h"
#include "yb/util/monotime.h"
Expand All @@ -61,6 +62,7 @@ namespace master {
class TSRegistrationPB;
class TSInformationPB;
class ReplicationInfoPB;
class TServerMetricsPB;

typedef util::SharedPtrTuple<tserver::TabletServerAdminServiceProxy,
tserver::TabletServerServiceProxy,
Expand Down Expand Up @@ -218,6 +220,13 @@ class TSDescriptor {
return tsMetrics_.write_ops_per_sec;
}

// Returns the uptime value (in seconds) currently stored in tsMetrics_.
// The read is performed while holding lock_.
uint64_t uptime_seconds() {
  std::lock_guard<simple_spinlock> guard(lock_);
  const uint64_t stored_uptime = tsMetrics_.uptime_seconds;
  return stored_uptime;
}

void UpdateMetrics(const TServerMetricsPB& metrics);

// Resets the cached tserver metrics by delegating to tsMetrics_.ClearMetrics().
// NOTE(review): unlike uptime_seconds() and UpdateMetrics(), this does not acquire lock_
// itself -- confirm whether callers are expected to synchronize or whether a lock is missing.
void ClearMetrics() {
tsMetrics_.ClearMetrics();
}
Expand Down Expand Up @@ -275,12 +284,15 @@ class TSDescriptor {

double write_ops_per_sec = 0;

uint64_t uptime_seconds = 0;

// Resets every metric field of this struct back to zero.
void ClearMetrics() {
  // Memory / size metrics.
  total_memory_usage = 0;
  total_sst_file_size = 0;
  uncompressed_sst_file_size = 0;
  // Throughput metrics.
  read_ops_per_sec = write_ops_per_sec = 0;
  // Uptime.
  uptime_seconds = 0;
}
};

Expand Down
18 changes: 17 additions & 1 deletion src/yb/tserver/heartbeater.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ class Heartbeater::Thread {
CHECKED_STATUS SetupRegistration(master::TSRegistrationPB* reg);
void SetupCommonField(master::TSToMasterCommonPB* common);
bool IsCurrentThread() const;
uint64_t CalculateUptime();

server::MasterAddressesPtr get_master_addresses() {
std::lock_guard<std::mutex> l(master_addresses_mtx_);
Expand Down Expand Up @@ -182,6 +183,8 @@ class Heartbeater::Thread {
uint64_t prev_reads_;
uint64_t prev_writes_;

MonoTime start_time_;

DISALLOW_COPY_AND_ASSIGN(Thread);
};

Expand Down Expand Up @@ -220,7 +223,8 @@ Heartbeater::Thread::Thread(const TabletServerOptions& opts, TabletServer* serve
tserver_metrics_interval_sec_(5),
prev_tserver_metrics_submission_(MonoTime::Now()),
prev_reads_(0),
prev_writes_(0) {
prev_writes_(0),
start_time_(MonoTime::Now()) {
CHECK_NOTNULL(master_addresses_.get());
CHECK(!master_addresses_->empty());
VLOG(1) << "Initializing heartbeater thread with master addresses: "
Expand Down Expand Up @@ -335,6 +339,13 @@ int Heartbeater::Thread::GetMillisUntilNextHeartbeat() const {
return FLAGS_heartbeat_interval_ms;
}

// Calculate Uptime
uint64_t Heartbeater::Thread::CalculateUptime() {
MonoDelta delta = MonoTime::Now().GetDeltaSince(start_time_);
uint64_t uptime_seconds = static_cast<uint64_t>(delta.ToSeconds());
return uptime_seconds;
}

Status Heartbeater::Thread::TryHeartbeat() {
master::TSHeartbeatRequestPB req;

Expand Down Expand Up @@ -411,11 +422,16 @@ Status Heartbeater::Thread::TryHeartbeat() {
prev_writes_ = num_writes;
req.mutable_metrics()->set_read_ops_per_sec(rops_per_sec);
req.mutable_metrics()->set_write_ops_per_sec(wops_per_sec);
uint64_t uptime_seconds = CalculateUptime();

req.mutable_metrics()->set_uptime_seconds(uptime_seconds);

prev_tserver_metrics_submission_ = MonoTime::Now();

VLOG(4) << "Read Ops per second: " << rops_per_sec;
VLOG(4) << "Write Ops per second: " << wops_per_sec;
VLOG(4) << "Total SST File Sizes: "<< total_file_sizes;
VLOG(4) << "Uptime seconds: "<< uptime_seconds;
}

RpcController rpc;
Expand Down

0 comments on commit 2e5579a

Please sign in to comment.