Skip to content

Commit

Permalink
[#23578] YSQL: Add HELP and TYPE to :13000/prometheus-metrics
Browse files Browse the repository at this point in the history
Summary:
Similar to D25254, adds the following metadata to the YSQL Prometheus metrics endpoint for each metric:
  - #HELP: A brief description of the metric
  - #TYPE: Type of the metric, in this case either gauge or counter.

Gauge metrics can increase or decrease: for example, the number of YSQL connections
is a gauge metric.

Counters can only increase: for example, the number of select statements executed is
a counter metric.

By default, HELP and TYPE metadata are always shown. If a user wants to get the metrics without the metadata, they can use the `?show_help=false` URL parameter.

Connection manager metric descriptions are taken from D27240.

Also, move `ParseRequestOptions` out of the anonymous namespace in `default-path-handlers.cc` so that the pgsql webserver can use it.
Jira: DB-12496

Test Plan:
```
./yb_build.sh --cxx-test pgwrapper_pg_libpq-test --gtest_filter PgLibPqTest.CatalogCacheIdMissMetricsTest
./yb_build.sh --cxx-test pgwrapper_pg_libpq-test --gtest_filter PgLibPqTest.PrometheusMetricsHelpAndTypeTest
```

Reviewers: yyan, myang

Reviewed By: myang

Subscribers: svc_phabricator, esheng, yql, ybase

Differential Revision: https://phorge.dev.yugabyte.com/D37766
  • Loading branch information
kai-franz committed Sep 11, 2024
1 parent ffa537e commit 5ae4558
Show file tree
Hide file tree
Showing 7 changed files with 290 additions and 144 deletions.
84 changes: 84 additions & 0 deletions src/postgres/yb-extensions/yb_pg_metrics/yb_pg_metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,11 @@ void
set_metric_names(void)
{
for (int i = 0; i < kMaxStatementType; i++)
{
ybpgm_table[i].table_name[0] = '\0';
ybpgm_table[i].count_help[0] = '\0';
ybpgm_table[i].sum_help[0] = '\0';
}

strcpy(ybpgm_table[Select].name, YSQL_METRIC_PREFIX "SelectStmt");
strcpy(ybpgm_table[Insert].name, YSQL_METRIC_PREFIX "InsertStmt");
Expand Down Expand Up @@ -351,6 +355,86 @@ set_metric_names(void)
snprintf(ybpgm_table[i].table_name, YB_PG_METRIC_NAME_LEN, "%s",
table_name);
}

strcpy(ybpgm_table[Select].count_help,
"Number of SELECT statements that have been executed");
strcpy(ybpgm_table[Select].sum_help,
"Total time spent executing SELECT statements");

strcpy(ybpgm_table[Insert].count_help,
"Number of INSERT statements that have been executed");
strcpy(ybpgm_table[Insert].sum_help,
"Total time spent executing INSERT statements");

strcpy(ybpgm_table[Delete].count_help,
"Number of DELETE statements that have been executed");
strcpy(ybpgm_table[Delete].sum_help,
"Total time spent executing DELETE statements");

strcpy(ybpgm_table[Update].count_help,
"Number of UPDATE statements that have been executed");
strcpy(ybpgm_table[Update].sum_help,
"Total time spent executing UPDATE statements");

strcpy(ybpgm_table[Begin].count_help,
"Number of BEGIN statements that have been executed");
strcpy(ybpgm_table[Begin].sum_help,
"Total time spent executing BEGIN statements");

strcpy(ybpgm_table[Commit].count_help,
"Number of COMMIT statements that have been executed");
strcpy(ybpgm_table[Commit].sum_help,
"Total time spent executing COMMIT statements");

strcpy(ybpgm_table[Rollback].count_help,
"Number of ROLLBACK statements that have been executed");
strcpy(ybpgm_table[Rollback].sum_help,
"Total time spent executing ROLLBACK statements");

strcpy(ybpgm_table[Other].count_help,
"Number of other statements that have been executed");
strcpy(ybpgm_table[Other].sum_help,
"Total time spent executing other statements");

strcpy(ybpgm_table[Single_Shard_Transaction].count_help,
"Number of single shard transactions that have been executed (deprecated)");
strcpy(ybpgm_table[Single_Shard_Transaction].sum_help,
"Total time spent executing single shard transactions (deprecated)");

strcpy(ybpgm_table[SingleShardTransaction].count_help,
"Number of single shard transactions that have been executed");
strcpy(ybpgm_table[SingleShardTransaction].sum_help,
"Total time spent executing single shard transactions");

strcpy(ybpgm_table[Transaction].count_help,
"Number of transactions that have been executed");
strcpy(ybpgm_table[Transaction].sum_help,
"Total time spent executing transactions");

strcpy(ybpgm_table[AggregatePushdown].count_help,
"Number of aggregate pushdowns");
strcpy(ybpgm_table[AggregatePushdown].sum_help,
"Total time spent executing aggregate pushdowns");

strcpy(ybpgm_table[CatCacheMisses].count_help,
"Total number of catalog cache misses");
strcpy(ybpgm_table[CatCacheMisses].sum_help, "Not applicable");

for (int i = CatCacheIdMisses_Start; i <= CatCacheIdMisses_End; ++i)
{
snprintf(ybpgm_table[i].count_help, YB_PG_METRIC_NAME_LEN,
"Number of catalog cache misses for index %s",
ybpgm_table[i].table_name);
strcpy(ybpgm_table[i].sum_help, "Not applicable");
}

for (int i = CatCacheTableMisses_Start; i <= CatCacheTableMisses_End; ++i)
{
snprintf(ybpgm_table[i].count_help, YB_PG_METRIC_NAME_LEN,
"Number of catalog cache misses for table %s",
ybpgm_table[i].table_name);
strcpy(ybpgm_table[i].sum_help, "Not applicable");
}
}

/*
Expand Down
179 changes: 87 additions & 92 deletions src/yb/server/default-path-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -452,96 +452,6 @@ void SetParsedValue(Value* v, const Result<Value>& result) {
}
}

// Fills in the supplied option structs from the URL query arguments in req.parsed_args.
//
// Every output pointer is optional; a null pointer skips that group of options:
//   - json_opts:       metrics, reset_histograms, level, include_raw_histograms, include_schema.
//   - prometheus_opts: metrics, reset_histograms, level, show_help, max_metric_entries, version,
//                      plus table_/server_ allowlist/blocklist (version v2) or priority_regex
//                      (any other version).
//   - json_mode:       compact.
//
// show_help and max_metric_entries only overwrite the existing value when the argument is present,
// so callers can pre-populate prometheus_opts with their own defaults before calling this.
static void ParseRequestOptions(const Webserver::WebRequest& req,
                                MetricPrometheusOptions *prometheus_opts,
                                MetricJsonOptions *json_opts = nullptr,
                                JsonWriter::Mode *json_mode = nullptr) {
  // Options common to both the JSON and the Prometheus writers.
  auto ParseMetricOptions = [](const Webserver::WebRequest& req,
                               MetricOptions *metric_opts) {
    if (const string* metrics_p = FindOrNull(req.parsed_args, "metrics")) {
      metric_opts->general_metrics_allowlist = SplitStringUsing(*metrics_p, ",");
    }

    string arg = FindWithDefault(req.parsed_args, "reset_histograms", "true");
    metric_opts->reset_histograms = ParseLeadingBoolValue(arg.c_str(), true);

    arg = FindWithDefault(req.parsed_args, "level", "debug");
    SetParsedValue(&metric_opts->level, MetricLevelFromName(arg));
  };

  string arg;
  if (json_opts) {
    ParseMetricOptions(req, json_opts);

    arg = FindWithDefault(req.parsed_args, "include_raw_histograms", "false");
    json_opts->include_raw_histograms = ParseLeadingBoolValue(arg.c_str(), false);

    arg = FindWithDefault(req.parsed_args, "include_schema", "false");
    json_opts->include_schema_info = ParseLeadingBoolValue(arg.c_str(), false);
  }

  if (prometheus_opts) {
    ParseMetricOptions(req, prometheus_opts);

    if (const std::string* arg_p = FindOrNull(req.parsed_args, "show_help")) {
      prometheus_opts->export_help_and_type =
          ExportHelpAndType(ParseLeadingBoolValue(arg_p->c_str(), false));
    }

    if (const std::string* arg_p = FindOrNull(req.parsed_args, "max_metric_entries")) {
      try {
        if (arg_p->starts_with('-')) {
          throw std::invalid_argument("Input value is negative");
        }
        // std::stoul returns an unsigned long, which can be wider than 32 bits. Reject any value
        // that does not survive the narrowing instead of silently truncating it; this also
        // catches a negative number hidden behind leading whitespace, which stoul wraps around.
        const auto parsed = std::stoul(*arg_p);
        const auto narrowed = static_cast<uint32_t>(parsed);
        if (narrowed != parsed) {
          throw std::out_of_range("Input value does not fit in 32 bits");
        }
        prometheus_opts->max_metric_entries = narrowed;
      } catch (const std::exception& e) {
        // Best effort: keep whatever value the caller had already set.
        LOG(WARNING) << "Prometheus metric endpoint URL parameter max_metric_entries=" << *arg_p
                     << ". Failed to convert its value to unsigned 32 bits integer: "
                     << e.what();
      }
    }

    prometheus_opts->version = FindWithDefault(req.parsed_args, "version",
                                               kFilterVersionOne);

    if (prometheus_opts->version == kFilterVersionTwo) {
      // Set it to accept all metrics, because we ignore metrics URL parameter when using v2.
      prometheus_opts->general_metrics_allowlist = std::nullopt;

      // Looks up a regex argument, mapping the keywords ALL and NONE to their regex equivalents.
      auto FindHandlingAllOrNone = [&](
          const std::string& param_name, const std::string& default_value) -> std::string {
        std::string regex_string = FindWithDefault(req.parsed_args, param_name, default_value);
        if (regex_string == "ALL") {
          return ".*";
        } else if (regex_string == "NONE") {
          return "";
        }
        return regex_string;
      };

      prometheus_opts->table_allowlist_string = FindHandlingAllOrNone("table_allowlist", "ALL");

      prometheus_opts->table_blocklist_string = FindHandlingAllOrNone("table_blocklist", "NONE");

      prometheus_opts->server_allowlist_string = FindHandlingAllOrNone("server_allowlist", "ALL");

      prometheus_opts->server_blocklist_string = FindHandlingAllOrNone("server_blocklist", "NONE");
    } else {
      // Any version other than v2 is handled as v1; warn when it is not literally v1.
      prometheus_opts->priority_regex_string = FindWithDefault(
          req.parsed_args, "priority_regex", ".*");
      LOG_IF(WARNING, prometheus_opts->version != kFilterVersionOne)
          << "Prometheus endpoint URL parameter version=" << prometheus_opts->version
          << " is not recognized. Only v1 or v2 can be accepted.";
    }
  }

  if (json_mode) {
    arg = FindWithDefault(req.parsed_args, "compact", "false");
    *json_mode =
        ParseLeadingBoolValue(arg.c_str(), false) ? JsonWriter::COMPACT : JsonWriter::PRETTY;
  }
}

static void WriteMetricsAsJson(const MetricRegistry* const metrics,
const Webserver::WebRequest& req, Webserver::WebResponse* resp) {
MetricJsonOptions opts;
Expand All @@ -557,8 +467,7 @@ static void WriteMetricsForPrometheus(const MetricRegistry* const metrics,
const Webserver::WebRequest& req,
Webserver::WebResponse* resp) {
MetricPrometheusOptions opts;
opts.export_help_and_type =
ExportHelpAndType(GetAtomicFlag(&FLAGS_export_help_and_type_in_prometheus_metrics));
opts.export_help_and_type = ExportHelpAndType(FLAGS_export_help_and_type_in_prometheus_metrics);
opts.max_metric_entries = GetAtomicFlag(&FLAGS_max_prometheus_metric_entries);
ParseRequestOptions(req, &opts);

Expand Down Expand Up @@ -687,6 +596,92 @@ static void ResetStackTraceHandler(const Webserver::WebRequest& req, Webserver::

} // anonymous namespace

// Fills in the supplied option structs from the URL query arguments in req.parsed_args.
//
// Every output pointer is optional; a null pointer skips that group of options:
//   - json_opts:       metrics, reset_histograms, level, include_raw_histograms, include_schema.
//   - prometheus_opts: metrics, reset_histograms, level, show_help, max_metric_entries, version,
//                      plus table_/server_ allowlist/blocklist (version v2) or priority_regex
//                      (any other version).
//   - json_mode:       compact.
//
// show_help and max_metric_entries only overwrite the existing value when the argument is present,
// so callers can pre-populate prometheus_opts with their own defaults before calling this.
void ParseRequestOptions(
    const Webserver::WebRequest& req, MetricPrometheusOptions* prometheus_opts,
    MetricJsonOptions* json_opts, JsonWriter::Mode* json_mode) {
  // Options common to both the JSON and the Prometheus writers.
  auto ParseMetricOptions = [](const Webserver::WebRequest& req, MetricOptions* metric_opts) {
    if (const string* metrics_p = FindOrNull(req.parsed_args, "metrics")) {
      metric_opts->general_metrics_allowlist = SplitStringUsing(*metrics_p, ",");
    }

    string arg = FindWithDefault(req.parsed_args, "reset_histograms", "true");
    metric_opts->reset_histograms = ParseLeadingBoolValue(arg.c_str(), true);

    arg = FindWithDefault(req.parsed_args, "level", "debug");
    SetParsedValue(&metric_opts->level, MetricLevelFromName(arg));
  };

  string arg;
  if (json_opts) {
    ParseMetricOptions(req, json_opts);

    arg = FindWithDefault(req.parsed_args, "include_raw_histograms", "false");
    json_opts->include_raw_histograms = ParseLeadingBoolValue(arg.c_str(), false);

    arg = FindWithDefault(req.parsed_args, "include_schema", "false");
    json_opts->include_schema_info = ParseLeadingBoolValue(arg.c_str(), false);
  }

  if (prometheus_opts) {
    ParseMetricOptions(req, prometheus_opts);

    if (const std::string* arg_p = FindOrNull(req.parsed_args, "show_help")) {
      prometheus_opts->export_help_and_type =
          ExportHelpAndType(ParseLeadingBoolValue(arg_p->c_str(), false));
    }

    if (const std::string* arg_p = FindOrNull(req.parsed_args, "max_metric_entries")) {
      try {
        if (arg_p->starts_with('-')) {
          throw std::invalid_argument("Input value is negative");
        }
        // std::stoul returns an unsigned long, which can be wider than 32 bits. Reject any value
        // that does not survive the narrowing instead of silently truncating it; this also
        // catches a negative number hidden behind leading whitespace, which stoul wraps around.
        const auto parsed = std::stoul(*arg_p);
        const auto narrowed = static_cast<uint32_t>(parsed);
        if (narrowed != parsed) {
          throw std::out_of_range("Input value does not fit in 32 bits");
        }
        prometheus_opts->max_metric_entries = narrowed;
      } catch (const std::exception& e) {
        // Best effort: keep whatever value the caller had already set.
        LOG(WARNING) << "Prometheus metric endpoint URL parameter max_metric_entries=" << *arg_p
                     << ". Failed to convert its value to unsigned 32 bits integer: " << e.what();
      }
    }

    prometheus_opts->version = FindWithDefault(req.parsed_args, "version", kFilterVersionOne);

    if (prometheus_opts->version == kFilterVersionTwo) {
      // Set it to accept all metrics, because we ignore metrics URL parameter when using v2.
      prometheus_opts->general_metrics_allowlist = std::nullopt;

      // Looks up a regex argument, mapping the keywords ALL and NONE to their regex equivalents.
      auto FindHandlingAllOrNone = [&](const std::string& param_name,
                                       const std::string& default_value) -> std::string {
        std::string regex_string = FindWithDefault(req.parsed_args, param_name, default_value);
        if (regex_string == "ALL") {
          return ".*";
        } else if (regex_string == "NONE") {
          return "";
        }
        return regex_string;
      };

      prometheus_opts->table_allowlist_string = FindHandlingAllOrNone("table_allowlist", "ALL");

      prometheus_opts->table_blocklist_string = FindHandlingAllOrNone("table_blocklist", "NONE");

      prometheus_opts->server_allowlist_string = FindHandlingAllOrNone("server_allowlist", "ALL");

      prometheus_opts->server_blocklist_string = FindHandlingAllOrNone("server_blocklist", "NONE");
    } else {
      // Any version other than v2 is handled as v1; warn when it is not literally v1.
      prometheus_opts->priority_regex_string =
          FindWithDefault(req.parsed_args, "priority_regex", ".*");
      LOG_IF(WARNING, prometheus_opts->version != kFilterVersionOne)
          << "Prometheus endpoint URL parameter version=" << prometheus_opts->version
          << " is not recognized. Only v1 or v2 can be accepted.";
    }
  }

  if (json_mode) {
    arg = FindWithDefault(req.parsed_args, "compact", "false");
    *json_mode =
        ParseLeadingBoolValue(arg.c_str(), false) ? JsonWriter::COMPACT : JsonWriter::PRETTY;
  }
}

// Registered to handle "/memz", and prints out memory allocation statistics.
void MemUsageHandler(const Webserver::WebRequest& req, Webserver::WebResponse* resp) {
std::stringstream *output = &resp->output;
Expand Down
6 changes: 6 additions & 0 deletions src/yb/server/default-path-handlers.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
#pragma once

#include "yb/server/webserver.h"
#include "yb/util/metric_entity.h"
#include "yb/util/jsonwriter.h"

namespace yb {

Expand All @@ -70,4 +72,8 @@ void RegisterPathUsageHandler(Webserver* webserver, FsManager* fsmanager);

void RegisterTlsHandler(Webserver* webserver, server::RpcServerBase* server);

// Parses the URL query arguments of req into the given metric option structs.
// prometheus_opts, json_opts and json_mode are each optional: passing nullptr skips the
// corresponding group of options. Declared here so that web servers other than the default
// path handlers can reuse the same parsing.
void ParseRequestOptions(
const Webserver::WebRequest& req, MetricPrometheusOptions* prometheus_opts,
MetricJsonOptions* json_opts = nullptr, JsonWriter::Mode* json_mode = nullptr);

} // namespace yb
5 changes: 3 additions & 2 deletions src/yb/util/metrics_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ class PrometheusWriter {
// Write a single metric entry for non-table level metrics.
//
// attr must not contain a "table_id" key; if it does, an InvalidArgument status is returned and
// nothing is written. type and help are forwarded to FlushHelpAndTypeIfRequested before the
// entry itself is flushed, and both default to "unknown" when the caller does not supply them.
template <typename T>
Status WriteSingleEntryNonTable(
const MetricEntity::AttributeMap& attr, const std::string& name, const T& value) {
const MetricEntity::AttributeMap& attr, const std::string& name, const T& value,
const char* type = "unknown", const char* help = "unknown") {
// Table-level attributes are not accepted by this function.
auto it = attr.find("table_id");
if (it != attr.end()) {
return STATUS(
InvalidArgument, "Expect no table_id in attr argument when calling this function.");
}

// Metadata is handed to the writer first, then the metric value.
FlushHelpAndTypeIfRequested(name, type, help);
RETURN_NOT_OK(FlushSingleEntry(attr, name, value));
return Status::OK();
}
Expand Down
Loading

0 comments on commit 5ae4558

Please sign in to comment.