From 3c32f367468539ca5ea9178bb5b009a3f4e52222 Mon Sep 17 00:00:00 2001 From: Nail Islamov Date: Wed, 23 Sep 2020 11:51:24 +1000 Subject: [PATCH] Prometheus Receiver: Print a more informative message about 'up' metric value (#1826) * Prometheus Receiver: Cleanup special handling of the 'up' metric * gofmt --- .../internal/metricsbuilder.go | 38 +++++++++---------- .../internal/transaction.go | 11 ------ 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/receiver/prometheusreceiver/internal/metricsbuilder.go b/receiver/prometheusreceiver/internal/metricsbuilder.go index 7bb109d9bd6..274b3db75ea 100644 --- a/receiver/prometheusreceiver/internal/metricsbuilder.go +++ b/receiver/prometheusreceiver/internal/metricsbuilder.go @@ -30,16 +30,11 @@ import ( ) const ( - metricsSuffixCount = "_count" - metricsSuffixBucket = "_bucket" - metricsSuffixSum = "_sum" - startTimeMetricName = "process_start_time_seconds" - scrapeLatencyMetricName = "scrape_duration_seconds" - scrapeStatusMetricName = "up" - scrapeStatusOk = "200" - // The 'up' metric only reports whether or not the scrape succeeded - in the case that - // it fails, we set the status to '404', which is the most generic failure status. - scrapeStatusErr = "404" + metricsSuffixCount = "_count" + metricsSuffixBucket = "_bucket" + metricsSuffixSum = "_sum" + startTimeMetricName = "process_start_time_seconds" + scrapeUpMetricName = "up" ) var ( @@ -61,8 +56,6 @@ type metricBuilder struct { useStartTimeMetric bool startTimeMetricRegex *regexp.Regexp startTime float64 - scrapeLatencyMs float64 - scrapeStatus string logger *zap.Logger currentMf MetricFamily } @@ -106,16 +99,19 @@ func (b *metricBuilder) AddDataPoint(ls labels.Labels, t int64, v float64) error b.hasInternalMetric = true lm := ls.Map() delete(lm, model.MetricNameLabel) - switch metricName { - case scrapeStatusMetricName: - if v == 1.0 { - b.scrapeStatus = scrapeStatusOk + // See https://www.prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series + // up: 1 if the instance is healthy, i.e. reachable, or 0 if the scrape failed. + if metricName == scrapeUpMetricName && v != 1.0 { + if v == 0.0 { + b.logger.Warn("Failed to scrape Prometheus endpoint", + zap.Int64("scrape_timestamp", t), + zap.String("target_labels", fmt.Sprintf("%v", lm))) } else { - b.scrapeStatus = scrapeStatusErr - b.logger.Warn("http client error", zap.Int64("timestamp", t), zap.Float64("value", v), zap.String("labels", fmt.Sprintf("%v", lm))) + b.logger.Warn("The 'up' metric contains invalid value", + zap.Float64("value", v), + zap.Int64("scrape_timestamp", t), + zap.String("target_labels", fmt.Sprintf("%v", lm))) } - case scrapeLatencyMetricName: - b.scrapeLatencyMs = v * 1000 } return nil case b.useStartTimeMetric && b.matchStartTimeMetric(metricName): @@ -302,7 +298,7 @@ func timestampFromMs(timeAtMs int64) *timestamppb.Timestamp { } func isInternalMetric(metricName string) bool { - if metricName == "up" || strings.HasPrefix(metricName, "scrape_") { + if metricName == scrapeUpMetricName || strings.HasPrefix(metricName, "scrape_") { return true } return false diff --git a/receiver/prometheusreceiver/internal/transaction.go b/receiver/prometheusreceiver/internal/transaction.go index 1ca72b0e51a..487f671cc7a 100644 --- a/receiver/prometheusreceiver/internal/transaction.go +++ b/receiver/prometheusreceiver/internal/transaction.go @@ -27,9 +27,6 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/storage" - "go.opencensus.io/plugin/ochttp" - "go.opencensus.io/stats" - "go.opencensus.io/tag" "go.uber.org/zap" "google.golang.org/protobuf/types/known/timestamppb" @@ -159,14 +156,6 @@ func (tr *transaction) Commit() error { return err } - if tr.metricBuilder.hasInternalMetric { - m := ochttp.ClientRoundtripLatency.M(tr.metricBuilder.scrapeLatencyMs) - stats.RecordWithTags(tr.ctx, []tag.Mutator{ - tag.Upsert(ochttp.KeyClientStatus, tr.metricBuilder.scrapeStatus), - }, m) - - } - if tr.useStartTimeMetric { // AdjustStartTime - startTime has to be non-zero in this case. if tr.metricBuilder.startTime == 0.0 {