Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus Receiver: Print a more informative message about 'up' metric value #1826

Merged
merged 2 commits into from
Sep 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 17 additions & 21 deletions receiver/prometheusreceiver/internal/metricsbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,11 @@ import (
)

const (
metricsSuffixCount = "_count"
metricsSuffixBucket = "_bucket"
metricsSuffixSum = "_sum"
startTimeMetricName = "process_start_time_seconds"
scrapeLatencyMetricName = "scrape_duration_seconds"
scrapeStatusMetricName = "up"
scrapeStatusOk = "200"
// The 'up' metric only reports whether or not the scrape succeeded - in the case that
// it fails, we set the status to '404', which is the most generic failure status.
scrapeStatusErr = "404"
metricsSuffixCount = "_count"
metricsSuffixBucket = "_bucket"
metricsSuffixSum = "_sum"
startTimeMetricName = "process_start_time_seconds"
scrapeUpMetricName = "up"
)

var (
Expand All @@ -61,8 +56,6 @@ type metricBuilder struct {
useStartTimeMetric bool
startTimeMetricRegex *regexp.Regexp
startTime float64
scrapeLatencyMs float64
scrapeStatus string
logger *zap.Logger
currentMf MetricFamily
}
Expand Down Expand Up @@ -106,16 +99,19 @@ func (b *metricBuilder) AddDataPoint(ls labels.Labels, t int64, v float64) error
b.hasInternalMetric = true
lm := ls.Map()
delete(lm, model.MetricNameLabel)
switch metricName {
case scrapeStatusMetricName:
if v == 1.0 {
b.scrapeStatus = scrapeStatusOk
// See https://www.prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
// up: 1 if the instance is healthy, i.e. reachable, or 0 if the scrape failed.
if metricName == scrapeUpMetricName && v != 1.0 {
if v == 0.0 {
b.logger.Warn("Failed to scrape Prometheus endpoint",
zap.Int64("scrape_timestamp", t),
zap.String("target_labels", fmt.Sprintf("%v", lm)))
} else {
b.scrapeStatus = scrapeStatusErr
b.logger.Warn("http client error", zap.Int64("timestamp", t), zap.Float64("value", v), zap.String("labels", fmt.Sprintf("%v", lm)))
b.logger.Warn("The 'up' metric contains invalid value",
zap.Float64("value", v),
zap.Int64("scrape_timestamp", t),
zap.String("target_labels", fmt.Sprintf("%v", lm)))
}
case scrapeLatencyMetricName:
b.scrapeLatencyMs = v * 1000
}
return nil
case b.useStartTimeMetric && b.matchStartTimeMetric(metricName):
Expand Down Expand Up @@ -302,7 +298,7 @@ func timestampFromMs(timeAtMs int64) *timestamppb.Timestamp {
}

func isInternalMetric(metricName string) bool {
if metricName == "up" || strings.HasPrefix(metricName, "scrape_") {
if metricName == scrapeUpMetricName || strings.HasPrefix(metricName, "scrape_") {
return true
}
return false
Expand Down
11 changes: 0 additions & 11 deletions receiver/prometheusreceiver/internal/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ import (
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/storage"
"go.opencensus.io/plugin/ochttp"
"go.opencensus.io/stats"
"go.opencensus.io/tag"
"go.uber.org/zap"
"google.golang.org/protobuf/types/known/timestamppb"

Expand Down Expand Up @@ -159,14 +156,6 @@ func (tr *transaction) Commit() error {
return err
}

if tr.metricBuilder.hasInternalMetric {
m := ochttp.ClientRoundtripLatency.M(tr.metricBuilder.scrapeLatencyMs)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the redundant OC metric and its related code: an OpenCensus metric must be registered via an OpenCensus view to be used, which we don't do in the collector.

stats.RecordWithTags(tr.ctx, []tag.Mutator{
tag.Upsert(ochttp.KeyClientStatus, tr.metricBuilder.scrapeStatus),
}, m)

}

if tr.useStartTimeMetric {
// AdjustStartTime - startTime has to be non-zero in this case.
if tr.metricBuilder.startTime == 0.0 {
Expand Down