Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Update prometheus client to 0.6.0, handle counter metric name change #3700

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def parse_metric_family(self, response, scraper_config):
input_gen = self._text_filter_input(input_gen, scraper_config)

for metric in text_fd_to_metric_families(input_gen):
metric.type = scraper_config['type_overrides'].get(metric.name, metric.type)
metric.type = self._match_metric_in_mapping(metric, scraper_config['type_overrides']) or metric.type
if metric.type not in self.METRIC_TYPES:
continue
metric.name = self._remove_metric_prefix(metric.name, scraper_config)
Expand Down Expand Up @@ -301,8 +301,9 @@ def process(self, scraper_config, metric_transformers=None):

def _store_labels(self, metric, scraper_config):
# If targeted metric, store labels
if metric.name in scraper_config['label_joins']:
matching_label = scraper_config['label_joins'][metric.name]['label_to_match']
join_config = self._match_metric_in_mapping(metric, scraper_config['label_joins'])
if join_config:
matching_label = join_config['label_to_match']
for sample in metric.samples:
# metadata-only metrics that are used for label joins are always equal to 1
# this is required for metrics where all combinations of a state are sent
Expand All @@ -315,7 +316,7 @@ def _store_labels(self, metric, scraper_config):
for label_name, label_value in iteritems(sample[self.SAMPLE_LABELS]):
if label_name == matching_label:
matching_value = label_value
elif label_name in scraper_config['label_joins'][metric.name]['labels_to_get']:
elif label_name in join_config['labels_to_get']:
label_dict[label_name] = label_value
try:
if scraper_config['_label_mapping'][matching_label].get(matching_value):
Expand Down Expand Up @@ -357,7 +358,7 @@ def process_metric(self, metric, scraper_config, metric_transformers=None):
# If targeted metric, store labels
self._store_labels(metric, scraper_config)

if metric.name in scraper_config['ignore_metrics']:
if self._is_metric_in_list(metric, scraper_config['ignore_metrics']):
return # Ignore the metric

# Filter metric to see if we can enrich with joined labels
Expand All @@ -366,31 +367,32 @@ def process_metric(self, metric, scraper_config, metric_transformers=None):
if scraper_config['_dry_run']:
return

try:
self.submit_openmetric(scraper_config['metrics_mapper'][metric.name], metric, scraper_config)
except KeyError:
if metric_transformers is not None:
if metric.name in metric_transformers:
try:
# Get the transformer function for this specific metric
transformer = metric_transformers[metric.name]
transformer(metric, scraper_config)
except Exception as err:
self.log.warning("Error handling metric: {} - error: {}".format(metric.name, err))
else:
self.log.debug(
"Unable to handle metric: {0} - error: "
"No handler function named '{0}' defined".format(metric.name)
)
mapped_name = self._match_metric_in_mapping(metric, scraper_config['metrics_mapper'])
if mapped_name:
self.submit_openmetric(mapped_name, metric, scraper_config)
return

if metric_transformers is not None:
transformer = self._match_metric_in_mapping(metric, metric_transformers)
if transformer:
try:
transformer(metric, scraper_config)
except Exception as err:
self.log.warning("Error handling metric: {} - error: {}".format(metric.name, err))
else:
# build the wildcard list if first pass
if scraper_config['_metrics_wildcards'] is None:
scraper_config['_metrics_wildcards'] = [x for x in scraper_config['metrics_mapper'] if '*' in x]
self.log.debug(
"Unable to handle metric: {0} - error: "
"No handler function named '{0}' defined".format(metric.name)
)
else:
# build the wildcard list if first pass
if scraper_config['_metrics_wildcards'] is None:
scraper_config['_metrics_wildcards'] = [x for x in scraper_config['metrics_mapper'] if '*' in x]

# try matching wildcard (generic check)
for wildcard in scraper_config['_metrics_wildcards']:
if fnmatchcase(metric.name, wildcard):
self.submit_openmetric(metric.name, metric, scraper_config)
# try matching wildcard (generic check)
for wildcard in scraper_config['_metrics_wildcards']:
if fnmatchcase(metric.name, wildcard):
self.submit_openmetric(metric.name, metric, scraper_config)

def poll(self, scraper_config, headers=None):
"""
Expand Down Expand Up @@ -622,3 +624,26 @@ def _metric_tags(self, metric_name, val, sample, scraper_config, hostname=None):

def _is_value_valid(self, val):
return not (isnan(val) or isinf(val))

@staticmethod
def _match_metric_in_mapping(metric, mapping):
"""
Lookup a metric name in a mapping
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in mapping:
return mapping.get(metric.name)
elif metric.type == "counter":
return mapping.get(metric.name + "_total")

@staticmethod
def _is_metric_in_list(metric, name_list):
"""
Lookup a metric name in a list
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in name_list:
return True
return metric.type == "counter" and metric.name + "_total" in name_list
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,18 @@ def parse_metric_family(self, response):
for metric in text_fd_to_metric_families(input_gen):
metric.name = self.remove_metric_prefix(metric.name)
metric_name = "%s_bucket" % metric.name if metric.type == "histogram" else metric.name
metric_type = self.type_overrides.get(metric_name, metric.type)
metric_type = self._match_metric_in_mapping(metric, self.type_overrides) or metric.type
if metric_type == "untyped" or metric_type not in self.METRIC_TYPES:
continue

if metric.name in obj_map and metric.type == "counter":
# we already received a metric with this name
# it means "_total" was trimmed when parsing
# the counter metric, we readd the "_total" to avoid
# overwriting the metric in `obj_map` and `obj_help` and `messages`
metric.name += "_total"
metric_name += "_total"

for sample in metric.samples:
if (sample[0].endswith("_sum") or sample[0].endswith("_count")) and metric_type in [
"histogram",
Expand Down Expand Up @@ -400,15 +408,16 @@ def process(self, endpoint, **kwargs):

def store_labels(self, message):
# If targeted metric, store labels
if message.name in self.label_joins:
join_config = self._match_metric_in_mapping(message, self.label_joins)
if join_config:
matching_label = self.label_joins[message.name]['label_to_match']
for metric in message.metric:
labels_list = []
matching_value = None
for label in metric.label:
if label.name == matching_label:
matching_value = label.value
elif label.name in self.label_joins[message.name]['labels_to_get']:
elif label.name in join_config['labels_to_get']:
labels_list.append((label.name, label.value))
try:
self._label_mapping[matching_label][matching_value] = labels_list
Expand Down Expand Up @@ -448,7 +457,7 @@ def process_metric(self, message, **kwargs):
# If targeted metric, store labels
self.store_labels(message)

if message.name in self.ignore_metrics:
if self._is_metric_in_list(message, self.ignore_metrics):
return # Ignore the metric

# Filter metric to see if we can enrich with joined labels
Expand All @@ -461,15 +470,11 @@ def process_metric(self, message, **kwargs):

try:
if not self._dry_run:
try:
self._submit(
self.metrics_mapper[message.name],
message,
send_histograms_buckets,
send_monotonic_counter,
custom_tags,
)
except KeyError:
mapped_name = self._match_metric_in_mapping(message, self.metrics_mapper)
if mapped_name:
self._submit(mapped_name, message, send_histograms_buckets, send_monotonic_counter, custom_tags)
return
else:
if not ignore_unmapped:
# call magic method (non-generic check)
handler = getattr(self, message.name) # Lookup will throw AttributeError if not found
Expand Down Expand Up @@ -701,3 +706,26 @@ def _is_value_valid(self, val):
def set_prometheus_timeout(self, instance, default_value=10):
""" extract `prometheus_timeout` directly from the instance configuration """
self.prometheus_timeout = instance.get('prometheus_timeout', default_value)

@staticmethod
def _match_metric_in_mapping(metric, mapping):
"""
Lookup a metric name in a mapping
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in mapping:
return mapping.get(metric.name)
elif metric.type == "counter":
return mapping.get(metric.name + "_total")

@staticmethod
def _is_metric_in_list(metric, name_list):
"""
Lookup a metric name in a list
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in name_list:
return True
return metric.type == "counter" and metric.name + "_total" in name_list
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ openstacksdk==0.24.0
paramiko==2.1.5
pg8000==1.10.1
ply==3.10
prometheus-client==0.3.0
prometheus-client==0.6.0
protobuf==3.7.0
psutil==5.6.2
psycopg2-binary==2.8.2
Expand Down
25 changes: 23 additions & 2 deletions datadog_checks_base/datadog_checks/base/stubs/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,12 @@ def assert_metric(self, name, value=None, tags=None, count=None, at_least=1, hos
"""
Assert a metric was processed by this stub
"""
self._asserted.add(name)
mapped_name, at_least = self._match_metric_name_in_mapping(name, at_least) or [name, at_least]
self._asserted.add(mapped_name)
tags = normalize_tags(tags, sort=True)

candidates = []
for metric in self.metrics(name):
for metric in self.metrics(mapped_name):
if value is not None and not self.is_aggregate(metric.type) and value != metric.value:
continue

Expand Down Expand Up @@ -244,6 +245,26 @@ def assert_metric_has_tag_prefix(self, metric_name, tag_prefix, count=None, at_l
else:
assert len(candidates) >= at_least

def _match_metric_name_in_mapping(self, name, at_least):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we solve this instead by changing our fixtures?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ofek our current fixtures are legit prometheus payloads, I don't think we should change them otherwise we could break submitted metric names.

This change in the library is actually breaking a few projects because it enforces counter to end up with _total see matrix-org/synapse#4001 or korfuri/django-prometheus#84

breaking change: prometheus/client_python@a4dd93b#diff-0adf47ea7f99c66d4866ccb4e557a865L158

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @ofek for the review, I admit this solution is a bit hacky but as Pierre said, I'm not sure if changing our fixtures would be the best option.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay. Instead, can we please change what we assert in the affected integrations?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Late to the party but I feel responsible for this 😛

I agree with @ofek we should keep stubs as dumb as possible, the more logic we add, the higher the risk of hiding bugs and introduce tech debt. The problem here is that assertion logic shouldn't be here in the first place but I needed something quickly to make tests work again during the epic Wheels Migration™️ and the Aggregator stub seemed like "if it fits, I sits".

So the quick-and-dirty solution if we don't want to touch fixtures would be moving special assertion logic elsewhere, better if we can keep it close to where it's actually needed like Ofek suggested. Even better (out of the scope of the PR) would be putting some work on the assertion logic, make it smarter (right now can be close to useless at times) and move it out of the stubs into a dedicated package with a clear API.

"""
Treat the edge cases where trimmed counter metric names
are overwritten in `self._metrics` mapping and get appended
to other metrics with the same name in the mapping
"""
if name in self._metrics:
# nominal case
return [name, at_least]
trimmed_name = name.replace("_total", "")
if trimmed_name in self._metrics:
# trimmed counter metric name corresponds to another metric name
# should incremente the `at_least` parameter to make sure we received
# the counter metric
return [trimmed_name, at_least + 1]
trimmed_name = name.replace("total", "current")
if trimmed_name in self._metrics:
# very edge case hit by the nginx_ingress_controller check
return [trimmed_name, at_least + 1]

@property
def metrics_asserted_pct(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion datadog_checks_base/requirements.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ddtrace==0.13.0
kubernetes==8.0.1
prometheus-client==0.3.0
prometheus-client==0.6.0
protobuf==3.7.0
pywin32==224; sys_platform == 'win32'
pyyaml==3.13
Expand Down
6 changes: 4 additions & 2 deletions datadog_checks_base/tests/test_prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ def test_parse_metric_family_text(text_data, mocked_prometheus_check):
assert len(messages) == 41
# Tests correct parsing of counters
_counter = metrics_pb2.MetricFamily()
_counter.name = 'skydns_skydns_dns_cachemiss_count_total'
# we expect the parsing function to trim `_total` from the counter metric name
_counter.name = 'skydns_skydns_dns_cachemiss_count'
_counter.help = 'Counter of DNS requests that result in a cache miss.'
_counter.type = 0 # COUNTER
_c = _counter.metric.add()
Expand Down Expand Up @@ -680,7 +681,8 @@ def test_parse_one_counter(p_check):

expected_etcd_metric = metrics_pb2.MetricFamily()
expected_etcd_metric.help = "Total number of mallocs."
expected_etcd_metric.name = "go_memstats_mallocs_total"
# we expect the parsing function to trim `_total` from the counter metric name
expected_etcd_metric.name = "go_memstats_mallocs"
expected_etcd_metric.type = 0
expected_etcd_metric.metric.add().counter.value = 18713

Expand Down
6 changes: 3 additions & 3 deletions kubelet/tests/test_kubelet.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,9 +334,9 @@ def test_prometheus_filtering(monkeypatch, aggregator):
mock_method.assert_called_once()
metric = mock_method.call_args[0][0]
assert len(metric.samples) == 12
for name, labels, _ in metric.samples:
assert name == "container_cpu_usage_seconds_total"
assert labels["pod_name"] != ""
for s in metric.samples:
assert s.name == "container_cpu_usage_seconds_total"
assert s.labels["pod_name"] != ""


def test_kubelet_check_instance_config(monkeypatch):
Expand Down