Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Update prometheus client to 0.6.0, handle counter metric name change #3700

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def parse_metric_family(self, response, scraper_config):
input_gen = self._text_filter_input(input_gen, scraper_config)

for metric in text_fd_to_metric_families(input_gen):
metric.type = scraper_config['type_overrides'].get(metric.name, metric.type)
metric.type = self._match_metric_in_mapping(metric, scraper_config['type_overrides']) or metric.type
if metric.type not in self.METRIC_TYPES:
continue
metric.name = self._remove_metric_prefix(metric.name, scraper_config)
Expand Down Expand Up @@ -301,8 +301,9 @@ def process(self, scraper_config, metric_transformers=None):

def _store_labels(self, metric, scraper_config):
# If targeted metric, store labels
if metric.name in scraper_config['label_joins']:
matching_label = scraper_config['label_joins'][metric.name]['label_to_match']
join_config = self._match_metric_in_mapping(metric, scraper_config['label_joins'])
if join_config:
matching_label = join_config['label_to_match']
for sample in metric.samples:
# metadata-only metrics that are used for label joins are always equal to 1
# this is required for metrics where all combinations of a state are sent
Expand All @@ -315,7 +316,7 @@ def _store_labels(self, metric, scraper_config):
for label_name, label_value in iteritems(sample[self.SAMPLE_LABELS]):
if label_name == matching_label:
matching_value = label_value
elif label_name in scraper_config['label_joins'][metric.name]['labels_to_get']:
elif label_name in join_config['labels_to_get']:
label_dict[label_name] = label_value
try:
if scraper_config['_label_mapping'][matching_label].get(matching_value):
Expand Down Expand Up @@ -357,7 +358,7 @@ def process_metric(self, metric, scraper_config, metric_transformers=None):
# If targeted metric, store labels
self._store_labels(metric, scraper_config)

if metric.name in scraper_config['ignore_metrics']:
if self._is_metric_in_list(metric, scraper_config['ignore_metrics']):
return # Ignore the metric

# Filter metric to see if we can enrich with joined labels
Expand All @@ -366,31 +367,32 @@ def process_metric(self, metric, scraper_config, metric_transformers=None):
if scraper_config['_dry_run']:
return

try:
self.submit_openmetric(scraper_config['metrics_mapper'][metric.name], metric, scraper_config)
except KeyError:
if metric_transformers is not None:
if metric.name in metric_transformers:
try:
# Get the transformer function for this specific metric
transformer = metric_transformers[metric.name]
transformer(metric, scraper_config)
except Exception as err:
self.log.warning("Error handling metric: {} - error: {}".format(metric.name, err))
else:
self.log.debug(
"Unable to handle metric: {0} - error: "
"No handler function named '{0}' defined".format(metric.name)
)
mapped_name = self._match_metric_in_mapping(metric, scraper_config['metrics_mapper'])
if mapped_name:
self.submit_openmetric(mapped_name, metric, scraper_config)
return

if metric_transformers is not None:
transformer = self._match_metric_in_mapping(metric, metric_transformers)
if transformer:
try:
transformer(metric, scraper_config)
except Exception as err:
self.log.warning("Error handling metric: {} - error: {}".format(metric.name, err))
else:
# build the wildcard list if first pass
if scraper_config['_metrics_wildcards'] is None:
scraper_config['_metrics_wildcards'] = [x for x in scraper_config['metrics_mapper'] if '*' in x]
self.log.debug(
"Unable to handle metric: {0} - error: "
"No handler function named '{0}' defined".format(metric.name)
)
else:
# build the wildcard list if first pass
if scraper_config['_metrics_wildcards'] is None:
scraper_config['_metrics_wildcards'] = [x for x in scraper_config['metrics_mapper'] if '*' in x]

# try matching wildcard (generic check)
for wildcard in scraper_config['_metrics_wildcards']:
if fnmatchcase(metric.name, wildcard):
self.submit_openmetric(metric.name, metric, scraper_config)
# try matching wildcard (generic check)
for wildcard in scraper_config['_metrics_wildcards']:
if fnmatchcase(metric.name, wildcard):
self.submit_openmetric(metric.name, metric, scraper_config)

def poll(self, scraper_config, headers=None):
"""
Expand Down Expand Up @@ -622,3 +624,26 @@ def _metric_tags(self, metric_name, val, sample, scraper_config, hostname=None):

def _is_value_valid(self, val):
return not (isnan(val) or isinf(val))

@staticmethod
def _match_metric_in_mapping(metric, mapping):
"""
Lookup a metric name in a mapping
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in mapping:
return mapping.get(metric.name)
elif metric.type == "counter":
return mapping.get(metric.name + "_total")

@staticmethod
def _is_metric_in_list(metric, name_list):
"""
Lookup a metric name in a list
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in name_list:
return True
return metric.type == "counter" and metric.name + "_total" in name_list
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,18 @@ def parse_metric_family(self, response):
for metric in text_fd_to_metric_families(input_gen):
metric.name = self.remove_metric_prefix(metric.name)
metric_name = "%s_bucket" % metric.name if metric.type == "histogram" else metric.name
metric_type = self.type_overrides.get(metric_name, metric.type)
metric_type = self._match_metric_in_mapping(metric, self.type_overrides) or metric.type
if metric_type == "untyped" or metric_type not in self.METRIC_TYPES:
continue

if metric.name in obj_map and metric.type == "counter":
# we already received a metric with this name
# it means "_total" was trimmed when parsing
# the counter metric, we readd the "_total" to avoid
# overwriting the metric in `obj_map` and `obj_help` and `messages`
metric.name += "_total"
metric_name += "_total"

for sample in metric.samples:
if (sample[0].endswith("_sum") or sample[0].endswith("_count")) and metric_type in [
"histogram",
Expand Down Expand Up @@ -400,15 +408,16 @@ def process(self, endpoint, **kwargs):

def store_labels(self, message):
# If targeted metric, store labels
if message.name in self.label_joins:
join_config = self._match_metric_in_mapping(message, self.label_joins)
if join_config:
matching_label = self.label_joins[message.name]['label_to_match']
for metric in message.metric:
labels_list = []
matching_value = None
for label in metric.label:
if label.name == matching_label:
matching_value = label.value
elif label.name in self.label_joins[message.name]['labels_to_get']:
elif label.name in join_config['labels_to_get']:
labels_list.append((label.name, label.value))
try:
self._label_mapping[matching_label][matching_value] = labels_list
Expand Down Expand Up @@ -448,7 +457,7 @@ def process_metric(self, message, **kwargs):
# If targeted metric, store labels
self.store_labels(message)

if message.name in self.ignore_metrics:
if self._is_metric_in_list(message, self.ignore_metrics):
return # Ignore the metric

# Filter metric to see if we can enrich with joined labels
Expand All @@ -461,15 +470,11 @@ def process_metric(self, message, **kwargs):

try:
if not self._dry_run:
try:
self._submit(
self.metrics_mapper[message.name],
message,
send_histograms_buckets,
send_monotonic_counter,
custom_tags,
)
except KeyError:
mapped_name = self._match_metric_in_mapping(message, self.metrics_mapper)
if mapped_name:
self._submit(mapped_name, message, send_histograms_buckets, send_monotonic_counter, custom_tags)
return
else:
if not ignore_unmapped:
# call magic method (non-generic check)
handler = getattr(self, message.name) # Lookup will throw AttributeError if not found
Expand Down Expand Up @@ -701,3 +706,26 @@ def _is_value_valid(self, val):
def set_prometheus_timeout(self, instance, default_value=10):
""" extract `prometheus_timeout` directly from the instance configuration """
self.prometheus_timeout = instance.get('prometheus_timeout', default_value)

@staticmethod
def _match_metric_in_mapping(metric, mapping):
"""
Lookup a metric name in a mapping
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in mapping:
return mapping.get(metric.name)
elif metric.type == "counter":
return mapping.get(metric.name + "_total")

@staticmethod
def _is_metric_in_list(metric, name_list):
"""
Lookup a metric name in a list
If the lookup fails for a counter metric, add `_total`suffix
to the metric name and perform a second lookup
"""
if metric.name in name_list:
return True
return metric.type == "counter" and metric.name + "_total" in name_list
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ openstacksdk==0.24.0
paramiko==2.1.5
pg8000==1.10.1
ply==3.10
prometheus-client==0.3.0
prometheus-client==0.6.0
protobuf==3.7.0
psutil==5.6.2
psycopg2-binary==2.8.2
Expand Down
25 changes: 23 additions & 2 deletions datadog_checks_base/datadog_checks/base/stubs/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,12 @@ def assert_metric(self, name, value=None, tags=None, count=None, at_least=1, hos
"""
Assert a metric was processed by this stub
"""
self._asserted.add(name)
mapped_name, at_least = self._match_metric_name_in_mapping(name, at_least) or [name, at_least]
self._asserted.add(mapped_name)
tags = normalize_tags(tags, sort=True)

candidates = []
for metric in self.metrics(name):
for metric in self.metrics(mapped_name):
if value is not None and not self.is_aggregate(metric.type) and value != metric.value:
continue

Expand Down Expand Up @@ -244,6 +245,26 @@ def assert_metric_has_tag_prefix(self, metric_name, tag_prefix, count=None, at_l
else:
assert len(candidates) >= at_least

def _match_metric_name_in_mapping(self, name, at_least):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we solve this instead by changing our fixtures?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ofek our current fixtures are legit prometheus payloads, I don't think we should change them otherwise we could break submitted metric names.

This change in the library is actually breaking a few projects because it enforces counter to end up with _total see matrix-org/synapse#4001 or korfuri/django-prometheus#84

breaking change: prometheus/client_python@a4dd93b#diff-0adf47ea7f99c66d4866ccb4e557a865L158

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @ofek for the review, I admit this solution is a bit hacky but as Pierre said, I'm not sure if changing our fixtures would be the best option.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay. Instead, can we please change what we assert in the affected integrations?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Late to the party but I feel responsible for this 😛

I agree with @ofek we should keep stubs as dumb as possible, the more logic we add, the higher the risk of hiding bugs and introduce tech debt. The problem here is that assertion logic shouldn't be here in the first place but I needed something quickly to make tests work again during the epic Wheels Migration™️ and the Aggregator stub seemed like "if it fits, I sits".

So the quick-and-dirty solution if we don't want to touch fixtures would be moving special assertion logic elsewhere, better if we can keep it close to where it's actually needed like Ofek suggested. Even better (out of the scope of the PR) would be putting some work on the assertion logic, make it smarter (right now can be close to useless at times) and move it out of the stubs into a dedicated package with a clear API.

"""
Treat the edge cases where trimmed counter metric names
are overwritten in `self._metrics` mapping and get appended
to other metrics with the same name in the mapping
"""
if name in self._metrics:
# nominal case
return [name, at_least]
trimmed_name = name.replace("_total", "")
if trimmed_name in self._metrics:
# trimmed counter metric name corresponds to another metric name
# should incremente the `at_least` parameter to make sure we received
# the counter metric
return [trimmed_name, at_least + 1]
trimmed_name = name.replace("total", "current")
if trimmed_name in self._metrics:
# very edge case hit by the nginx_ingress_controller check
return [trimmed_name, at_least + 1]

@property
def metrics_asserted_pct(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion datadog_checks_base/requirements.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ddtrace==0.13.0
kubernetes==8.0.1
prometheus-client==0.3.0
prometheus-client==0.6.0
protobuf==3.7.0
pywin32==224; sys_platform == 'win32'
pyyaml==3.13
Expand Down
6 changes: 4 additions & 2 deletions datadog_checks_base/tests/test_prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ def test_parse_metric_family_text(text_data, mocked_prometheus_check):
assert len(messages) == 41
# Tests correct parsing of counters
_counter = metrics_pb2.MetricFamily()
_counter.name = 'skydns_skydns_dns_cachemiss_count_total'
# we expect the parsing function to trim `_total` from the counter metric name
_counter.name = 'skydns_skydns_dns_cachemiss_count'
_counter.help = 'Counter of DNS requests that result in a cache miss.'
_counter.type = 0 # COUNTER
_c = _counter.metric.add()
Expand Down Expand Up @@ -680,7 +681,8 @@ def test_parse_one_counter(p_check):

expected_etcd_metric = metrics_pb2.MetricFamily()
expected_etcd_metric.help = "Total number of mallocs."
expected_etcd_metric.name = "go_memstats_mallocs_total"
# we expect the parsing function to trim `_total` from the counter metric name
expected_etcd_metric.name = "go_memstats_mallocs"
expected_etcd_metric.type = 0
expected_etcd_metric.metric.add().counter.value = 18713

Expand Down
6 changes: 3 additions & 3 deletions kubelet/tests/test_kubelet.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,9 +334,9 @@ def test_prometheus_filtering(monkeypatch, aggregator):
mock_method.assert_called_once()
metric = mock_method.call_args[0][0]
assert len(metric.samples) == 12
for name, labels, _ in metric.samples:
assert name == "container_cpu_usage_seconds_total"
assert labels["pod_name"] != ""
for s in metric.samples:
assert s.name == "container_cpu_usage_seconds_total"
assert s.labels["pod_name"] != ""


def test_kubelet_check_instance_config(monkeypatch):
Expand Down