Add collection of buffer cache usage metrics
The pg_buffercache extension provides a pg_buffercache view that gives
a detailed report on shared buffer usage: which relation is using each
buffer, whether it's dirty, and how many backends are pinning it.

This patch adds the ability to collect data from pg_buffercache to
provide visibility into shared buffer usage.
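
For context, here is a minimal sketch (not part of the patch) of the kind of per-relation report the view can produce, assuming the extension is installed and the querying role is allowed to read it:

-- Sketch only: per-relation shared buffer usage from pg_buffercache.
SELECT c.relname,
       count(*)                AS buffers,
       sum(b.isdirty::int)     AS dirty_buffers,
       sum(b.pinning_backends) AS pinning_backends
FROM pg_buffercache b
JOIN pg_class c ON b.relfilenode = pg_relation_filenode(c.oid)
GROUP BY c.relname
ORDER BY buffers DESC
LIMIT 10;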
bonnefoa committed Jul 3, 2024
1 parent 23f0c89 commit 9035410
Showing 13 changed files with 110 additions and 3 deletions.
7 changes: 7 additions & 0 deletions postgres/assets/configuration/spec.yaml
@@ -256,6 +256,13 @@ files:
example:
- application_name
default: []
- name: collect_buffercache_metrics
description: |
If set to true, collects metrics regarding buffer cache usage from pg_buffercache.
The pg_buffercache extension must be installed.
value:
type: boolean
example: false
- name: collect_database_size_metrics
description: Collect database size metrics.
value:
1 change: 1 addition & 0 deletions postgres/changelog.d/17960.added
@@ -0,0 +1 @@
Add collection of buffer cache usage metrics
1 change: 1 addition & 0 deletions postgres/datadog_checks/postgres/config.py
@@ -81,6 +81,7 @@ def __init__(self, instance, init_config):
self.ssl_key = instance.get('ssl_key', None)
self.ssl_password = instance.get('ssl_password', None)
self.table_count_limit = instance.get('table_count_limit', TABLE_COUNT_LIMIT)
self.collect_buffercache_metrics = is_affirmative(instance.get('collect_buffercache_metrics', False))
self.collect_function_metrics = is_affirmative(instance.get('collect_function_metrics', False))
# Default value for `count_metrics` is True for backward compatibility
self.collect_count_metrics = is_affirmative(instance.get('collect_count_metrics', True))
4 changes: 4 additions & 0 deletions postgres/datadog_checks/postgres/config_models/defaults.py
@@ -28,6 +28,10 @@ def instance_collect_bloat_metrics():
return False


def instance_collect_buffercache_metrics():
return False


def instance_collect_checksum_metrics():
return False

1 change: 1 addition & 0 deletions postgres/datadog_checks/postgres/config_models/instance.py
@@ -200,6 +200,7 @@ class InstanceConfig(BaseModel):
azure: Optional[Azure] = None
collect_activity_metrics: Optional[bool] = None
collect_bloat_metrics: Optional[bool] = None
collect_buffercache_metrics: Optional[bool] = None
collect_checksum_metrics: Optional[bool] = None
collect_count_metrics: Optional[bool] = None
collect_database_size_metrics: Optional[bool] = None
6 changes: 6 additions & 0 deletions postgres/datadog_checks/postgres/data/conf.yaml.example
@@ -197,6 +197,12 @@ instances:
# activity_metrics_excluded_aggregations:
# - application_name

## @param collect_buffercache_metrics - boolean - optional - default: false
## If set to true, collects metrics regarding buffer cache usage from pg_buffercache.
## The pg_buffercache extension must be installed.
#
# collect_buffercache_metrics: false

## @param collect_database_size_metrics - boolean - optional - default: true
## Collect database size metrics.
#
3 changes: 3 additions & 0 deletions postgres/datadog_checks/postgres/postgres.py
@@ -41,6 +41,7 @@
ANALYZE_PROGRESS_METRICS,
AWS_RDS_HOSTNAME_SUFFIX,
AZURE_DEPLOYMENT_TYPE_TO_RESOURCE_TYPE,
BUFFERCACHE_METRICS,
CLUSTER_VACUUM_PROGRESS_METRICS,
CONNECTION_METRICS,
COUNT_METRICS,
@@ -301,6 +302,8 @@ def dynamic_queries(self):
if self._config.collect_wal_metrics is not False:
# collect wal metrics for pg >= 10 only if the user has not explicitly disabled it
queries.append(WAL_FILE_METRICS)
if self._config.collect_buffercache_metrics:
queries.append(BUFFERCACHE_METRICS)
queries.append(QUERY_PG_REPLICATION_SLOTS)
queries.append(VACUUM_PROGRESS_METRICS)
queries.append(STAT_SUBSCRIPTION_METRICS)
2 changes: 1 addition & 1 deletion postgres/datadog_checks/postgres/relationsmanager.py
@@ -150,7 +150,7 @@
}

# We used to rely on pg_stat_user_tables to get tuples and scan metrics
-# However, using this view is inefficient as it groups by aggregation on oid and schema
+# However, using this view is inefficient as it group by aggregation on oid and schema
# behind the hood, leading to possible temporary bytes being written to handle the sort.
# To avoid this, we need to directly call the pg_stat_* functions
QUERY_PG_CLASS = {
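
As an illustration of the direct-function approach that comment describes (a sketch only, not code from this patch; it assumes per-table counters are wanted for ordinary tables):

-- Sketch only: read scan/tuple counters through pg_stat_* functions,
-- avoiding the hidden aggregation done by the pg_stat_user_tables view.
SELECT c.relname,
       pg_stat_get_numscans(c.oid)    AS seq_scan,
       pg_stat_get_live_tuples(c.oid) AS n_live_tup
FROM pg_class c
WHERE c.relkind = 'r';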
46 changes: 46 additions & 0 deletions postgres/datadog_checks/postgres/util.py
@@ -695,6 +695,52 @@ def get_list_chunks(lst, n):
"max(age(backend_xmin))",
]

# pg_buffercache is implemented with a function scan, so the planner doesn't
# have a reliable estimate of the number of rows returned by pg_buffercache:
# it falls back on the function's pg_proc.prorows, which defaults to 1000.
# On top of that, the function is volatile, preventing possible inlining and
# optimisation.
# It is very likely that we have far more buffers than relations: 16GB of shared_buffers
# yields 2097152 buffers from pg_buffercache, while pg_class will mostly hold
# thousands of rows. Therefore, we write the query as a CTE aggregating on reldatabase
# and relfilenode. Given that the function is volatile, this forces the CTE to be
# materialized, and its output cardinality is at most that of pg_class.
# This is more efficient than the CTE-less version, which relies on a merge join and thus
# sorts the output of pg_buffercache.
BUFFERCACHE_METRICS = {
'name': 'buffercache_metrics',
'query': """
WITH buffer_by_relfilenode AS (
SELECT reldatabase, relfilenode,
NULLIF(COUNT(CASE WHEN relfilenode IS NOT NULL THEN 1 END), 0) as used,
COUNT(CASE WHEN relfilenode IS NULL THEN 1 END) as unused,
SUM(usagecount) as sum_usagecount,
NULLIF(SUM(isdirty::int), 0) as sum_dirty,
NULLIF(SUM(pinning_backends), 0) as sum_pinning
FROM pg_buffercache
GROUP BY reldatabase, relfilenode
)
SELECT COALESCE(d.datname, 'shared'), n.nspname, c.relname,
used, unused, sum_usagecount, sum_dirty, sum_pinning
FROM buffer_by_relfilenode b
LEFT JOIN pg_database d ON b.reldatabase = d.oid
LEFT JOIN pg_class c ON b.relfilenode = pg_relation_filenode(c.oid)
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace;
""",
'columns': [
{'name': 'db', 'type': 'tag'},
{'name': 'schema', 'type': 'tag_not_null'},
{'name': 'relation', 'type': 'tag_not_null'},
{'name': 'used_buffers', 'type': 'gauge'},
{'name': 'unused_buffers', 'type': 'gauge'},
{'name': 'usage_count', 'type': 'gauge'},
{'name': 'dirty_buffers', 'type': 'gauge'},
{'name': 'pinning_backends', 'type': 'gauge'},
],
'metric_prefix': 'postgresql.buffercache',
}

# The metrics we retrieve from pg_stat_activity when the postgres version >= 9.6
ACTIVITY_METRICS_9_6 = [
"SUM(CASE WHEN xact_start IS NOT NULL THEN 1 ELSE 0 END)",
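
For contrast with the comment above, a hedged sketch of the CTE-less formulation it warns against (illustrative only, not part of the patch): the joins run over every buffer row before the GROUP BY, inviting a merge join that sorts the whole of pg_buffercache's output.

-- Illustrative CTE-less variant: aggregation happens after the joins,
-- so e.g. 2097152 buffer rows (16GB of shared_buffers) feed the sort.
SELECT COALESCE(d.datname, 'shared') AS db, n.nspname, c.relname,
       COUNT(*) AS used, SUM(b.isdirty::int) AS sum_dirty
FROM pg_buffercache b
LEFT JOIN pg_database d ON b.reldatabase = d.oid
LEFT JOIN pg_class c ON b.relfilenode = pg_relation_filenode(c.oid)
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
GROUP BY 1, 2, 3;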
5 changes: 5 additions & 0 deletions postgres/metadata.csv
@@ -25,6 +25,11 @@ postgresql.bgwriter.maxwritten_clean,count,,,,The number of times the background
postgresql.bgwriter.sync_time,count,,millisecond,,The total amount of checkpoint processing time spent synchronizing files to disk.,0,postgres,bgw sync time,
postgresql.bgwriter.write_time,count,,millisecond,,The total amount of checkpoint processing time spent writing files to disk.,0,postgres,bgw wrt time,
postgresql.buffer_hit,gauge,,hit,second,"The number of times disk blocks were found in the buffer cache, preventing the need to read from the database. This metric is tagged with db.",1,postgres,buff hit,
postgresql.buffercache.dirty_buffers,gauge,,buffer,,"Number of dirty shared buffers. pg_buffercache extension needs to be installed. This metric is tagged by db, schema and relation.",0,postgres,buffercache dirty buffers,
postgresql.buffercache.pinning_backends,gauge,,,,"Number of backends pinning shared buffers. pg_buffercache extension needs to be installed. This metric is tagged by db, schema and relation.",0,postgres,buffercache pinning backends,
postgresql.buffercache.unused_buffers,gauge,,buffer,,"Number of unused shared buffers. pg_buffercache extension needs to be installed.",0,postgres,buffercache unused buffers,
postgresql.buffercache.usage_count,gauge,,,,"Sum of shared buffers' usage_count. pg_buffercache extension needs to be installed. This metric is tagged by db, schema and relation.",0,postgres,buffercache usage count,
postgresql.buffercache.used_buffers,gauge,,buffer,,"Number of used shared buffers. pg_buffercache extension needs to be installed. This metric is tagged by db, schema and relation.",0,postgres,buffercache buffers,
postgresql.checksums.checksum_failures,count,,,,"The number of checksum failures in this database. This metric is tagged with db.",0,postgres,checksums,
postgresql.checksums.enabled,count,,,,"Whether database checksums are enabled. Value is always 1 and tagged with enabled:true or enabled:false. This metric is tagged with db.",0,postgres,checksums.enabled,
postgresql.cluster_vacuum.heap_blks_scanned,gauge,,block,,"Number of heap blocks scanned. This counter only advances when the phase is seq scanning heap. Only available with PostgreSQL 12 and newer. This metric is tagged with db, table, command, phase, index.",0,postgres,postgres cluster blk_scanned,
5 changes: 4 additions & 1 deletion postgres/tests/common.py
@@ -119,10 +119,13 @@

def _iterate_metric_name(query):
if 'columns' in query:
metric_prefix = ''
if 'metric_prefix' in query:
metric_prefix = f'{query["metric_prefix"]}.'
for column in query['columns']:
if column['type'].startswith('tag'):
continue
yield column['name']
yield f'{metric_prefix}{column["name"]}'
else:
for metric in query['metrics'].values():
yield metric[0]
1 change: 1 addition & 0 deletions postgres/tests/compose/resources/02_setup.sh
@@ -12,6 +12,7 @@ EOSQL
fi

psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" "datadog_test" <<-'EOSQL'
CREATE EXTENSION pg_buffercache SCHEMA public;
CREATE EXTENSION pg_stat_statements SCHEMA public;
GRANT SELECT ON pg_stat_statements TO datadog;
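
One operational note not covered by the patch: by default pg_buffercache is readable only by superusers and members of pg_monitor, so a non-superuser monitoring role such as the datadog user above would typically need a grant along these lines:

-- Assumption: the datadog role from the setup script is not a superuser;
-- pg_monitor membership is one documented way to open up pg_buffercache.
GRANT pg_monitor TO datadog;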
31 changes: 30 additions & 1 deletion postgres/tests/test_pg_integration.py
@@ -14,7 +14,7 @@
from datadog_checks.base.stubs import datadog_agent
from datadog_checks.postgres import PostgreSql
from datadog_checks.postgres.__about__ import __version__
from datadog_checks.postgres.util import DatabaseHealthCheckError, PartialFormatter, fmt
from datadog_checks.postgres.util import BUFFERCACHE_METRICS, DatabaseHealthCheckError, PartialFormatter, fmt

from .common import (
COMMON_METRICS,
@@ -25,6 +25,7 @@
POSTGRES_VERSION,
USER_ADMIN,
_get_expected_tags,
_iterate_metric_name,
assert_metric_at_least,
check_activity_metrics,
check_bgw_metrics,
@@ -339,6 +340,34 @@ def test_connections_metrics(aggregator, integration_check, pg_instance):
aggregator.assert_metric('postgresql.connections', count=1, tags=expected_tags)


@requires_over_10
def test_buffercache_metrics(aggregator, integration_check, pg_instance):
pg_instance['collect_buffercache_metrics'] = True
check = integration_check(pg_instance)

with _get_superconn(pg_instance) as conn:
with conn.cursor() as cur:
# Generate some usage on persons relation
cur.execute('SELECT * FROM persons;')

check.check(pg_instance)
base_tags = _get_expected_tags(check, pg_instance)

# Check specific persons relation
persons_tags = base_tags + ['relation:persons', 'db:datadog_test', 'schema:public']
metrics_not_emitted_if_zero = ['postgresql.buffercache.pinning_backends', 'postgresql.buffercache.dirty_buffers']
for metric in _iterate_metric_name(BUFFERCACHE_METRICS):
if metric in metrics_not_emitted_if_zero:
aggregator.assert_metric(metric, count=0, tags=persons_tags)
else:
aggregator.assert_metric(metric, count=1, tags=persons_tags)

# Check metric reported for unused buffers
unused_buffers_tags = base_tags + ['db:shared']
unused_metric = 'postgresql.buffercache.unused_buffers'
aggregator.assert_metric(unused_metric, count=1, tags=unused_buffers_tags)


def test_locks_metrics_no_relations(aggregator, integration_check, pg_instance):
"""
Since 4.0.0, to prevent tag explosion, lock metrics are not collected anymore unless relations are specified
