rabbitmq · michaelklishin · Jul 18, 2024 · Jul 18, 2024 · Jul 18, 2024
diff --git a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl
@@ -57,7 +57,7 @@
 
 -define(METRICS_RAW, [
 
-%%% Those are global, i.e. they contain no reference to queue/vhost/channel
+    %% Global metrics, i.e. they contain no references to queues, virtual hosts or channels
     {connection_churn_metrics, [
         {2, undefined, connections_opened_total, counter, "Total number of connections opened"},
         {3, undefined, connections_closed_total, counter, "Total number of connections closed or terminated"},
@@ -86,6 +86,35 @@
         {2, ?MILLISECOND, erlang_uptime_seconds, gauge, "Node uptime", uptime}
     ]},
 
+    {node_memory, [
+        {2, undefined, memory_code_module_bytes, gauge, "Code module memory footprint", code},
+        {2, undefined, memory_client_connection_reader_bytes, gauge, "Client connection reader processes footprint in bytes", connection_readers},
+        {2, undefined, memory_client_connection_writer_bytes, gauge, "Client connection writer processes footprint in bytes", connection_writers},
+        {2, undefined, memory_client_connection_channel_bytes, gauge, "Client connection channel processes footprint in bytes", connection_channels},
+        {2, undefined, memory_client_connection_other_bytes, gauge, "Client connection other processes footprint in bytes", connection_other},
+        {2, undefined, memory_classic_queue_erlang_process_bytes, gauge, "Classic queue processes footprint in bytes", queue_procs},
+        {2, undefined, memory_quorum_queue_erlang_process_bytes, gauge, "Quorum queue processes footprint in bytes", quorum_queue_procs},
+        {2, undefined, memory_quorum_queue_dlx_erlang_process_bytes, gauge, "Quorum queue DLX worker processes footprint in bytes", quorum_queue_dlx_procs},
+        {2, undefined, memory_stream_erlang_process_bytes, gauge, "Stream processes footprint in bytes", stream_queue_procs},
+        {2, undefined, memory_stream_replica_reader_erlang_process_bytes, gauge, "Stream replica reader processes footprint in bytes", stream_queue_replica_reader_procs},
+        {2, undefined, memory_stream_coordinator_erlang_process_bytes, gauge, "Stream coordinator processes footprint in bytes", stream_queue_coordinator_procs},
+        {2, undefined, memory_plugin_bytes, gauge, "Total plugin footprint in bytes", plugins},
+        {2, undefined, memory_modern_metadata_store_bytes, gauge, "Modern metadata store footprint in bytes", metadata_store},
+        {2, undefined, memory_other_erlang_process_bytes, gauge, "Other processes footprint in bytes", other_proc},
+        {2, undefined, memory_metrics_bytes, gauge, "Metric table footprint in bytes", metrics},
+        {2, undefined, memory_management_stats_db_bytes, gauge, "Management stats database footprint in bytes", mgmt_db},
+        {2, undefined, memory_classic_metadata_store_bytes, gauge, "Classic metadata store footprint in bytes", mnesia},
+        {2, undefined, memory_quorum_queue_ets_table_bytes, gauge, "Quorum queue ETS tables footprint in bytes", quorum_ets},
+        {2, undefined, memory_modern_metadata_store_ets_table_bytes, gauge, "Modern metadata store ETS tables footprint in bytes", metadata_store_ets},
+        {2, undefined, memory_other_ets_table_bytes, gauge, "Other ETS tables footprint in bytes", other_ets},
+        {2, undefined, memory_binary_heap_bytes, gauge, "Binary heap size in bytes", binary},
+        {2, undefined, memory_message_index_bytes, gauge, "Message index footprint in bytes", msg_index},
+        {2, undefined, memory_atom_table_bytes, gauge, "Atom table size in bytes", atom},
+        {2, undefined, memory_other_system_bytes, gauge, "Other runtime footprint in bytes", other_system},
+        {2, undefined, memory_runtime_allocated_unused_bytes, gauge, "Runtime allocated but unused blocks size in bytes", allocated_unused},
+        {2, undefined, memory_runtime_reserved_unallocated_bytes, gauge, "Runtime reserved but unallocated blocks size in bytes", reserved_unallocated}
+    ]},
+
     {node_persister_metrics, [
         {2, undefined, io_read_ops_total, counter, "Total number of I/O read operations", io_read_count},
         {2, undefined, io_read_bytes_total, counter, "Total number of I/O bytes read", io_read_bytes},
@@ -127,7 +156,7 @@
         {4, undefined, auth_attempts_detailed_failed_total, counter, "Total number of failed authentication attempts with source info"}
     ]},
 
-%%% Those metrics have reference only to a queue name. This is the only group where filtering (e.g. by vhost) makes sense.
+    %% These metrics only reference a queue name. This is the only group where filtering (e.g. by vhost) makes sense.
     {queue_coarse_metrics, [
         {2, undefined, queue_messages_ready, gauge, "Messages ready to be delivered to consumers"},
         {3, undefined, queue_messages_unacked, gauge, "Messages delivered to consumers but not yet acknowledged"},
@@ -601,6 +630,38 @@ get_data(vhost_status, _, _) ->
             false -> 0
         end}
       || VHost <- rabbit_vhost:list()  ];
+get_data(node_memory, _, _) ->
+    BreakdownPL = rabbit_vm:memory(),
+    KeysOfInterest = [
+        code,
+        connection_readers,
+        connection_writers,
+        connection_channels,
+        connection_other,
+        queue_procs,
+        quorum_queue_procs,
+        quorum_queue_dlx_procs,
+        stream_queue_procs,
+        stream_queue_replica_reader_procs,
+        stream_queue_coordinator_procs,
+        plugins,
+        metadata_store,
+        other_proc,
+        metrics,
+        mgmt_db,
+        mnesia,
+        quorum_ets,
+        metadata_store_ets,
+        other_ets,
+        binary,
+        msg_index,
+        atom,
+        other_system,
+        allocated_unused,
+        reserved_unallocated
+    ],
+    Data = maps:to_list(maps:with(KeysOfInterest, maps:from_list(BreakdownPL))),
+    [{node_memory, Data}];
 get_data(exchange_bindings, _, _) ->
     Exchanges = lists:foldl(fun
                                 (#exchange{internal = true}, Acc) ->

diff --git a/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl b/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl
@@ -381,6 +381,10 @@ aggregated_metrics_test(Config) ->
     ?assertEqual(match, re:run(Body, "^rabbitmq_queue_consumers ", [{capture, none}, multiline])),
     ?assertEqual(match, re:run(Body, "TYPE rabbitmq_auth_attempts_total", [{capture, none}, multiline])),
     ?assertEqual(nomatch, re:run(Body, "TYPE rabbitmq_auth_attempts_detailed_total", [{capture, none}, multiline])),
+    %% Memory breakdown
+    ?assertEqual(match, re:run(Body, "^rabbitmq_memory_quorum_queue_erlang_process_bytes ", [{capture, none}, multiline])),
+    ?assertEqual(match, re:run(Body, "^rabbitmq_memory_classic_queue_erlang_process_bytes ", [{capture, none}, multiline])),
+    ?assertEqual(match, re:run(Body, "^rabbitmq_memory_binary_heap_bytes ", [{capture, none}, multiline])),
     %% Check the first metric value in each ETS table that requires converting
     ?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])),
     ?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),