Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus: expose memory breakdown metrics #11746

Merged
merged 2 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@

-define(METRICS_RAW, [

%%% Those are global, i.e. they contain no reference to queue/vhost/channel
%% Global metrics, as in, they contain no references to queues, virtual hosts or channel
{connection_churn_metrics, [
{2, undefined, connections_opened_total, counter, "Total number of connections opened"},
{3, undefined, connections_closed_total, counter, "Total number of connections closed or terminated"},
Expand Down Expand Up @@ -86,6 +86,35 @@
{2, ?MILLISECOND, erlang_uptime_seconds, gauge, "Node uptime", uptime}
]},

{node_memory, [
{2, undefined, memory_code_module_bytes, gauge, "Code module memory footprint", code},
{2, undefined, memory_client_connection_reader_bytes, gauge, "Client connection reader processes footprint in bytes", connection_readers},
{2, undefined, memory_client_connection_writer_bytes, gauge, "Client connection writer processes footprint in bytes", connection_writers},
{2, undefined, memory_client_connection_channel_bytes, gauge, "Client connection channel processes footprint in bytes", connection_channels},
{2, undefined, memory_client_connection_other_bytes, gauge, "Client connection other processes footprint in bytes", connection_other},
{2, undefined, memory_classic_queue_erlang_process_bytes, gauge, "Classic queue processes footprint in bytes", queue_procs},
{2, undefined, memory_quorum_queue_erlang_process_bytes, gauge, "Quorum queue processes footprint in bytes", quorum_queue_procs},
{2, undefined, memory_quorum_queue_dlx_erlang_process_bytes, gauge, "Quorum queue DLX worker processes footprint in bytes", quorum_queue_dlx_procs},
{2, undefined, memory_stream_erlang_process_bytes, gauge, "Stream processes footprint in bytes", stream_queue_procs},
{2, undefined, memory_stream_replica_reader_erlang_process_bytes, gauge, "Stream replica reader processes footprint in bytes", stream_queue_replica_reader_procs},
{2, undefined, memory_stream_coordinator_erlang_process_bytes, gauge, "Stream coordinator processes footprint in bytes", stream_queue_coordinator_procs},
{2, undefined, memory_plugin_bytes, gauge, "Total plugin footprint in bytes", plugins},
{2, undefined, memory_modern_metadata_store_bytes, gauge, "Modern metadata store footprint in bytes", metadata_store},
{2, undefined, memory_other_erlang_process_bytes, gauge, "Other processes footprint in bytes", other_proc},
{2, undefined, memory_metrics_bytes, gauge, "Metric table footprint in bytes", metrics},
{2, undefined, memory_management_stats_db_bytes, gauge, "Management stats database footprint in bytes", mgmt_db},
{2, undefined, memory_classic_metadata_store_bytes, gauge, "Classic metadata store footprint in bytes", mnesia},
{2, undefined, memory_quorum_queue_ets_table_bytes, gauge, "Quorum queue ETS tables footprint in bytes", quorum_ets},
{2, undefined, memory_modern_metadata_store_ets_table_bytes, gauge, "Modern metadata store ETS tables footprint in bytes", metadata_store_ets},
{2, undefined, memory_other_ets_table_bytes, gauge, "Other ETS tables footprint in bytes", other_ets},
{2, undefined, memory_binary_heap_bytes, gauge, "Binary heap size in bytes", binary},
{2, undefined, memory_message_index_bytes, gauge, "Message index footprint in bytes", msg_index},
{2, undefined, memory_atom_table_bytes, gauge, "Atom table size in bytes", atom},
{2, undefined, memory_other_system_bytes, gauge, "Other runtime footprint in bytes", other_system},
{2, undefined, memory_runtime_allocated_unused_bytes, gauge, "Runtime allocated but unused blocks size in bytes", allocated_unused},
{2, undefined, memory_runtime_reserved_unallocated_bytes, gauge, "Runtime reserved but unallocated blocks size in bytes", reserved_unallocated}
]},

{node_persister_metrics, [
{2, undefined, io_read_ops_total, counter, "Total number of I/O read operations", io_read_count},
{2, undefined, io_read_bytes_total, counter, "Total number of I/O bytes read", io_read_bytes},
Expand Down Expand Up @@ -127,7 +156,7 @@
{4, undefined, auth_attempts_detailed_failed_total, counter, "Total number of failed authentication attempts with source info"}
]},

%%% Those metrics have reference only to a queue name. This is the only group where filtering (e.g. by vhost) makes sense.
%%% These metrics only reference a queue name. This is the only group where filtering (e.g. by vhost) makes sense.
{queue_coarse_metrics, [
{2, undefined, queue_messages_ready, gauge, "Messages ready to be delivered to consumers"},
{3, undefined, queue_messages_unacked, gauge, "Messages delivered to consumers but not yet acknowledged"},
Expand Down Expand Up @@ -601,6 +630,38 @@ get_data(vhost_status, _, _) ->
false -> 0
end}
|| VHost <- rabbit_vhost:list() ];
get_data(node_memory, _, _) ->
BreakdownPL = rabbit_vm:memory(),
KeysOfInterest = [
code,
connection_readers,
connection_writers,
connection_channels,
connection_other,
queue_procs,
quorum_queue_procs,
quorum_queue_dlx_procs,
stream_queue_procs,
stream_queue_replica_reader_procs,
stream_queue_coordinator_procs,
plugins,
metadata_store,
other_proc,
metrics,
mgmt_db,
mnesia,
quorum_ets,
metadata_store_ets,
other_ets,
binary,
msg_index,
atom,
other_system,
allocated_unused,
reserved_unallocated
],
Data = maps:to_list(maps:with(KeysOfInterest, maps:from_list(BreakdownPL))),
[{node_memory, Data}];
get_data(exchange_bindings, _, _) ->
Exchanges = lists:foldl(fun
(#exchange{internal = true}, Acc) ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,10 @@ aggregated_metrics_test(Config) ->
?assertEqual(match, re:run(Body, "^rabbitmq_queue_consumers ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "TYPE rabbitmq_auth_attempts_total", [{capture, none}, multiline])),
?assertEqual(nomatch, re:run(Body, "TYPE rabbitmq_auth_attempts_detailed_total", [{capture, none}, multiline])),
%% Memory breakdown
?assertEqual(match, re:run(Body, "^rabbitmq_memory_quorum_queue_erlang_process_bytes ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_memory_classic_queue_erlang_process_bytes ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_memory_binary_heap_bytes ", [{capture, none}, multiline])),
%% Check the first metric value in each ETS table that requires converting
?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),
Expand Down
Loading