From 91e52a9d05a774d894e1f3b0e1db675183cace5f Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Tue, 21 May 2024 17:15:46 -0400 Subject: [PATCH] Added Support for Alloy Module Imports Resolves #537 --- charts/k8s-monitoring/README.md | 7 + charts/k8s-monitoring/templates/_configs.tpl | 4 + .../alloy_config/_alloy-modules.alloy.txt | 103 + charts/k8s-monitoring/values.schema.json | 11 + charts/k8s-monitoring/values.yaml | 36 + examples/metric-module-imports/README.md | 43 + examples/metric-module-imports/events.alloy | 48 + examples/metric-module-imports/logs.alloy | 155 + examples/metric-module-imports/metrics.alloy | 1001 + examples/metric-module-imports/output.yaml | 51415 ++++++++++++++++ examples/metric-module-imports/profiles.alloy | 0 examples/metric-module-imports/values.yaml | 38 + 12 files changed, 52861 insertions(+) create mode 100644 charts/k8s-monitoring/templates/alloy_config/_alloy-modules.alloy.txt create mode 100644 examples/metric-module-imports/README.md create mode 100644 examples/metric-module-imports/events.alloy create mode 100644 examples/metric-module-imports/logs.alloy create mode 100644 examples/metric-module-imports/metrics.alloy create mode 100644 examples/metric-module-imports/output.yaml create mode 100644 examples/metric-module-imports/profiles.alloy create mode 100644 examples/metric-module-imports/values.yaml diff --git a/charts/k8s-monitoring/README.md b/charts/k8s-monitoring/README.md index cf4b4fb8e..605bf0986 100644 --- a/charts/k8s-monitoring/README.md +++ b/charts/k8s-monitoring/README.md @@ -373,6 +373,13 @@ The Prometheus and Loki services may be hosted on the same cluster, or remotely | metrics.alloy.metricsTuning.useIntegrationAllowList | bool | `false` | Filter the list of metrics from Grafana Alloy to the minimal set required for Kubernetes Monitoring as well as the Grafana Alloy integration. | | metrics.alloy.scrapeInterval | string | 60s | How frequently to scrape metrics from Grafana Alloy. Overrides metrics.scrapeInterval | +### Metrics Job: Alloy Modules + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| metrics.alloyModules.connections | list | `[]` | List of connection configurations used by modules. Configures the import.git component ([docs](https://grafana.com/docs/alloy/latest/reference/components/import.git/)
- `alias: ""` the alias of the connection
- `repository: ""` URL of the Git repository containing the module.
- `revision: ""` Branch, tag, or commit to be checked out.
- `pull_frequency: 15m` How often the module should check for updates.
- `default: true` If true, this connection is used as the default when none is specified.
- `basic_auth: {}` Credentials for basic authentication if needed. ([docs](https://grafana.com/docs/alloy/latest/reference/config-blocks/import.git/#basic_auth-block))
- `ssh_key: {}` Provides SSH key details for secure connections. ([docs](https://grafana.com/docs/alloy/latest/reference/config-blocks/import.git/#ssh_key-block)) | +| metrics.alloyModules.modules | list | `[]` | List of Modules to import. Each module is expected to have a "kubernetes" module and a "scrape" component. Each module can have the following properties:
- `path: ""` the path to the alloy module
- `connection: ""` (optional) the alias of the connection to use, if not specified the default connection is used
- `targets: {}` (optional) Additional arguments to be passed to the modules kubernetes component
- `scrape: {}` (optional) Additional arguments to be passed to the modules scrape component
- `extraRelabelingRules: ""` additional relabeling rules for the discovery.relabel component
- `extraMetricRelabelingRules:` additional relabeling rules for the prometheus.relabel component | + ### Metrics Job: ApiServer | Key | Type | Default | Description | diff --git a/charts/k8s-monitoring/templates/_configs.tpl b/charts/k8s-monitoring/templates/_configs.tpl index 359093a25..d9c8ee76a 100644 --- a/charts/k8s-monitoring/templates/_configs.tpl +++ b/charts/k8s-monitoring/templates/_configs.tpl @@ -77,6 +77,10 @@ {{- include "alloy.config.service_monitors" . }} {{- end }} + {{- if len .Values.metrics.alloyModules.modules }} + {{- include "alloy.config.alloyMetricModules" . }} + {{- end }} + {{- include "alloy.config.metricsService" . }} {{- end }} diff --git a/charts/k8s-monitoring/templates/alloy_config/_alloy-modules.alloy.txt b/charts/k8s-monitoring/templates/alloy_config/_alloy-modules.alloy.txt new file mode 100644 index 000000000..fbd91a008 --- /dev/null +++ b/charts/k8s-monitoring/templates/alloy_config/_alloy-modules.alloy.txt @@ -0,0 +1,103 @@ +{{ define "alloy.config.alloyMetricModules" }} +// Alloy Modules +{{- $connections := .Values.metrics.alloyModules.connections }} +{{- /* Determine the default connection */}} +{{- $defaultConnection := dict }} +{{- range $connections }} + {{- if .default }} + {{- $defaultConnection = . }} + {{- end }} +{{- end }} +{{- $extraRelabelingRules := .Values.metrics.extraRelabelingRules | default "" }} +{{- $extraMetricRelabelingRules := .Values.metrics.extraMetricRelabelingRules | default "" }} +{{- $defaultMaxCacheSize := .Values.metrics.maxCacheSize }} +{{- $defaultScrapeInterval := .Values.metrics.scrapeInterval }} +{{- $clustering := .Values.alloy.alloy.clustering.enabled }} + +{{- range $module := .Values.metrics.alloyModules.modules }} + {{- $moduleName := $module.alias }} + {{- /* Find the connection for the module or use the default */}} + {{- $moduleConnection := $defaultConnection }} + {{- range $conn := $connections }} + {{- if and $module.connection (eq $conn.alias $module.connection) }} + {{- $moduleConnection = $conn }} + {{- end }} + {{- end }} + +// {{ $moduleName }}: import the module +import.git "module_{{ $moduleName }}" { + repository = "{{ $moduleConnection.repository }}" + revision = "{{ $moduleConnection.revision }}" + path = "{{ $module.path }}" + pull_frequency = "{{ $moduleConnection.pull_frequency }}" + + {{- with $moduleConnection.basic_auth }} + basic_auth { + {{ if .username }}username = "{{ .username }}"{{ end }} + {{ if .password }}password = "{{ .password }}"{{ end }} + {{ if .password_file }}password_file = "{{ .password_file }}"{{ end }} + } + {{- end }} + + {{- with $moduleConnection.ssh_key }} + ssh_key { + {{ if .username }}username = "{{ .username }}"{{ end }} + {{ if .key }}key = "{{ .key }}"{{ end }} + {{ if .key_file }}key_file = "{{ .key_file }}"{{ end }} + {{ if .passphrase }}passphrase = "{{ .passphrase }}"{{ end }} + } + {{- end }} +} + +// {{ $moduleName }}: call the targets component +module_{{ $moduleName }}.kubernetes "targets" { + {{- if $module.targets }} + {{- range $key, $value := $module.targets }} + {{ $key }} = {{ if kindIs "slice" $value }}[{{ range $index, $item := $value }}"{{ $item }}"{{ if lt $index (sub (len $value) 1) }}, {{ end }}{{ end }}]{{ else }}"{{ $value }}"{{ end }} + {{- end }} + {{- end }} +} + +// {{ $moduleName }}: additional service discovery relabelings +discovery.relabel "module_{{ $moduleName }}" { + targets = module_{{ $moduleName }}.kubernetes.targets.output + // global discovery relabelings + {{- if $extraRelabelingRules }} + {{ $extraRelabelingRules | 
indent 2 }} + {{- end }} + // {{ $moduleName }}: additional discovery relabelings + {{- if $module.extraRelabelingRules }} + {{ $module.extraRelabelingRules | indent 2 }} + {{- end }} +} + +// {{ $moduleName }}: call the scrape component +module_{{ $moduleName }}.scrape "metrics" { + targets = discovery.relabel.module_{{ $moduleName }}.output + forward_to = [prometheus.relabel.module_{{ $moduleName }}.receiver] + clustering = {{ $clustering }} + scrape_interval = "{{ if and (hasKey $module "scrape") (hasKey $module.scrape "scrape_interval") }}{{ index $module.scrape "scrape_interval" }}{{- else }}{{ $defaultScrapeInterval }}{{- end }}" + {{- if $module.scrape }} + {{- range $key, $value := $module.scrape }} + {{- if not (eq $key "scrape_interval") }} + {{ $key }} = {{ if kindIs "slice" $value }}[{{ range $index, $item := $value }}"{{ $item }}"{{ if lt $index (sub (len $value) 1) }}, {{ end }}{{ end }}]{{ else }}"{{ $value }}"{{ end }} + {{- end }} + {{- end }} + {{- end }} +} + +// {{ $moduleName }}: additional metric relabelings +prometheus.relabel "module_{{ $moduleName }}" { + max_cache_size = {{ if and (hasKey $module "scrape") (hasKey $module.scrape "max_cache_size") }}{{ index $module.scrape "max_cache_size" | int }}{{- else }}{{ $defaultMaxCacheSize | int }}{{- end }} + // global metric relabelings + {{- if $extraMetricRelabelingRules }} + {{ $extraMetricRelabelingRules | indent 2 }} + {{- end }} + // additional {{ $moduleName }} module specific metric relabelings + {{- if $module.extraMetricRelabelingRules }} + {{ $module.extraMetricRelabelingRules | indent 2 }} + {{- end }} + forward_to = [prometheus.relabel.metrics_service.receiver] +} +{{- end }} +{{ end }} diff --git a/charts/k8s-monitoring/values.schema.json b/charts/k8s-monitoring/values.schema.json index f393c04af..733247fdd 100644 --- a/charts/k8s-monitoring/values.schema.json +++ b/charts/k8s-monitoring/values.schema.json @@ -796,6 +796,17 @@ } } }, + "alloyModules": { + "type": "object", + "properties": { + "connections": { + "type": "array" + }, + "modules": { + "type": "array" + } + } + }, "apiserver": { "type": "object", "properties": { diff --git a/charts/k8s-monitoring/values.yaml b/charts/k8s-monitoring/values.yaml index 082ffa8e6..d04654aea 100644 --- a/charts/k8s-monitoring/values.yaml +++ b/charts/k8s-monitoring/values.yaml @@ -1158,6 +1158,41 @@ metrics: # @section -- Metrics Job: Kubernetes Monitoring Telemetry enabled: true + # Alloy Modules + # Modules can be invoked using metrics.extraConfig, this block is consuming opinionated modules from the grafana/alloy-modules repository + # or any other repository that follows the same module structure. Each module is expected to have a "kubernetes" module and a "scrape" module. + alloyModules: + # -- List of connection configurations used by modules. Configures the import.git component + # ([docs](https://grafana.com/docs/alloy/latest/reference/components/import.git/) + #
- `alias: ""` the alias of the connection + #
- `repository: ""` URL of the Git repository containing the module. + #
- `revision: ""` Branch, tag, or commit to be checked out. + #
- `pull_frequency: 15m` How often the module should check for updates. + #
- `default: true` If true, this connection is used as the default when none is specified. + #
- `basic_auth: {}` Credentials for basic authentication if needed. ([docs](https://grafana.com/docs/alloy/latest/reference/config-blocks/import.git/#basic_auth-block)) + #
- `ssh_key: {}` Provides SSH key details for secure connections. ([docs](https://grafana.com/docs/alloy/latest/reference/config-blocks/import.git/#ssh_key-block)) + # @section -- Metrics Job: Alloy Modules + connections: [] + # - alias: grafana + # repository: https://github.com/grafana/alloy-modules.git + # revision: main + # pull_frequency: 15m + # default: true + + # -- List of Modules to import. Each module is expected to have a "kubernetes" module and a "scrape" component. + # Each module can have the following properties: + #
- `path: ""` the path to the alloy module + #
- `connection: ""` (optional) the alias of the connection to use, if not specified the default connection is used + #
- `targets: {}` (optional) Additional arguments to be passed to the modules kubernetes component + #
- `scrape: {}` (optional) Additional arguments to be passed to the modules scrape component + #
- `extraRelabelingRules: ""` additional relabeling rules for the discovery.relabel component + #
- `extraMetricRelabelingRules:` additional relabeling rules for the prometheus.relabel component + # @section -- Metrics Job: Alloy Modules + modules: [] + # - alias: memcached + # path: modules/databases/kv/memcached/metrics.alloy + + # Settings related to metrics ingested via receivers # @section -- Metrics -> OTEL Receivers receiver: @@ -1274,6 +1309,7 @@ logs: # -- Stage blocks to be added to the loki.process component for cluster events. # ([docs](https://grafana.com/docs/alloy/latest/reference/components/loki.process/#blocks)) # This value is templated so that you can refer to other values from this file. + # @section -- Logs Scrape: Cluster Events extraStageBlocks: "" # -- Logs the cluster events to stdout. Useful for debugging. diff --git a/examples/metric-module-imports/README.md b/examples/metric-module-imports/README.md new file mode 100644 index 000000000..b4c8ce74d --- /dev/null +++ b/examples/metric-module-imports/README.md @@ -0,0 +1,43 @@ +# Alloy Modules + +This example shows a how to leverage the [Alloy Modules](https://github.com/grafana/alloy-modules) for collecting metrics. These modules are opinionated, where each module has at least the following two components defined: + +1. `kubernetes` Used to find targets in the cluster +2. `scrape` Used to scrape the found targets. + +```yaml +cluster: + name: metric-module-imports + +externalServices: + prometheus: + host: https://prometheus.example.com + basicAuth: + username: 12345 + password: "It's a secret to everyone" + loki: + host: https://loki.example.com + basicAuth: + username: 12345 + password: "It's a secret to everyone" + +metrics: + alloyModules: + connections: + - alias: grafana + repository: https://github.com/grafana/alloy-modules.git + revision: main + pull_frequency: 15m + default: true + modules: + - alias: memcached + path: modules/databases/kv/memcached/metrics.alloy + - alias: loki + path: modules/databases/timeseries/loki/metrics.alloy + - alias: mimir + path: modules/databases/timeseries/mimir/metrics.alloy + - alias: tempo + path: modules/databases/timeseries/tempo/metrics.alloy + - alias: grafana + path: modules/ui/grafana/metrics.alloy +``` diff --git a/examples/metric-module-imports/events.alloy b/examples/metric-module-imports/events.alloy new file mode 100644 index 000000000..02aad127e --- /dev/null +++ b/examples/metric-module-imports/events.alloy @@ -0,0 +1,48 @@ +// Cluster Events +loki.source.kubernetes_events "cluster_events" { + job_name = "integrations/kubernetes/eventhandler" + log_format = "logfmt" + forward_to = [ + loki.process.cluster_events.receiver, + ] +} + +loki.process "cluster_events" { + forward_to = [ + loki.process.logs_service.receiver, + ] +} + +// Logs Service +remote.kubernetes.secret "logs_service" { + name = "loki-k8s-monitoring" + namespace = "default" +} + +loki.process "logs_service" { + stage.static_labels { + values = { + cluster = "metric-module-imports", + } + } + forward_to = [loki.write.logs_service.receiver] +} + +// Loki +loki.write "logs_service" { + endpoint { + url = nonsensitive(remote.kubernetes.secret.logs_service.data["host"]) + "/loki/api/v1/push" + tenant_id = nonsensitive(remote.kubernetes.secret.logs_service.data["tenantId"]) + + basic_auth { + username = nonsensitive(remote.kubernetes.secret.logs_service.data["username"]) + password = remote.kubernetes.secret.logs_service.data["password"] + } + } +} + + +logging { + level = "info" + format = "logfmt" +} diff --git a/examples/metric-module-imports/logs.alloy 
b/examples/metric-module-imports/logs.alloy new file mode 100644 index 000000000..f0d95f3b0 --- /dev/null +++ b/examples/metric-module-imports/logs.alloy @@ -0,0 +1,155 @@ +// Pod Logs +discovery.kubernetes "pods" { + role = "pod" + selectors { + role = "pod" + field = "spec.nodeName=" + env("HOSTNAME") + } +} + +discovery.relabel "pod_logs" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_namespace"] + action = "replace" + target_label = "namespace" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + action = "replace" + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + action = "replace" + target_label = "container" + } + rule { + source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "$1" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "/var/log/pods/*$1/*.log" + target_label = "__path__" + } + + // set the container runtime as a label + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_container_id"] + regex = "^(\\S+):\\/\\/.+$" + replacement = "$1" + target_label = "tmp_container_runtime" + } +} + +discovery.relabel "filtered_pod_logs" { + targets = discovery.relabel.pod_logs.output + rule { // Drop anything with a "falsy" annotation value + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_autogather"] + regex = "(false|no|skip)" + action = "drop" + } +} + +local.file_match "pod_logs" { + path_targets = discovery.relabel.filtered_pod_logs.output +} + +loki.source.file "pod_logs" { + targets = local.file_match.pod_logs.targets + forward_to = [loki.process.pod_logs.receiver] +} + +loki.process "pod_logs" { + stage.match { + selector = "{tmp_container_runtime=\"containerd\"}" + // the cri processing stage extracts the following k/v pairs: log, stream, time, flags + stage.cri {} + + // Set the extract flags and stream values as labels + stage.labels { + values = { + flags = "", + stream = "", + } + } + } + + stage.match { + selector = "{tmp_container_runtime=\"cri-o\"}" + // the cri processing stage extracts the following k/v pairs: log, stream, time, flags + stage.cri {} + + // Set the extract flags and stream values as labels + stage.labels { + values = { + flags = "", + stream = "", + } + } + } + + // if the label tmp_container_runtime from above is docker parse using docker + stage.match { + selector = "{tmp_container_runtime=\"docker\"}" + // the docker processing stage extracts the following k/v pairs: log, stream, time + stage.docker {} + + // Set the extract stream value as a label + stage.labels { + values = { + stream = "", + } + } + } + + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have + // cluster, namespace, pod, and container labels. + // Also drop the temporary container runtime label as it is no longer needed. 
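+  // Note: stages run in order, so this label_drop stage executes after the
+  // runtime-specific match stages above and applies to logs from every runtime.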
+ stage.label_drop { + values = ["filename", "tmp_container_runtime"] + } + forward_to = [loki.process.logs_service.receiver] +} + +// Logs Service +remote.kubernetes.secret "logs_service" { + name = "loki-k8s-monitoring" + namespace = "default" +} + +loki.process "logs_service" { + stage.static_labels { + values = { + cluster = "metric-module-imports", + } + } + forward_to = [loki.write.logs_service.receiver] +} + +// Loki +loki.write "logs_service" { + endpoint { + url = nonsensitive(remote.kubernetes.secret.logs_service.data["host"]) + "/loki/api/v1/push" + tenant_id = nonsensitive(remote.kubernetes.secret.logs_service.data["tenantId"]) + + basic_auth { + username = nonsensitive(remote.kubernetes.secret.logs_service.data["username"]) + password = remote.kubernetes.secret.logs_service.data["password"] + } + } +} + + +logging { + level = "info" + format = "logfmt" +} diff --git a/examples/metric-module-imports/metrics.alloy b/examples/metric-module-imports/metrics.alloy new file mode 100644 index 000000000..862807c17 --- /dev/null +++ b/examples/metric-module-imports/metrics.alloy @@ -0,0 +1,1001 @@ +discovery.kubernetes "nodes" { + role = "node" +} + +discovery.kubernetes "services" { + role = "service" +} + +discovery.kubernetes "endpoints" { + role = "endpoints" +} + +discovery.kubernetes "pods" { + role = "pod" +} + +// OTLP Receivers +otelcol.receiver.otlp "receiver" { + debug_metrics { + disable_high_cardinality_metrics = true + } + + grpc { + endpoint = "0.0.0.0:4317" + } + + http { + endpoint = "0.0.0.0:4318" + } + output { + metrics = [otelcol.processor.resourcedetection.default.input] + logs = [otelcol.processor.resourcedetection.default.input] + } +} + + + + +// Processors +otelcol.processor.transform "add_metric_datapoint_attributes" { + // Grafana Cloud Kubernetes monitoring expects Loki labels `cluster`, `pod`, and `namespace` + error_mode = "ignore" + metric_statements { + context = "datapoint" + statements = [ + "set(attributes[\"deployment.environment\"], resource.attributes[\"deployment.environment\"])", + "set(attributes[\"service.version\"], resource.attributes[\"service.version\"])", + ] + } + output { + metrics = [otelcol.processor.k8sattributes.default.input] + } +} + +otelcol.processor.resourcedetection "default" { + detectors = ["env", "system"] + + system { + hostname_sources = ["os"] + } + + output { + metrics = [otelcol.processor.transform.add_metric_datapoint_attributes.input] + logs = [otelcol.processor.k8sattributes.default.input] + } +} + +otelcol.processor.k8sattributes "default" { + extract { + metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] + } + pod_association { + source { + from = "connection" + } + } + + output { + metrics = [otelcol.processor.transform.default.input] + logs = [otelcol.processor.transform.default.input] + } +} + +otelcol.processor.transform "default" { + // Grafana Cloud Kubernetes monitoring expects Loki labels `cluster`, `pod`, and `namespace` + error_mode = "ignore" + metric_statements { + context = "resource" + statements = [ + "set(attributes[\"k8s.cluster.name\"], \"metric-module-imports\") where attributes[\"k8s.cluster.name\"] == nil", + ] + } + log_statements { + context = "resource" + statements = [ + "set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])", + "set(attributes[\"namespace\"], attributes[\"k8s.namespace.name\"])", + "set(attributes[\"loki.resource.labels\"], 
\"cluster, namespace, job, pod\")", + "set(attributes[\"k8s.cluster.name\"], \"metric-module-imports\") where attributes[\"k8s.cluster.name\"] == nil", + ] + } + output { + metrics = [otelcol.processor.filter.default.input] + logs = [otelcol.processor.filter.default.input] + } +} + +otelcol.processor.filter "default" { + error_mode = "ignore" + + output { + metrics = [otelcol.processor.batch.batch_processor.input] + logs = [otelcol.processor.batch.batch_processor.input] + } +} + +otelcol.processor.batch "batch_processor" { + send_batch_size = 16384 + send_batch_max_size = 0 + timeout = "2s" + output { + metrics = [otelcol.exporter.prometheus.metrics_converter.input] + logs = [otelcol.exporter.loki.logs_converter.input] + } +} +otelcol.exporter.prometheus "metrics_converter" { + forward_to = [prometheus.relabel.metrics_service.receiver] +} +otelcol.exporter.loki "logs_converter" { + forward_to = [loki.process.pod_logs.receiver] +} +// Annotation Autodiscovery +discovery.relabel "annotation_autodiscovery_pods" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the pod port + // The discovery generates a target for each declared container port of the pod. + // If the metricsPortName annotation has value, keep only the target where the port name matches the one of the annotation. + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + action = "keepequal" + target_label = "__tmp_port" + } + + // If the metrics port number annotation has a value, override the target address to use it, regardless whether it is + // one of the declared ports on that Pod. 
+ rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})" + replacement = "[$2]:$1" // IPv6 + target_label = "__address__" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" // IPv4, takes priority over IPv6 when both exists + replacement = "$2:$1" + target_label = "__address__" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } +} + +discovery.relabel "annotation_autodiscovery_services" { + targets = discovery.kubernetes.services.targets + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the service port + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + action = "keepequal" + target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portNumber"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + action = "keepequal" + target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } +} + +discovery.relabel "annotation_autodiscovery_http" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "drop" + } +} + +discovery.relabel "annotation_autodiscovery_https" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "keep" + } +} + +prometheus.scrape "annotation_autodiscovery_http" { + targets = discovery.relabel.annotation_autodiscovery_http.output + honor_labels = true + clustering { + enabled = true + } + forward_to = [prometheus.relabel.annotation_autodiscovery.receiver] +} + +prometheus.scrape "annotation_autodiscovery_https" { + targets = discovery.relabel.annotation_autodiscovery_https.output + honor_labels = true + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + forward_to = [prometheus.relabel.annotation_autodiscovery.receiver] +} + +prometheus.relabel 
"annotation_autodiscovery" { + max_cache_size = 100000 + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// Grafana Alloy +discovery.relabel "alloy" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance"] + regex = "k8smon" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + regex = "alloy.*" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + regex = "http-metrics" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } +} + +prometheus.scrape "alloy" { + job_name = "integrations/alloy" + targets = discovery.relabel.alloy.output + scrape_interval = "60s" + forward_to = [prometheus.relabel.alloy.receiver] + clustering { + enabled = true + } +} + +prometheus.relabel "alloy" { + max_cache_size = 100000 + rule { + source_labels = ["__name__"] + regex = "up|alloy_build_info" + action = "keep" + } + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// Kubernetes Monitoring Telemetry +prometheus.exporter.unix "kubernetes_monitoring_telemetry" { + set_collectors = ["textfile"] + textfile { + directory = "/etc/kubernetes-monitoring-telemetry" + } +} + +prometheus.scrape "kubernetes_monitoring_telemetry" { + job_name = "integrations/kubernetes/kubernetes_monitoring_telemetry" + targets = prometheus.exporter.unix.kubernetes_monitoring_telemetry.targets + scrape_interval = "60s" + clustering { + enabled = true + } + forward_to = [prometheus.relabel.kubernetes_monitoring_telemetry.receiver] +} + +prometheus.relabel "kubernetes_monitoring_telemetry" { + max_cache_size = 100000 + rule { + target_label = "job" + action = "replace" + replacement = "integrations/kubernetes/kubernetes_monitoring_telemetry" + } + rule { + target_label = "instance" + action = "replace" + replacement = "k8smon" + } + rule { + source_labels = ["__name__"] + regex = "up|grafana_kubernetes_monitoring_.*" + action = "keep" + } + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// Kubelet +discovery.relabel "kubelet" { + targets = discovery.kubernetes.nodes.targets +} + +prometheus.scrape "kubelet" { + job_name = "integrations/kubernetes/kubelet" + targets = discovery.relabel.kubelet.output + scheme = "https" + scrape_interval = "60s" + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + forward_to = [prometheus.relabel.kubelet.receiver] +} + +prometheus.relabel "kubelet" { + max_cache_size = 100000 + rule { + source_labels = ["__name__"] + regex = 
"up|container_cpu_usage_seconds_total|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_used|kubernetes_build_info|namespace_workload_pod|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes" + action = "keep" + } + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// cAdvisor +discovery.relabel "cadvisor" { + targets = discovery.kubernetes.nodes.targets + rule { + replacement = "/metrics/cadvisor" + target_label = "__metrics_path__" + } +} + +prometheus.scrape "cadvisor" { + job_name = "integrations/kubernetes/cadvisor" + targets = discovery.relabel.cadvisor.output + scheme = "https" + scrape_interval = "60s" + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + forward_to = [prometheus.relabel.cadvisor.receiver] +} + +prometheus.relabel "cadvisor" { + max_cache_size = 100000 + rule { + source_labels = ["__name__"] + regex = "up|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes" + action = "keep" + } + // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + // Normalizing unimportant labels (not deleting to continue satisfying