Skip to content

Commit

Permalink
Merge pull request grafana/cortex-jsonnet#319 from grafana/darrenjane…
Browse files Browse the repository at this point in the history
…czek/config-job-aggregation

refactor: config for job aggregation strings
  • Loading branch information
pracucci authored Jun 10, 2021
2 parents edd68a4 + 3b6693d commit 77718f5
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 21 deletions.
2 changes: 1 addition & 1 deletion jsonnet/mimir-mixin/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
(import 'alerts/compactor.libsonnet')
else {}) +

{ _config:: $._config },
{ _config:: $._config + $._group_config },
}
2 changes: 1 addition & 1 deletion jsonnet/mimir-mixin/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
{
alert: 'CortexRequestLatency',
expr: |||
cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop"}
%(group_prefix_jobs)s_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop"}
>
%(cortex_p99_latency_threshold_seconds)s
||| % $._config,
Expand Down
7 changes: 4 additions & 3 deletions jsonnet/mimir-mixin/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@
compactor: 'compactor.*', // Match also custom compactor deployments.
},

// Labels used in alert aggregations - should uniquely identify
// a single Cortex cluster.
alert_aggregation_labels: 'cluster, namespace',
// Grouping labels, to uniquely identify and group by {jobs, clusters}
job_labels: ['cluster', 'namespace', 'job'],
cluster_labels: ['cluster', 'namespace'],

cortex_p99_latency_threshold_seconds: 2.5,

// Whether resources dashboards are enabled (based on cAdvisor metrics).
Expand Down
2 changes: 1 addition & 1 deletion jsonnet/mimir-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@
(import 'dashboards/writes-resources.libsonnet') +
(import 'dashboards/alertmanager-resources.libsonnet')) +

{ _config:: $._config },
{ _config:: $._config + $._group_config },
}
15 changes: 11 additions & 4 deletions jsonnet/mimir-mixin/dashboards/writes.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,22 @@ local utils = import 'mixin-utils/utils.libsonnet';
})
.addPanel(
$.panel('Samples / s') +
$.statPanel('sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{%s})' % $.jobMatcher($._config.job_names.distributor), format='reqps')
$.statPanel(
'sum(%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m{%(job)s})' % (
$._config {
job: $.jobMatcher($._config.job_names.distributor),
}
),
format='reqps'
)
)
.addPanel(
$.panel('Active Series') +
$.statPanel(|||
sum(cortex_ingester_memory_series{%(ingester)s}
/ on(namespace) group_left
max by (namespace) (cortex_distributor_replication_factor{%(distributor)s}))
||| % {
/ on(%(group_by_cluster)s) group_left
max by (%(group_by_cluster)s) (cortex_distributor_replication_factor{%(distributor)s}))
||| % ($._config) {
ingester: $.jobMatcher($._config.job_names.ingester),
distributor: $.jobMatcher($._config.job_names.distributor),
}, format='short')
Expand Down
45 changes: 45 additions & 0 deletions jsonnet/mimir-mixin/groups.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Derives string forms of the grouping label lists held in `_config`
// (`job_labels`, `cluster_labels`) and publishes them as `_group_config`.
// Also back-fills the deprecated `_config.alert_aggregation_labels` field.
{
// Joins labels with `_`, e.g. ['cluster', 'namespace', 'job'] -> 'cluster_namespace_job'.
local makePrefix(groups) = std.join('_', groups),
// Joins labels with `, `, e.g. ['cluster', 'namespace'] -> 'cluster, namespace'.
local makeGroupBy(groups) = std.join(', ', groups),

// Kept as a local because it is used both in `_group_config` and as the
// fallback value of `alert_aggregation_labels` below.
local group_by_cluster = makeGroupBy($._config.cluster_labels),

// Hidden field (`::`), merged (`+`) into any `_group_config` defined to the left.
_group_config+:: {
// Each group prefix is composed of `_`-separated labels
group_prefix_jobs: makePrefix($._config.job_labels),
group_prefix_clusters: makePrefix($._config.cluster_labels),

// Each group-by label list is `, `-separated and uniquely identifies
// a job or a cluster, respectively
group_by_job: makeGroupBy($._config.job_labels),
group_by_cluster: group_by_cluster,
},

// The following works around the deprecation of `$._config.alert_aggregation_labels`
// - If an override of that value is detected, a warning will be printed
// - If no override was detected, it will be set to the `group_by_cluster` value,
// which will replace it altogether in the future.
//
// Detection: a null default is layered underneath `super._config`, so the
// result is null unless `super._config` itself carries the field.
// NOTE(review): `super` only sees objects composed to the left of this one;
// confirm that overrides applied after the mixin are meant to go undetected.
local alert_aggregation_labels_override = (
{
alert_aggregation_labels: null,
} + super._config
).alert_aggregation_labels,

_config+:: {
// Keeps the overridden value (after emitting the deprecation trace) when one
// was detected; otherwise falls back to the derived `group_by_cluster` string.
alert_aggregation_labels:
if alert_aggregation_labels_override != null
then std.trace(
|||
Deprecated: _config.alert_aggregation_labels
This field has been explicitly overridden to "%s".
Instead, express the override in terms of _config.cluster_labels.
E.g., cluster_labels: %s will automatically convert to "%s".
||| % [
alert_aggregation_labels_override,
$._config.cluster_labels,
group_by_cluster,
],
// Second argument of std.trace: the value the expression evaluates to.
alert_aggregation_labels_override
)
else group_by_cluster,
},
}
1 change: 1 addition & 0 deletions jsonnet/mimir-mixin/mixin.libsonnet
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Composes the objects imported from the config, groups, dashboards, alerts
// and recording-rules files into a single object with `+`.
// NOTE(review): groups.libsonnet reads `super._config`, so it presumably has to
// stay to the right of config.libsonnet — confirm before reordering.
(import 'config.libsonnet') +
(import 'groups.libsonnet') +
(import 'dashboards.libsonnet') +
(import 'alerts.libsonnet') +
(import 'recording_rules.libsonnet')
22 changes: 11 additions & 11 deletions jsonnet/mimir-mixin/recording_rules.libsonnet
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
local utils = import 'mixin-utils/utils.libsonnet';

{
local _config = {
max_series_per_ingester: 1.5e6,
max_samples_per_sec_per_ingester: 80e3,
max_samples_per_sec_per_distributor: 240e3,
limit_utilisation_target: 0.6,
} + $._config + $._group_config,
prometheusRules+:: {
groups+: [
{
Expand Down Expand Up @@ -51,20 +57,14 @@ local utils = import 'mixin-utils/utils.libsonnet';
name: 'cortex_received_samples',
rules: [
{
record: 'cluster_namespace_job:cortex_distributor_received_samples:rate5m',
record: '%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m' % _config,
expr: |||
sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m]))
|||,
sum by (%(group_by_job)s) (rate(cortex_distributor_received_samples_total[5m]))
||| % _config,
},
],
},
{
local _config = {
max_series_per_ingester: 1.5e6,
max_samples_per_sec_per_ingester: 80e3,
max_samples_per_sec_per_distributor: 240e3,
limit_utilisation_target: 0.6,
},
name: 'cortex_scaling_rules',
rules: [
{
Expand All @@ -89,7 +89,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
ceil(
quantile_over_time(0.99,
sum by (cluster, namespace) (
cluster_namespace_job:cortex_distributor_received_samples:rate5m
%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m
)[24h:]
)
/ %(max_samples_per_sec_per_distributor)s
Expand Down Expand Up @@ -123,7 +123,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
ceil(
quantile_over_time(0.99,
sum by (cluster, namespace) (
cluster_namespace_job:cortex_distributor_received_samples:rate5m
%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m
)[24h:]
)
* 3 / %(max_samples_per_sec_per_ingester)s
Expand Down

0 comments on commit 77718f5

Please sign in to comment.