diff --git a/istio/assets/dashboards/overview.json b/istio/assets/dashboards/overview.json new file mode 100644 index 0000000000000..ff03ba37b1a16 --- /dev/null +++ b/istio/assets/dashboards/overview.json @@ -0,0 +1,3855 @@ +{ + "author_name": "Datadog", + "description": "Istio is a Kubernetes-based, open source service mesh that provides advanced networking, security, and observability features for microservices in a cloud-native environment. This dashboard is crafted to provide you with an overview of, detailed information about, and performance data for your Istio services.\n\n**Further reading:**\n\n- [Datadog Istio Integration Documentation](https://docs.datadoghq.com/integrations/istio/)\n- [Istio Docs](https://istio.io/latest/)\n- [Istio Metrics Docs](https://istio.io/latest/docs/reference/config/metrics/)", + "layout_type": "ordered", + "template_variables": [ + { + "available_values": [], + "default": "pilot", + "name": "istiod_image", + "prefix": "short_image" + }, + { + "available_values": [], + "default": "proxyv2", + "name": "istio_proxy_image", + "prefix": "short_image" + }, + { + "available_values": [], + "default": "*", + "name": "cluster_name", + "prefix": "kube_cluster_name" + }, + { + "available_values": [], + "default": "*", + "name": "destination_service_name", + "prefix": "destination_service_name" + }, + { + "available_values": [], + "default": "*", + "name": "destination_service", + "prefix": "destination_service" + }, + { + "available_values": [], + "default": "*", + "name": "destination_workload_namespace", + "prefix": "destination_workload_namespace" + }, + { + "available_values": [], + "default": "*", + "name": "destination_workload", + "prefix": "destination_workload" + }, + { + "available_values": [], + "default": "*", + "name": "source_workload", + "prefix": "source_workload" + }, + { + "available_values": [], + "default": "*", + "name": "source_workload_namespace", + "prefix": "source_workload_namespace" + }, + { + 
"available_values": [], + "default": "*", + "name": "response_code", + "prefix": "status_code" + } + ], + "title": "Istio Overview", + "widgets": [ + { + "definition": { + "layout_type": "ordered", + "show_title": true, + "title": "", + "type": "group", + "widgets": [ + { + "definition": { + "has_background": true, + "has_border": false, + "horizontal_align": "center", + "sizing": "cover", + "type": "image", + "url": "/static/images/logos/istio_large.svg", + "url_dark_theme": "/static/images/logos/istio_reversed_large.svg", + "vertical_align": "center" + }, + "id": 2192576039659578, + "layout": { + "height": 3, + "width": 6, + "x": 0, + "y": 0 + } + }, + { + "definition": { + "background_color": "transparent", + "content": "Istio is a Kubernetes-based, open source service mesh that provides advanced networking, security, and observability features for microservices in a cloud-native environment. This dashboard is crafted to provide you with an overview of, detailed information about, and performance data for your Istio services.", + "font_size": "14", + "has_padding": true, + "show_tick": false, + "text_align": "left", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 4559139518898252, + "layout": { + "height": 2, + "width": 6, + "x": 0, + "y": 3 + } + }, + { + "definition": { + "background_color": "transparent", + "content": "**Further reading:**\n\n- [Datadog Istio Integration Documentation](https://docs.datadoghq.com/integrations/istio/)\n- [Istio Docs](https://istio.io/latest/)\n- [Istio Metrics Docs](https://istio.io/latest/docs/reference/config/metrics/)", + "font_size": "14", + "has_padding": true, + "show_tick": false, + "text_align": "left", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 1358687777465532, + "layout": { + "height": 2, + "width": 6, + "x": 0, + "y": 5 + } + } + ] + }, + "id": 1750497109975664, + "layout": { + "height": 8, + "width": 
6, + "x": 0, + "y": 0 + } + }, + { + "definition": { + "background_color": "vivid_blue", + "layout_type": "ordered", + "show_title": true, + "title": "Istio Monitoring Overview", + "type": "group", + "widgets": [ + { + "definition": { + "background_color": "blue", + "content": "Istio Overview shows a summary of all reporting Istio instances, as well as related monitors and their statuses.", + "font_size": "14", + "has_padding": true, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 4493765391477664, + "layout": { + "height": 1, + "width": 6, + "x": 0, + "y": 0 + } + }, + { + "definition": { + "autoscale": true, + "precision": 2, + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "count:kubernetes.containers.running{$istiod_image,$cluster_name}" + } + ], + "response_format": "scalar" + } + ], + "title": "Istiod Instances", + "title_align": "left", + "title_size": "16", + "type": "query_value" + }, + "id": 5086027032989668, + "layout": { + "height": 2, + "width": 3, + "x": 0, + "y": 1 + } + }, + { + "definition": { + "autoscale": true, + "precision": 2, + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "count:kubernetes.containers.running{$istio_proxy_image,$cluster_name}" + } + ], + "response_format": "scalar" + } + ], + "title": "Istio Proxy Instances", + "title_align": "left", + "title_size": "16", + "type": "query_value" + }, + "id": 8580626733162374, + "layout": { + "height": 2, + "width": 3, + "x": 3, + "y": 1 + } + }, + { + "definition": { + "color_preference": "text", + "count": 50, + "display_format": "countsAndList", + "hide_zero_counts": true, + "query": "Istio", + "show_last_triggered": false, + "show_priority": 
false, + "sort": "status,asc", + "start": 0, + "summary_type": "monitors", + "title": "Istio Monitor Summary", + "type": "manage_status" + }, + "id": 2729417714992040, + "layout": { + "height": 4, + "width": 6, + "x": 0, + "y": 3 + } + } + ] + }, + "id": 3634066390776612, + "layout": { + "height": 8, + "width": 6, + "x": 6, + "y": 0 + } + }, + { + "definition": { + "background_color": "vivid_blue", + "layout_type": "ordered", + "show_title": true, + "title": "Istiod", + "type": "group", + "widgets": [ + { + "definition": { + "background_color": "blue", + "content": "Citadel Stats\n\nMetrics for security and identity management", + "font_size": "16", + "has_padding": false, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 3627922134121938, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 0 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Requests Received", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.citadel.server.csr_count.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Cert Signing Requests Received", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 4422363340888746, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 1 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Certs Issued", + "formula": 
"query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.citadel.server.success_cert_issuance_count.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Cert Successfully Issued", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 5361710131825236, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 1 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Success Rate", + "formula": "query2 / query1 * 100", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.citadel.server.success_cert_issuance_count.sum{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + }, + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.citadel.server.csr_count.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Cert Issuance Success Rate", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2045196564988994, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 1 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Galley Stats\n\nMetrics for configuration validation, distribution, and discovery", + "font_size": "16", + "has_padding": false, + "show_tick": false, + "text_align": 
"center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 8816077523674974, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 4 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Events Processed", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.galley.runtime_processor.events_processed.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Events Processed", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1235689914365076, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 5 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Snapshots published", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.galley.runtime_processor.snapshots_published.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Snapshot Published", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 8908168037129674, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 5 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + 
], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.galley.validation.passed.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + }, + "palette": "dog_classic" + }, + "title": "Resource Validation Passes", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 5500579784685662, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 5 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Configs Updated", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.galley.validation.config_update.count{$cluster_name,$istiod_image}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Webhook Configuration Updates", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2136467378227916, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 8 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Update Errors", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.galley.validation.config_update_error.count{$cluster_name,$istiod_image}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + 
"line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Webhook Configuration Update Errors", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 3522568668961966, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 8 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.galley.validation.failed.count{$cluster_name,$istiod_image} by {reason}.as_count()" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + }, + "palette": "dog_classic" + }, + "title": "Resource Validation Failures", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 3763553681295482, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 8 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Pilot Stats\n\nMetrics for service discovery, configuration distribution to Envoy proxies, traffic management, and resiliency features", + "font_size": "16", + "has_padding": false, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 6970496165669798, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 11 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "xDS Pushes", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": 
"sum:istio.pilot.xds.pushes.count{$cluster_name,$istiod_image} by {kube_cluster_name,type}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "xDS Pushes", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 4245198672419888, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 12 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Send Errors", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.xds.pushes.count{$cluster_name,$istiod_image,type:*senderr} by {kube_cluster_name,type}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "warm" + } + } + ], + "show_legend": true, + "title": "xDS Pushes Errors", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2416271735897122, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 12 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "formula": "query2 / query1", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.pilot.xds.pushes.count{$cluster_name,$istiod_image,type:*senderr} by {kube_cluster_name}.as_count()" + }, + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.xds.pushes.count{$cluster_name,$istiod_image} by 
{kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "red" + } + } + ], + "show_legend": true, + "title": "xDS Push Error Rate", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 5891948212949074, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 12 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Envoy Clusters", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.pilot.duplicate_envoy_clusters{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Duplicate Envoy Clusters", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 7168533379665800, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 15 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Services", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.services{$cluster_name,$istiod_image} by {kube_cluster_name}.fill(null)" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Services Known to Pilot", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6917840161139368, + 
"layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 15 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Push Triggers", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.push.triggers.count{$cluster_name,$istiod_image} by {type,kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Push Triggers", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 7280351189474090, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 15 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Updates Received", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.inbound_updates.count{$cluster_name,$istiod_image} by {type,kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Inbound Updates Received", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 7912852234152576, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 18 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Events Received", + "formula": "query1" + } + ], + "queries": 
[ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.k8s.cfg_events.count{$cluster_name,$istiod_image} by {type,event,kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Kubernetes Configuration Events Received", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6385890192194846, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 18 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Inbound Listeners", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.conflict.inbound_listener{$cluster_name,$istiod_image}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Conflicting Inbound Listeners", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 627560008960808, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 18 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.pilot.no_ip{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + }, + "palette": "dog_classic" + }, + "title": "Pods Not in Endpoint Table", + 
"title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 6497335918580626, + "layout": { + "height": 4, + "width": 2, + "x": 0, + "y": 21 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.pilot.destrule_subsets{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + }, + "palette": "dog_classic" + }, + "title": "Duplicate Subsets", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 2751676326368262, + "layout": { + "height": 4, + "width": 2, + "x": 2, + "y": 21 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.pilot.eds_no_instances{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 500, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + } + }, + "title": "Clusters Without EDS Instances", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 246806102548318, + "layout": { + "height": 4, + "width": 2, + "x": 4, + "y": 21 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.pilot.conflict.outbound_listener.http_over_current_tcp{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + 
"response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + }, + "palette": "dog_classic" + }, + "title": "Conflicting Wildcard http Listeners", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 1143161583498956, + "layout": { + "height": 4, + "width": 2, + "x": 6, + "y": 21 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.virt_services{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + }, + "palette": "dog_classic" + }, + "title": "Virtual Services", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 4225916981982476, + "layout": { + "height": 4, + "width": 2, + "x": 8, + "y": 21 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.vservice_dup_domain{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + }, + "palette": "dog_classic" + }, + "title": "Virtual Services with Duplicate Domains", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 5912405031057558, + "layout": { + "height": 4, + "width": 2, + "x": 10, + "y": 
21 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Endpoint Stats", + "font_size": "18", + "has_padding": true, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 6335743618824886, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 25 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Without Pods", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.galley.endpoint_no_pod{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Endpoints without Pods", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 480404961300200, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 26 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Not Ready", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.pilot.endpoint_not_ready{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Endpoints not Ready", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 3295607529193048, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 26 + 
} + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Using XDS", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.xds{$cluster_name,$istiod_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Endpoints Using xDS", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1747118352723258, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 26 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Latency Stats", + "font_size": "18", + "has_padding": true, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 8828435255005804, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 29 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Push Latency", + "formula": "autosmooth(query1 / query2)", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "second" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.xds.push.time.sum{$cluster_name,$istiod_image} by {type}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.pilot.xds.push.time.count{$cluster_name,$istiod_image} by {type}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": 
"dog_classic" + } + } + ], + "show_legend": true, + "title": "XDS Push", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 4858650798546170, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 30 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "In Queue Latency", + "formula": "query1 / query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.proxy_queue_time.sum{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.pilot.proxy_queue_time.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Time Proxy is in Push Queue", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1636856362300, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 30 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Config Change Latency", + "formula": "query1 / query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.pilot.proxy_convergence_time.sum{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.pilot.proxy_convergence_time.count{$cluster_name,$istiod_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + 
"order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Proxy Convergence Config Change", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2615643406003470, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 30 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Sidecar Stats", + "font_size": "18", + "has_padding": true, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 3419796175501478, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 33 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Injection Requests", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.sidecar_injection.requests.count{$cluster_name,$istiod_image}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Sidecar Injection Requests", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2111826177063454, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 34 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Sidecar Injections", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.sidecar_injection.success.count{$cluster_name,$istiod_image}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + 
"line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Succesful Injection Requests", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 5358928247276534, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 34 + } + }, + { + "definition": { + "custom_links": [], + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Failed Injections", + "formula": "query1" + }, + { + "alias": "Skipped Injections", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.sidecar_injection.failure.count{$cluster_name,$istiod_image}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.sidecar_injection.skip.count{$cluster_name,$istiod_image}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "palette": "dog_classic" + } + } + ], + "show_legend": false, + "title": "Failed or Skipped Sidecar Injection Requests", + "title_align": "left", + "title_size": "16", + "type": "timeseries", + "yaxis": { + "include_zero": true, + "label": "", + "max": "auto", + "min": "auto", + "scale": "linear" + } + }, + "id": 5252419092822202, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 34 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Istiod Logs", + "font_size": "18", + "has_padding": true, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 1375658328165226, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 37 + } + }, + { + "definition": { + "requests": [ + { + "columns": [ + { + "field": "status_line", + "width": "auto" + }, + { + "field": "matches", + 
"width": "auto" + }, + { + "field": "volume", + "width": "auto" + }, + { + "field": "status", + "width": "auto" + }, + { + "field": "service", + "width": "auto" + }, + { + "field": "message", + "width": "auto" + } + ], + "query": { + "data_source": "logs_pattern_stream", + "group_by": [ + { + "facet": "status" + }, + { + "facet": "service" + } + ], + "indexes": [], + "query_string": "source:$istiod_image.value kube_cluster_name:$cluster_name.value " + }, + "response_format": "event_list" + } + ], + "title": "", + "title_align": "left", + "title_size": "16", + "type": "list_stream" + }, + "id": 4188771970394504, + "layout": { + "height": 5, + "width": 12, + "x": 0, + "y": 38 + } + } + ] + }, + "id": 3915105066981490, + "layout": { + "height": 44, + "width": 12, + "x": 0, + "y": 8 + } + }, + { + "definition": { + "background_color": "vivid_blue", + "layout_type": "ordered", + "show_title": true, + "title": "Istio Proxy", + "type": "group", + "widgets": [ + { + "definition": { + "background_color": "blue", + "content": "Requests and Responses\n\nMetrics for both requests and responses. 
These metrics help monitor the performance and behavior of the traffic within the service mesh.", + "font_size": "16", + "has_padding": false, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 1021322802120984, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 0 + } + }, + { + "definition": { + "legend": { + "type": "automatic" + }, + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "sum", + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.request.count.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + } + ], + "response_format": "scalar", + "sort": { + "count": 500, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + }, + "style": { + "palette": "semantic" + } + } + ], + "title": "Requests Count", + "title_align": "left", + "title_size": "16", + "type": "sunburst" + }, + "id": 4718569899162558, + "layout": { + "height": 4, + "width": 12, + "x": 0, + "y": 1 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Okay Requests", + "formula": "query1", + "style": { + "palette": "cool" + } + }, + { + "alias": "Failed Requests", + "formula": "query2", + "style": { + "palette": "warm" + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.request.count.total{(response_code:2* OR 
response_code:3*) AND $cluster_name AND $istio_proxy_image AND $destination_service_name AND $destination_service AND $destination_workload_namespace AND $destination_workload AND $source_workload AND $source_workload_namespace AND $response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.mesh.request.count.total{(response_code:4* OR response_code:5* OR response_code:0) AND $cluster_name AND $istio_proxy_image AND $destination_service_name AND $destination_service AND $destination_workload_namespace AND $destination_workload AND $source_workload AND $source_workload_namespace AND $response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Requests Count", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 992999333859954, + "layout": { + "height": 4, + "width": 12, + "x": 0, + "y": 5 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Request Errors", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.request.count.total{(response_code:4* OR response_code:5* OR response_code:0) AND $cluster_name AND $istio_proxy_image AND $destination_service_name AND $destination_service AND 
$destination_workload_namespace AND $destination_workload AND $source_workload AND $source_workload_namespace AND $response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "warm" + } + } + ], + "show_legend": true, + "title": "Requests Error Count", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2333921351693680, + "layout": { + "height": 4, + "width": 8, + "x": 0, + "y": 9 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Error Percentage", + "formula": "(query2 / query1) * 100", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.mesh.request.count.total{(response_code:4* OR response_code:5* OR response_code:0) AND $cluster_name AND $istio_proxy_image AND $destination_service_name AND $destination_service AND $destination_workload_namespace AND $destination_workload AND $source_workload AND $source_workload_namespace AND $response_code}.as_count()" + }, + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.request.count.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "red" + } + } + ], + 
"show_legend": true, + "title": "Requests Errors Percent", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2578742449760160, + "layout": { + "height": 4, + "width": 4, + "x": 8, + "y": 9 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Average Request Size", + "formula": "query1 / query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.request.size.sum.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.mesh.request.size.count.total{$cluster_name,upper_bound:none,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Request Size", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 8424224632319624, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 13 + } + }, + { + "definition": { + 
"legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Average Request Latency", + "formula": "query1 / query2", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "millisecond" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.request.duration.milliseconds.sum.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.mesh.request.duration.milliseconds.count.total{$cluster_name,upper_bound:none,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Request Latency", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 3211728510006402, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 13 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + 
"display_type": "line", + "formulas": [ + { + "alias": "Average Response Size", + "formula": "query1 / query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.response.size.sum.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.mesh.response.size.count.total{$cluster_name,upper_bound:none,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace,$response_code} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,response_code,destination_service_name,destination_service_namespace,destination_version}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Response Size", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1205930886238800, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 13 + } + }, + { + "definition": { + "background_color": "blue", + "content": "TCP Stats", + "font_size": "18", + "has_padding": true, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 8852160322492380, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 16 + } + }, + { + "definition": { + "legend_columns": [ + 
"avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Connections Opened", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.tcp.connections_opened.total.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,destination_service_name,destination_service_namespace,destination_version,host}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Connections Opened", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1007378854095042, + "layout": { + "height": 3, + "width": 6, + "x": 0, + "y": 17 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Connection Closed", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.tcp.connections_closed.total.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,destination_service_name,destination_service_namespace,destination_version,host}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": 
"normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Connection Closed", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6305179201802850, + "layout": { + "height": 3, + "width": 6, + "x": 6, + "y": 17 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Bytes Received", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.tcp.received_bytes.total.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,destination_service_name,destination_service_namespace,destination_version,host}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Bytes Received", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 3680746287476284, + "layout": { + "height": 3, + "width": 6, + "x": 0, + "y": 20 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Bytes Sent", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": 
"sum:istio.mesh.tcp.send_bytes.total.total{$cluster_name,$istio_proxy_image,$destination_service_name,$destination_service,$destination_workload_namespace,$destination_workload,$source_workload,$source_workload_namespace} by {destination_service,source_workload,source_workload_namespace,destination_workload,destination_workload_namespace,destination_service_name,destination_service_namespace,destination_version,host}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Bytes Sent", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6093198942495060, + "layout": { + "height": 3, + "width": 6, + "x": 6, + "y": 20 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Pilot Agent\n\nIstio Pilot agent runs alongside the application container within the same pod and is responsible for managing the Istio sidecar proxy", + "font_size": "16", + "has_padding": false, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 4343335632101298, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 23 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "XDS Errors", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.pilot.xds.pushes.total{$cluster_name,$istio_proxy_image} by {type,kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "xDS Build and Send 
Errors", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 2866530856510654, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 24 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "XDS Push Latency", + "formula": "query1 / query2", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "second" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.pilot.xds.push_time.sum.total{$cluster_name,$istio_proxy_image} by {kube_cluster_name}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:istio.mesh.agent.pilot.xds.push_time.count.total{upper_bound:none,$cluster_name,$istio_proxy_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "xDS Push Latency", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6130658737323934, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 24 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "XDS Send Latency", + "formula": "query1 / query2", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "second" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.pilot.xds.send_time.sum.total{$cluster_name,$istio_proxy_image} by {kube_cluster_name}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": 
"sum:istio.mesh.agent.pilot.xds.send_time.count.total{upper_bound:none,$cluster_name,$istio_proxy_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "xDS Send Time Latency ", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1172210687345970, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 24 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Endpoints Without Pods", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.endpoint_no_pod{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Endpoints Without Pods", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6256000834898634, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 27 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "XDS Endpoints", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.pilot.xds{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": 
"Endpoints Using xDS", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6571235405199462, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 27 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Endpoint Not Ready", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.pilot.endpoint_not_ready{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Endpoints Not Ready", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 7474998809796252, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 27 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Outgoing Requests", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.num_outgoing_requests.total{$cluster_name,$istio_proxy_image} by {request_type,kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Outgoing Requests", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 3539640039648994, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 30 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + 
"legend_layout": "auto", + "requests": [ + { + "display_type": "bars", + "formulas": [ + { + "alias": "Outgoing Latency", + "formula": "query1", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "millisecond" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.outgoing_latency.total{$cluster_name,$istio_proxy_image} by {request_type,kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Outgoing Latency", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 6015274400415964, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 30 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Inbound Listeners", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.conflict.inbound_listener{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Conflicting Inbound Listeners", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1330553722561094, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 30 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Startup Duration", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + 
"name": "query1", + "query": "avg:istio.mesh.agent.startup_duration_seconds{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Startup Duration", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 8892246811042650, + "layout": { + "height": 3, + "width": 4, + "x": 0, + "y": 33 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Duplicate Envoy Clusters", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.pilot.duplicate_envoy_clusters{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Duplicate Envoy Clusters", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 946034440299884, + "layout": { + "height": 3, + "width": 4, + "x": 4, + "y": 33 + } + }, + { + "definition": { + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "legend_layout": "auto", + "requests": [ + { + "display_type": "line", + "formulas": [ + { + "alias": "Average XDS Config Size", + "formula": "query1 / query2", + "number_format": { + "unit": { + "label": "bytes", + "type": "custom_unit_label" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:istio.mesh.agent.pilot.xds.config_size_bytes.sum.total{$cluster_name,$istio_proxy_image} by {kube_cluster_name}.as_count()" + }, + { + "data_source": "metrics", + "name": 
"query2", + "query": "sum:istio.mesh.agent.pilot.xds.config_size_bytes.count.total{upper_bound:none,$cluster_name,$istio_proxy_image} by {kube_cluster_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "line_type": "solid", + "line_width": "normal", + "order_by": "values", + "palette": "dog_classic" + } + } + ], + "show_legend": true, + "title": "Configuration Sized Pushed to Clients", + "title_align": "left", + "title_size": "16", + "type": "timeseries" + }, + "id": 1825657963147926, + "layout": { + "height": 3, + "width": 4, + "x": 8, + "y": 33 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.pilot.no_ip{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + } + }, + "title": "Pods Not in Endpoint Table", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 6664882467823550, + "layout": { + "height": 4, + "width": 2, + "x": 0, + "y": 36 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.pilot.destrule_subsets{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + } + }, + "title": "Duplicate Subsets", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 2764452015327688, 
+ "layout": { + "height": 4, + "width": 2, + "x": 2, + "y": 36 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.pilot.eds_no_instances{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + } + }, + "title": "Clusters without EDS Instances", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 355379840024986, + "layout": { + "height": 4, + "width": 2, + "x": 4, + "y": 36 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.conflict.outbound_listener.tcp_over_current_tcp{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + } + }, + "title": "Conflicting TCP Listeners", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 7634492193551210, + "layout": { + "height": 4, + "width": 2, + "x": 6, + "y": 36 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.pilot.virt_services{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, 
+ "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + } + }, + "title": "Virtual Services", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 6135763419099144, + "layout": { + "height": 4, + "width": 2, + "x": 8, + "y": 36 + } + }, + { + "definition": { + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "last", + "data_source": "metrics", + "name": "query1", + "query": "avg:istio.mesh.agent.wasm_cache_entries{$cluster_name,$istio_proxy_image} by {kube_cluster_name}" + } + ], + "response_format": "scalar", + "sort": { + "count": 10, + "order_by": [ + { + "index": 0, + "order": "desc", + "type": "formula" + } + ] + } + } + ], + "style": { + "display": { + "legend": "automatic", + "type": "stacked" + } + }, + "title": "Wasm Fetch Cache Entries", + "title_align": "left", + "title_size": "16", + "type": "toplist" + }, + "id": 4103579371186386, + "layout": { + "height": 4, + "width": 2, + "x": 10, + "y": 36 + } + }, + { + "definition": { + "background_color": "blue", + "content": "Istio Proxy Logs\n", + "font_size": "18", + "has_padding": true, + "show_tick": false, + "text_align": "center", + "tick_edge": "left", + "tick_pos": "50%", + "type": "note", + "vertical_align": "center" + }, + "id": 465279829336880, + "layout": { + "height": 1, + "width": 12, + "x": 0, + "y": 40 + } + }, + { + "definition": { + "requests": [ + { + "columns": [ + { + "field": "status_line", + "width": "auto" + }, + { + "field": "matches", + "width": "auto" + }, + { + "field": "volume", + "width": "auto" + }, + { + "field": "status", + "width": "auto" + }, + { + "field": "service", + "width": "auto" + }, + { + "field": "message", + "width": "auto" + } + ], + "query": { + "data_source": "logs_pattern_stream", + "group_by": [ + { + "facet": "status" + }, + { + "facet": "service" + } + ], + "indexes": [], + "query_string": 
"source:$istio_proxy_image.value kube_cluster_name:$cluster_name.value " + }, + "response_format": "event_list" + } + ], + "title": "Istio Proxy Logs", + "title_align": "left", + "title_size": "16", + "type": "list_stream" + }, + "id": 4102134957051482, + "layout": { + "height": 5, + "width": 12, + "x": 0, + "y": 41 + } + } + ] + }, + "id": 2451034760154916, + "layout": { + "height": 47, + "is_column_break": true, + "width": 12, + "x": 0, + "y": 52 + } + } + ] +} \ No newline at end of file diff --git a/istio/assets/monitors/request_error_rate.json b/istio/assets/monitors/request_error_rate.json new file mode 100644 index 0000000000000..07cc1745124ac --- /dev/null +++ b/istio/assets/monitors/request_error_rate.json @@ -0,0 +1,33 @@ +{ + "version": 2, + "created_at": "2024-07-08", + "last_updated_at": "2024-07-08", + "title": "Istio Proxy Requests Error Percentage", + "tags": [ + "integration:istio" + ], + "description": "The request count metric shows throughput between services in your mesh, incrementing with each HTTP or gRPC request an Envoy sidecar receives. This monitor alerts if over 40% of requests get 5xx, 4xx, or 0 status codes in the last 15 minutes, indicating possible issues.", + "definition": { + "message": "{{#is_alert}}\n\nIstio Proxy requests are reporting a high error percentage in {{cluster_name.name}}. 
This indicates that more than 40% of requests received a status code of 5xx, 4xx, or 0 in the last 15 minutes.\n\n{{/is_alert}}", + "name": "[Istio] Proxy Requests Error Percentage", + "options": { + "thresholds": { + "critical": 40 + }, + "notify_audit": false, + "include_tags": true, + "new_group_delay": 60, + "silenced": {}, + "avalanche_window": 10, + "notify_no_data": false, + "renotify_interval": 0, + "require_full_window": false + }, + "priority": null, + "query": "sum(last_15m):sum:istio.mesh.request.count.total{(response_code:4* OR response_code:5* OR response_code:0)} by {cluster_name}.as_count() / sum:istio.mesh.request.count.total{*} by {cluster_name}.as_count() * 100 > 40", + "tags": [ + "integration:istio" + ], + "type": "query alert" + } +} \ No newline at end of file diff --git a/istio/assets/monitors/xds_push_error_rate.json b/istio/assets/monitors/xds_push_error_rate.json new file mode 100644 index 0000000000000..0e3139639a727 --- /dev/null +++ b/istio/assets/monitors/xds_push_error_rate.json @@ -0,0 +1,33 @@ +{ + "version": 2, + "created_at": "2024-07-08", + "last_updated_at": "2024-07-08", + "title": "Istio xDS Push Error Rate", + "tags": [ + "integration:istio" + ], + "description": "Pilot pushes configuration changes to Envoy proxies using Envoy’s xDS APIs. This monitor alerts you when the xDS push error rate is high, indicating that Istio may be having trouble with xDS messages, which can impact the stability and performance of the entire service mesh.", + "definition": { + "message": "{{#is_alert}}\n\nIstio is reporting a high xDS push error rate of {{value}}% in the last 15 minutes in {{kube_cluster_name.name}}. 
This could indicate that Istio is having trouble creating or pushing messages to the different xDS APIs.\n\n{{/is_alert}}", + "name": "[Istio] xDS Push Error Rate", + "options": { + "thresholds": { + "critical": 25 + }, + "notify_audit": false, + "include_tags": true, + "new_group_delay": 60, + "silenced": {}, + "avalanche_window": 10, + "notify_no_data": false, + "renotify_interval": 0, + "require_full_window": false + }, + "priority": null, + "query": "sum(last_15m):sum:istio.pilot.xds.pushes.count{type:*senderr} by {kube_cluster_name}.as_count() / sum:istio.pilot.xds.pushes.count{*} by {kube_cluster_name}.as_count() * 100 > 25", + "tags": [ + "integration:istio" + ], + "type": "query alert" + } +} \ No newline at end of file diff --git a/istio/manifest.json b/istio/manifest.json index b3c362f058807..268a4d1f24afe 100644 --- a/istio/manifest.json +++ b/istio/manifest.json @@ -60,10 +60,13 @@ "dashboards": { "Istio base dashboard": "assets/dashboards/istio_overview.json", "Istio Overview 1.5": "assets/dashboards/istio_1_5_overview.json", - "Istio Overview 1.5 (OpenMetrics)": "assets/dashboards/istio_1_5_openmetrics_overview.json" + "Istio Overview 1.5 (OpenMetrics)": "assets/dashboards/istio_1_5_openmetrics_overview.json", + "Istio Overview": "assets/dashboards/overview.json" }, "monitors": { - "Failed sidecar injections": "assets/monitors/failed_sidecar_injection.json" + "Failed sidecar injections": "assets/monitors/failed_sidecar_injection.json", + "xDS Push Error Rate": "assets/monitors/xds_push_error_rate.json", + "Istio Proxy Requests Error Percentage": "assets/monitors/request_error_rate.json" }, "saved_views": { "Istio Overview": "assets/saved_views/istio_overview.json",