From 11cdd5e24ed9528361af68351ddbf0053b2e79eb Mon Sep 17 00:00:00 2001 From: Sinny Kumari Date: Wed, 28 Sep 2022 12:48:11 +0200 Subject: [PATCH] Add os_image_url_override metric from MCO to telemetry --- Documentation/data-collection.md | 5 +++++ Documentation/sample-metrics.md | 2 +- Documentation/telemetry/telemeter_query | 2 +- manifests/0000_50_cluster-monitoring-operator_04-config.yaml | 5 +++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/data-collection.md b/Documentation/data-collection.md index 10ae1afd88..886dd5c9d8 100644 --- a/Documentation/data-collection.md +++ b/Documentation/data-collection.md @@ -847,6 +847,11 @@ data: # ocs_advanced_feature_usage shows whether the cluster is using any of the advanced # features, like external cluster mode or KMS/PV Encryption etc - '{__name__="ocs_advanced_feature_usage"}' + # + # owners: (https://github.com/openshift/machine-config-operator/) + # + # os_image_url_override:sum tells whether cluster is using default OS image or has been overridden by user + - '{__name__="os_image_url_override:sum"}' kind: ConfigMap metadata: name: telemetry-config diff --git a/Documentation/sample-metrics.md b/Documentation/sample-metrics.md index fac9317d5f..928d32e8cc 100644 --- a/Documentation/sample-metrics.md +++ b/Documentation/sample-metrics.md @@ -13,7 +13,7 @@ return the full set of metrics that the Telemeter client captures: [embedmd]:# (telemetry/telemeter_query txt) ```txt -{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_version_capability|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_total:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|cluster:vmi_request_cpu_cores:sum|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|olm_resolution_duration_seconds|csv_succeeded|csv_abnormal|cluster:kube_persistentvolumeclaim_resource_requests_storage_bytes:provisioner:sum|cluster:kubelet_volume_stats_used_bytes:provisioner:sum|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|odf_system_raw_capacity_total_bytes|odf_system_raw_capacity_used_bytes|odf_system_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:odf_system_pvs:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|odf_system_bucket_count|odf_system_objects_total|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:ovnkube_master_egress_routing_via_host:max|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|cluster:ingress_controller_aws_nlb_active:sum|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:alertmanager_integrations:max|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|namespace_job:scrape_series_added:topk3_sum1h|namespace_job:scrape_samples_post_metric_relabeling:topk3|monitoring:haproxy_server_http_responses_total:sum|rhmi_status|cluster_legacy_scheduler_policy|cluster_master_schedulable|che_workspace_status|che_workspace_started_total|che_workspace_failure_total|che_workspace_start_time_seconds_sum|che_workspace_start_time_seconds_count|cco_credentials_mode|cluster:kube_persistentvolume_plugin_type_counts:sum|visual_web_terminal_sessions_total|acm_managed_cluster_info|cluster:vsphere_vcenter_info:sum|cluster:vsphere_esxi_version_total:sum|cluster:vsphere_node_hw_version_total:sum|openshift:build_by_strategy:sum|rhods_aggregate_availability|rhods_total_users|instance:etcd_disk_wal_fsync_duration_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_bytes:sum|instance:etcd_network_peer_round_trip_time_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_use_in_bytes:sum|instance:etcd_disk_backend_commit_duration_seconds:histogram_quantile|jaeger_operator_instances_storage_types|jaeger_operator_instances_strategies|jaeger_operator_instances_agent_strategies|appsvcs:cores_by_product:sum|nto_custom_profiles:count|openshift_csi_share_configmap|openshift_csi_share_secret|openshift_csi_share_mount_failures_total|openshift_csi_share_mount_requests_total|cluster:velero_backup_total:max|cluster:velero_restore_total:max|eo_es_storage_info|eo_es_redundancy_policy_info|eo_es_defined_delete_namespaces_total|eo_es_misconfigured_memory_resources_info|cluster:eo_es_data_nodes_total:max|cluster:eo_es_documents_created_total:sum|cluster:eo_es_documents_deleted_total:sum|pod:eo_es_shards_total:max|eo_es_cluster_management_state_info|imageregistry:imagestreamtags_count:sum|imageregistry:operations_count:sum|log_logging_info|log_collector_error_count_total|log_forwarder_pipeline_info|log_forwarder_input_info|log_forwarder_output_info|cluster:log_collected_bytes_total:sum|cluster:log_logged_bytes_total:sum|cluster:kata_monitor_running_shim_count:sum|platform:hypershift_hostedclusters:max|platform:hypershift_nodepools:max|namespace:noobaa_unhealthy_bucket_claims:max|namespace:noobaa_buckets_claims:max|namespace:noobaa_unhealthy_namespace_resources:max|namespace:noobaa_namespace_resources:max|namespace:noobaa_unhealthy_namespace_buckets:max|namespace:noobaa_namespace_buckets:max|namespace:noobaa_accounts:max|namespace:noobaa_usage:max|namespace:noobaa_system_health_status:max|ocs_advanced_feature_usage",alertstate=~"firing|",quantile=~"0.99|0.99|0.99|",system_type=~"OCS|OCS|",system_vendor=~"Red Hat|Red Hat|"} +{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_version_capability|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_total:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|cluster:vmi_request_cpu_cores:sum|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|olm_resolution_duration_seconds|csv_succeeded|csv_abnormal|cluster:kube_persistentvolumeclaim_resource_requests_storage_bytes:provisioner:sum|cluster:kubelet_volume_stats_used_bytes:provisioner:sum|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|odf_system_raw_capacity_total_bytes|odf_system_raw_capacity_used_bytes|odf_system_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:odf_system_pvs:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|odf_system_bucket_count|odf_system_objects_total|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:ovnkube_master_egress_routing_via_host:max|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|cluster:ingress_controller_aws_nlb_active:sum|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:alertmanager_integrations:max|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|namespace_job:scrape_series_added:topk3_sum1h|namespace_job:scrape_samples_post_metric_relabeling:topk3|monitoring:haproxy_server_http_responses_total:sum|rhmi_status|cluster_legacy_scheduler_policy|cluster_master_schedulable|che_workspace_status|che_workspace_started_total|che_workspace_failure_total|che_workspace_start_time_seconds_sum|che_workspace_start_time_seconds_count|cco_credentials_mode|cluster:kube_persistentvolume_plugin_type_counts:sum|visual_web_terminal_sessions_total|acm_managed_cluster_info|cluster:vsphere_vcenter_info:sum|cluster:vsphere_esxi_version_total:sum|cluster:vsphere_node_hw_version_total:sum|openshift:build_by_strategy:sum|rhods_aggregate_availability|rhods_total_users|instance:etcd_disk_wal_fsync_duration_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_bytes:sum|instance:etcd_network_peer_round_trip_time_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_use_in_bytes:sum|instance:etcd_disk_backend_commit_duration_seconds:histogram_quantile|jaeger_operator_instances_storage_types|jaeger_operator_instances_strategies|jaeger_operator_instances_agent_strategies|appsvcs:cores_by_product:sum|nto_custom_profiles:count|openshift_csi_share_configmap|openshift_csi_share_secret|openshift_csi_share_mount_failures_total|openshift_csi_share_mount_requests_total|cluster:velero_backup_total:max|cluster:velero_restore_total:max|eo_es_storage_info|eo_es_redundancy_policy_info|eo_es_defined_delete_namespaces_total|eo_es_misconfigured_memory_resources_info|cluster:eo_es_data_nodes_total:max|cluster:eo_es_documents_created_total:sum|cluster:eo_es_documents_deleted_total:sum|pod:eo_es_shards_total:max|eo_es_cluster_management_state_info|imageregistry:imagestreamtags_count:sum|imageregistry:operations_count:sum|log_logging_info|log_collector_error_count_total|log_forwarder_pipeline_info|log_forwarder_input_info|log_forwarder_output_info|cluster:log_collected_bytes_total:sum|cluster:log_logged_bytes_total:sum|cluster:kata_monitor_running_shim_count:sum|platform:hypershift_hostedclusters:max|platform:hypershift_nodepools:max|namespace:noobaa_unhealthy_bucket_claims:max|namespace:noobaa_buckets_claims:max|namespace:noobaa_unhealthy_namespace_resources:max|namespace:noobaa_namespace_resources:max|namespace:noobaa_unhealthy_namespace_buckets:max|namespace:noobaa_namespace_buckets:max|namespace:noobaa_accounts:max|namespace:noobaa_usage:max|namespace:noobaa_system_health_status:max|ocs_advanced_feature_usage|os_image_url_override:sum",alertstate=~"firing|",quantile=~"0.99|0.99|0.99|",system_type=~"OCS|OCS|",system_vendor=~"Red Hat|Red Hat|"} ``` For reference, here is an example response produced by a running OpenShift cluster: diff --git a/Documentation/telemetry/telemeter_query b/Documentation/telemetry/telemeter_query index f17ce91a5b..9c3da96fc3 100644 --- a/Documentation/telemetry/telemeter_query +++ b/Documentation/telemetry/telemeter_query @@ -1 +1 @@ -{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_version_capability|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_total:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|cluster:vmi_request_cpu_cores:sum|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|olm_resolution_duration_seconds|csv_succeeded|csv_abnormal|cluster:kube_persistentvolumeclaim_resource_requests_storage_bytes:provisioner:sum|cluster:kubelet_volume_stats_used_bytes:provisioner:sum|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|odf_system_raw_capacity_total_bytes|odf_system_raw_capacity_used_bytes|odf_system_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:odf_system_pvs:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|odf_system_bucket_count|odf_system_objects_total|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:ovnkube_master_egress_routing_via_host:max|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|cluster:ingress_controller_aws_nlb_active:sum|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:alertmanager_integrations:max|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|namespace_job:scrape_series_added:topk3_sum1h|namespace_job:scrape_samples_post_metric_relabeling:topk3|monitoring:haproxy_server_http_responses_total:sum|rhmi_status|cluster_legacy_scheduler_policy|cluster_master_schedulable|che_workspace_status|che_workspace_started_total|che_workspace_failure_total|che_workspace_start_time_seconds_sum|che_workspace_start_time_seconds_count|cco_credentials_mode|cluster:kube_persistentvolume_plugin_type_counts:sum|visual_web_terminal_sessions_total|acm_managed_cluster_info|cluster:vsphere_vcenter_info:sum|cluster:vsphere_esxi_version_total:sum|cluster:vsphere_node_hw_version_total:sum|openshift:build_by_strategy:sum|rhods_aggregate_availability|rhods_total_users|instance:etcd_disk_wal_fsync_duration_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_bytes:sum|instance:etcd_network_peer_round_trip_time_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_use_in_bytes:sum|instance:etcd_disk_backend_commit_duration_seconds:histogram_quantile|jaeger_operator_instances_storage_types|jaeger_operator_instances_strategies|jaeger_operator_instances_agent_strategies|appsvcs:cores_by_product:sum|nto_custom_profiles:count|openshift_csi_share_configmap|openshift_csi_share_secret|openshift_csi_share_mount_failures_total|openshift_csi_share_mount_requests_total|cluster:velero_backup_total:max|cluster:velero_restore_total:max|eo_es_storage_info|eo_es_redundancy_policy_info|eo_es_defined_delete_namespaces_total|eo_es_misconfigured_memory_resources_info|cluster:eo_es_data_nodes_total:max|cluster:eo_es_documents_created_total:sum|cluster:eo_es_documents_deleted_total:sum|pod:eo_es_shards_total:max|eo_es_cluster_management_state_info|imageregistry:imagestreamtags_count:sum|imageregistry:operations_count:sum|log_logging_info|log_collector_error_count_total|log_forwarder_pipeline_info|log_forwarder_input_info|log_forwarder_output_info|cluster:log_collected_bytes_total:sum|cluster:log_logged_bytes_total:sum|cluster:kata_monitor_running_shim_count:sum|platform:hypershift_hostedclusters:max|platform:hypershift_nodepools:max|namespace:noobaa_unhealthy_bucket_claims:max|namespace:noobaa_buckets_claims:max|namespace:noobaa_unhealthy_namespace_resources:max|namespace:noobaa_namespace_resources:max|namespace:noobaa_unhealthy_namespace_buckets:max|namespace:noobaa_namespace_buckets:max|namespace:noobaa_accounts:max|namespace:noobaa_usage:max|namespace:noobaa_system_health_status:max|ocs_advanced_feature_usage",alertstate=~"firing|",quantile=~"0.99|0.99|0.99|",system_type=~"OCS|OCS|",system_vendor=~"Red Hat|Red Hat|"} +{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_version_capability|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_total:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|cluster:vmi_request_cpu_cores:sum|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|olm_resolution_duration_seconds|csv_succeeded|csv_abnormal|cluster:kube_persistentvolumeclaim_resource_requests_storage_bytes:provisioner:sum|cluster:kubelet_volume_stats_used_bytes:provisioner:sum|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|odf_system_raw_capacity_total_bytes|odf_system_raw_capacity_used_bytes|odf_system_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:odf_system_pvs:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|odf_system_bucket_count|odf_system_objects_total|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:ovnkube_master_egress_routing_via_host:max|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|cluster:ingress_controller_aws_nlb_active:sum|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:alertmanager_integrations:max|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|namespace_job:scrape_series_added:topk3_sum1h|namespace_job:scrape_samples_post_metric_relabeling:topk3|monitoring:haproxy_server_http_responses_total:sum|rhmi_status|cluster_legacy_scheduler_policy|cluster_master_schedulable|che_workspace_status|che_workspace_started_total|che_workspace_failure_total|che_workspace_start_time_seconds_sum|che_workspace_start_time_seconds_count|cco_credentials_mode|cluster:kube_persistentvolume_plugin_type_counts:sum|visual_web_terminal_sessions_total|acm_managed_cluster_info|cluster:vsphere_vcenter_info:sum|cluster:vsphere_esxi_version_total:sum|cluster:vsphere_node_hw_version_total:sum|openshift:build_by_strategy:sum|rhods_aggregate_availability|rhods_total_users|instance:etcd_disk_wal_fsync_duration_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_bytes:sum|instance:etcd_network_peer_round_trip_time_seconds:histogram_quantile|instance:etcd_mvcc_db_total_size_in_use_in_bytes:sum|instance:etcd_disk_backend_commit_duration_seconds:histogram_quantile|jaeger_operator_instances_storage_types|jaeger_operator_instances_strategies|jaeger_operator_instances_agent_strategies|appsvcs:cores_by_product:sum|nto_custom_profiles:count|openshift_csi_share_configmap|openshift_csi_share_secret|openshift_csi_share_mount_failures_total|openshift_csi_share_mount_requests_total|cluster:velero_backup_total:max|cluster:velero_restore_total:max|eo_es_storage_info|eo_es_redundancy_policy_info|eo_es_defined_delete_namespaces_total|eo_es_misconfigured_memory_resources_info|cluster:eo_es_data_nodes_total:max|cluster:eo_es_documents_created_total:sum|cluster:eo_es_documents_deleted_total:sum|pod:eo_es_shards_total:max|eo_es_cluster_management_state_info|imageregistry:imagestreamtags_count:sum|imageregistry:operations_count:sum|log_logging_info|log_collector_error_count_total|log_forwarder_pipeline_info|log_forwarder_input_info|log_forwarder_output_info|cluster:log_collected_bytes_total:sum|cluster:log_logged_bytes_total:sum|cluster:kata_monitor_running_shim_count:sum|platform:hypershift_hostedclusters:max|platform:hypershift_nodepools:max|namespace:noobaa_unhealthy_bucket_claims:max|namespace:noobaa_buckets_claims:max|namespace:noobaa_unhealthy_namespace_resources:max|namespace:noobaa_namespace_resources:max|namespace:noobaa_unhealthy_namespace_buckets:max|namespace:noobaa_namespace_buckets:max|namespace:noobaa_accounts:max|namespace:noobaa_usage:max|namespace:noobaa_system_health_status:max|ocs_advanced_feature_usage|os_image_url_override:sum",alertstate=~"firing|",quantile=~"0.99|0.99|0.99|",system_type=~"OCS|OCS|",system_vendor=~"Red Hat|Red Hat|"} diff --git a/manifests/0000_50_cluster-monitoring-operator_04-config.yaml b/manifests/0000_50_cluster-monitoring-operator_04-config.yaml index 3851129a70..c9459babd4 100644 --- a/manifests/0000_50_cluster-monitoring-operator_04-config.yaml +++ b/manifests/0000_50_cluster-monitoring-operator_04-config.yaml @@ -839,6 +839,11 @@ data: # ocs_advanced_feature_usage shows whether the cluster is using any of the advanced # features, like external cluster mode or KMS/PV Encryption etc - '{__name__="ocs_advanced_feature_usage"}' + # + # owners: (https://github.com/openshift/machine-config-operator/) + # + # os_image_url_override:sum tells whether cluster is using default OS image or has been overridden by user + - '{__name__="os_image_url_override:sum"}' kind: ConfigMap metadata: name: telemetry-config