diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md index 0e5e806f816a..33626b5389c3 100644 --- a/docs/en/changes/changes.md +++ b/docs/en/changes/changes.md @@ -66,6 +66,7 @@ * Fix `findEndpoint` query require `keyword` when using BanyanDB. * Support to analysis the ztunnel mapped IP address in eBPF Access Log Receiver. * Adapt BanyanDB Java Client 0.7.0-rc3. +* Add SkyWalking Java Agent self observability dashboard. #### UI diff --git a/docs/en/setup/backend/dashboards-so11y-java-agent.md b/docs/en/setup/backend/dashboards-so11y-java-agent.md new file mode 100644 index 000000000000..1fe9c3888759 --- /dev/null +++ b/docs/en/setup/backend/dashboards-so11y-java-agent.md @@ -0,0 +1,32 @@ +# Java Agent self observability dashboard + +The SkyWalking Java agent reports its own metrics through the Meter APIs in order to measure tracing performance. +SkyWalking also provides a dashboard to visualize the agent metrics. + +## Data flow +1. The SkyWalking Java agent reports metrics data internally and automatically. +2. The SkyWalking OAP accepts these meters through native protocols. +3. The SkyWalking OAP Server parses the expression with [MAL](../../concepts-and-designs/mal.md) to filter/calculate/aggregate and store the results. + +## Set up +Java Agent so11y is a built-in feature; it reports meters automatically after boot. + +## Self observability monitoring +Self observability monitoring provides monitoring of the runtime performance of the Java agent itself. `agent.service_name` is a `Service` in Agent so11y, and lands on the `Layer: SO11Y_JAVA_AGENT`. 
+ +### Self observability metrics + +| Unit | Metric Name | Description | Data Source | +|-------------------|----------------------------------------------------------------|---------------------------------------------|-----------------------| +| Count Per Minute | meter_java_agent_created_tracing_context_count | Created Tracing Context Count (Per Minute) | SkyWalking Java Agent | +| Count Per Minute | meter_java_agent_finished_tracing_context_count | Finished Tracing Context Count (Per Minute) | SkyWalking Java Agent | +| Count Per Minute | meter_java_agent_created_ignored_context_count | Created Ignored Context Count (Per Minute) | SkyWalking Java Agent | +| Count Per Minute | meter_java_agent_finished_ignored_context_count | Finished Ignored Context Count (Per Minute) | SkyWalking Java Agent | +| Count Per Minute | meter_java_agent_possible_leaked_context_count | Possible Leaked Context Count (Per Minute) | SkyWalking Java Agent | +| Count Per Minute | meter_java_agent_interceptor_error_count | Interceptor Error Count (Per Minute) | SkyWalking Java Agent | +| ms | meter_java_agent_tracing_context_execution_time_percentile | Tracing Context Execution Time (ms) | SkyWalking Java Agent | + +## Customizations +You can customize your own metrics/expression/dashboard panel. +The metrics definition and expression rules are found in `/meter-analyzer-config/java-agent.yaml`. +The self observability dashboard panel configurations are found in `/config/ui-initialized-templates/so11y_java_agent`. 
diff --git a/docs/menu.yml b/docs/menu.yml index 50e0a2927790..88be59dac004 100644 --- a/docs/menu.yml +++ b/docs/menu.yml @@ -146,6 +146,8 @@ catalog: path: "/en/setup/backend/dashboards-so11y" - name: "Satellite self telemetry" path: "/en/setup/backend/dashboards-so11y-satellite" + - name: "SkyWalking Java Agent self telemetry" + path: "/en/setup/backend/dashboards-so11y-java-agent" - name: "Configuration Vocabulary" path: "/en/setup/backend/configuration-vocabulary" - name: "Advanced Setup" diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java index 0f9b6b8f80ec..0eb61a9bc64a 100644 --- a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java +++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java @@ -234,7 +234,13 @@ public enum Layer { * Cilium is open source software for providing and transparently securing network connectivity and load balancing * between application workloads such as application containers or processes. */ - CILIUM_SERVICE(38, true); + CILIUM_SERVICE(38, true), + + /** + * The self observability of SkyWalking Java Agent, + * which provides the abilities to measure the tracing performance and error statistics of plugins. 
+ */ + SO11Y_JAVA_AGENT(39, true); private final int value; /** diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java index 7c11cee66a6b..58dcbd8328f4 100644 --- a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java +++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java @@ -76,6 +76,7 @@ public class UITemplateInitializer { Layer.CLICKHOUSE.name(), Layer.ACTIVEMQ.name(), Layer.CILIUM_SERVICE.name(), + Layer.SO11Y_JAVA_AGENT.name(), "custom" }; private final UITemplateManagementService uiTemplateManagementService; diff --git a/oap-server/server-starter/src/main/resources/application.yml b/oap-server/server-starter/src/main/resources/application.yml index 44c825a3bd42..33c1607038ed 100644 --- a/oap-server/server-starter/src/main/resources/application.yml +++ b/oap-server/server-starter/src/main/resources/application.yml @@ -269,7 +269,7 @@ agent-analyzer: # Nginx and Envoy agents can't get the real remote address. # Exit spans with the component in the list would not generate the client-side instance relation metrics. noUpstreamRealAddressAgents: ${SW_NO_UPSTREAM_REAL_ADDRESS:6000,9000} - meterAnalyzerActiveFiles: ${SW_METER_ANALYZER_ACTIVE_FILES:datasource,threadpool,satellite,go-runtime,python-runtime,continuous-profiling} # Which files could be meter analyzed, files split by "," + meterAnalyzerActiveFiles: ${SW_METER_ANALYZER_ACTIVE_FILES:datasource,threadpool,satellite,go-runtime,python-runtime,continuous-profiling,java-agent} # Which files could be meter analyzed, files split by "," slowCacheReadThreshold: ${SW_SLOW_CACHE_SLOW_READ_THRESHOLD:default:20,redis:10} # The slow cache read operation thresholds. 
Unit ms. slowCacheWriteThreshold: ${SW_SLOW_CACHE_SLOW_WRITE_THRESHOLD:default:20,redis:10} # The slow cache write operation thresholds. Unit ms. diff --git a/oap-server/server-starter/src/main/resources/meter-analyzer-config/java-agent.yaml b/oap-server/server-starter/src/main/resources/meter-analyzer-config/java-agent.yaml new file mode 100644 index 000000000000..4086f66e3e72 --- /dev/null +++ b/oap-server/server-starter/src/main/resources/meter-analyzer-config/java-agent.yaml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +expSuffix: instance(['service'], ['instance'], Layer.SO11Y_JAVA_AGENT) +metricPrefix: meter_java_agent +metricsRules: + - name: created_tracing_context_count + exp: created_tracing_context_counter.sum(['created_by', 'service', 'instance']).increase('PT1M') + - name: finished_tracing_context_count + exp: finished_tracing_context_counter.sum(['service', 'instance']).increase('PT1M') + - name: created_ignored_context_count + exp: created_ignored_context_counter.sum(['created_by', 'service', 'instance']).increase('PT1M') + - name: finished_ignored_context_count + exp: finished_ignored_context_counter.sum(['service', 'instance']).increase('PT1M') + - name: possible_leaked_context_count + exp: possible_leaked_context_counter.sum(['source', 'service', 'instance']).increase('PT1M') + - name: interceptor_error_count + exp: interceptor_error_counter.sum(['plugin_name', 'inter_type', 'service', 'instance']).increase('PT1M') + - name: tracing_context_execution_time_percentile + exp: tracing_context_performance.sum(['le', 'service', 'instance']).histogram().histogram_percentile([50,70,90,99]) diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/menu.yaml b/oap-server/server-starter/src/main/resources/ui-initialized-templates/menu.yaml index 6d0696d28387..7dcf95350b99 100644 --- a/oap-server/server-starter/src/main/resources/ui-initialized-templates/menu.yaml +++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/menu.yaml @@ -247,3 +247,8 @@ menus: description: "Satellite: an open-source agent designed for the cloud-native infrastructures, which provides a low-cost, high-efficient, and more secure way to collect telemetry data. It is the recommended load balancer for telemetry collecting." 
documentLink: https://skywalking.apache.org/docs/main/next/en/setup/backend/backend-load-balancer/ i18nKey: self_observability_satellite + - title: SkyWalking Java Agent + layer: SO11Y_JAVA_AGENT + description: The Java Agent for Apache SkyWalking, which provides the native tracing/metrics/logging/event/profiling abilities for Java projects. + documentLink: https://skywalking.apache.org/docs/main/next/en/setup/backend/dashboards-so11y-java-agent/ + i18nKey: self_observability_java_agent diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_java_agent/so11y-instance.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_java_agent/so11y-instance.json new file mode 100644 index 000000000000..6825961f56ce --- /dev/null +++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_java_agent/so11y-instance.json @@ -0,0 +1,195 @@ +[ + { + "id": "Self-Observability-Java-Agent-Instance", + "configuration": { + "children": [ + { + "x": 0, + "y": 0, + "w": 6, + "h": 13, + "i": "14", + "type": "Widget", + "widget": { + "title": "Tracing Context Creation (Per Minute)" + }, + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "expressions": [ + "meter_java_agent_created_tracing_context_count" + ] + }, + { + "x": 6, + "y": 0, + "w": 6, + "h": 13, + "i": "6", + "type": "Widget", + "widget": { + "title": "Tracing Context Creation and Completion (Per Minute)" + }, + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "metricConfig": [ + { + "label": "Creation" + }, + { + "label": "Completion" + } + ], + "expressions": [ + "aggregate_labels(meter_java_agent_created_tracing_context_count,sum)", + "meter_java_agent_finished_tracing_context_count" + ] + }, + { + "x": 12, + "y": 0, + "w": 6, + "h": 13, + "i": "1", + "type": "Widget", + 
"widget": { + "title": "Ignored Context Creation (Per Minute)" + }, + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "expressions": [ + "meter_java_agent_created_ignored_context_count" + ] + }, + { + "x": 18, + "y": 0, + "w": 6, + "h": 13, + "i": "2", + "type": "Widget", + "widget": { + "title": "Ignored Context Creation and Completion (Per Minute)" + }, + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "expressions": [ + "aggregate_labels(meter_java_agent_created_ignored_context_count,sum)", + "meter_java_agent_finished_ignored_context_count" + ], + "metricConfig": [ + { + "label": "Creation" + }, + { + "label": "Completion" + } + ] + }, + { + "x": 0, + "y": 13, + "w": 6, + "h": 13, + "i": "11", + "type": "Widget", + "widget": { + "title": "Possible Leaked Context (Per Minute)" + }, + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "expressions": [ + "meter_java_agent_possible_leaked_context_count" + ], + "metricConfig": [ + { + "label": "count" + } + ] + }, + { + "x": 12, + "y": 13, + "w": 12, + "h": 13, + "i": "8", + "type": "Widget", + "widget": { + "title": "Interceptor Error Count (Per Minute)" + }, + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "expressions": [ + "meter_java_agent_interceptor_error_count" + ], + "metricConfig": [ + { + "label": "count" + } + ] + }, + { + "x": 6, + "y": 13, + "w": 6, + "h": 13, + "i": "15", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Tracing Context Execution time (ms)" + }, + "expressions": [ + 
"relabels(meter_java_agent_tracing_context_execution_time_percentile,p='50,70,90,99',p='50,70,90,99')" + ] + } + ], + "layer": "SO11Y_JAVA_AGENT", + "entity": "ServiceInstance", + "name": "Self-Observability-Java-Agent-Instance", + "isRoot": false + } + } +] \ No newline at end of file diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_java_agent/so11y-service.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_java_agent/so11y-service.json new file mode 100644 index 000000000000..9967dcef929c --- /dev/null +++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_java_agent/so11y-service.json @@ -0,0 +1,62 @@ +[ + { + "id": "Self-Observability-Java-Agent-Service", + "configuration": { + "children": [ + { + "x": 0, + "y": 2, + "w": 24, + "h": 38, + "i": "0", + "type": "Widget", + "graph": { + "type": "InstanceList", + "dashboardName": "Self-Observability-Java-Agent-Instance", + "fontSize": 12 + }, + "metricConfig": [ + { + "label": "Context Creation", + "detailLabel": "context_creation", + "unit": "Per Minute" + }, + { + "label": "Context Completion", + "unit": "Per Minute", + "detailLabel": "context_completion" + } + ], + "expressions": [ + "avg(aggregate_labels(meter_java_agent_created_tracing_context_count,sum)+aggregate_labels(meter_java_agent_created_ignored_context_count,sum))", + "avg(meter_java_agent_finished_tracing_context_count+meter_java_agent_finished_ignored_context_count)" + ], + "subExpressions": [ + "aggregate_labels(meter_java_agent_created_tracing_context_count,sum)+aggregate_labels(meter_java_agent_created_ignored_context_count,sum)", + "meter_java_agent_finished_tracing_context_count+meter_java_agent_finished_ignored_context_count" + ] + }, + { + "x": 0, + "y": 0, + "w": 24, + "h": 2, + "i": "100", + "type": "Text", + "graph": { + "fontColor": "theme", + "backgroundColor": "theme", + "content": "The self observability of SkyWalking Java Agent, 
which provides the abilities to measure the tracing performance and error statistics of plugins.", + "fontSize": 14, + "textAlign": "left", + "url": "https://skywalking.apache.org/docs/main/next/en/setup/backend/dashboards-so11y-java-agent/" + } + } + ], + "layer": "SO11Y_JAVA_AGENT", + "entity": "Service", + "name": "Self-Observability-Java-Agent-Service", + "isRoot": true + } + } +] \ No newline at end of file