diff --git a/client/client_4.x.go b/client/client_4.x.go index 0a85880..63a7fb5 100644 --- a/client/client_4.x.go +++ b/client/client_4.x.go @@ -153,22 +153,22 @@ func (n *cluster4x) getRuleEngineMetrics() (metrics []collector.RuleEngine, err resp := struct { Data []struct { Metrics []struct { - Node string + Node string `json:"node"` SpeedMax float64 `json:"speed_max"` SpeedLast5m float64 `json:"speed_last5m"` Speed float64 `json:"speed"` - Matched int64 - Passed int64 - //NoResult int64 - //Exception int64 - Failed int64 + Matched int64 `json:"matched"` + Passed int64 `json:"passed"` + NoResult int64 `json:"no_result"` + Exception int64 `json:"exception"` + Failed int64 `json:"failed"` } Actions []struct { Metrics []struct { - Node string - Taken int64 - Success int64 - Failed int64 + Node string `json:"node"` + Taken int64 `json:"taken"` + Success int64 `json:"success"` + Failed int64 `json:"failed"` } } ID string `json:"id"` @@ -208,10 +208,11 @@ func (n *cluster4x) getRuleEngineMetrics() (metrics []collector.RuleEngine, err NodeName: cutNodeName(m.Node), RuleID: rule.ID, //ResStatus: unknown, - TopicHitCount: m.Matched, - ExecPassCount: m.Passed, - ExecFailureCount: m.Failed, - //NoResultCount: m.NoResult, + TopicHitCount: m.Matched, + ExecPassCount: m.Passed, + ExecFailureCount: m.Failed, + NoResultCount: m.NoResult, + ExecExceptionCount: m.Exception, ExecRate: m.Speed, ExecLast5mRate: m.SpeedLast5m, ExecMaxRate: m.SpeedMax, diff --git a/client/client_5.x.go b/client/client_5.x.go index 57cca97..5a216b4 100644 --- a/client/client_5.x.go +++ b/client/client_5.x.go @@ -146,7 +146,7 @@ func (n *cluster5x) getRuleEngineMetrics() (metrics []collector.RuleEngine, err Matched int64 Passed int64 Failed int64 - //Exception int64 `json:"failed.exception"` + Exception int64 `json:"failed.exception"` NoResult int64 `json:"failed.no_result"` ActionTotal int64 `json:"actions.total"` ActionSuccess int64 `json:"actions.success"` @@ -166,6 +166,7 @@ func (n *cluster5x) getRuleEngineMetrics() (metrics []collector.RuleEngine, err TopicHitCount: node.Metrics.Matched, ExecPassCount: node.Metrics.Passed, ExecFailureCount: node.Metrics.Failed, + ExecExceptionCount: node.Metrics.Exception, NoResultCount: node.Metrics.NoResult, ExecRate: node.Metrics.Rate, ExecLast5mRate: node.Metrics.RateLast5m, diff --git a/collector/client.go b/collector/client.go index bdd120a..16523f0 100644 --- a/collector/client.go +++ b/collector/client.go @@ -33,6 +33,7 @@ type RuleEngine struct { TopicHitCount int64 ExecPassCount int64 ExecFailureCount int64 + ExecExceptionCount int64 NoResultCount int64 ExecRate float64 ExecLast5mRate float64 diff --git a/collector/rule_engine.go b/collector/rule_engine.go index cce65bb..8348fcf 100644 --- a/collector/rule_engine.go +++ b/collector/rule_engine.go @@ -34,6 +34,7 @@ const ( ruleExecPassCount = "exec_pass_count" ruleExecFailureCount = "exec_failure_count" ruleNoResultCount = "exec_no_result_count" + ruleExecExceptionCount = "exec_exception_count" ruleExecRate = "exec_rate" ruleExecLast5mRate = "exec_last5m_rate" ruleExecMaxRate = "exec_max_rate" @@ -111,6 +112,11 @@ func NewRuleEngineCollector(client Cluster, logger log.Logger) (Collector, error help: "The failure count of rule exec", labels: []string{"node", "rule"}, }, + { + name: ruleExecExceptionCount, + help: "The exception count of rule exec", + labels: []string{"node", "rule"}, + }, { name: ruleNoResultCount, help: "The no result count of rule exec", @@ -199,6 +205,10 @@ func (c *ruleEngineCollector) Update(ch chan<- prometheus.Metric) error { c.desc[ruleExecFailureCount], prometheus.CounterValue, float64(metric.ExecFailureCount), metric.NodeName, metric.RuleID, ) + ch <- prometheus.MustNewConstMetric( + c.desc[ruleExecExceptionCount], + prometheus.CounterValue, float64(metric.ExecExceptionCount), metric.NodeName, metric.RuleID, + ) ch <- prometheus.MustNewConstMetric( c.desc[ruleNoResultCount], prometheus.CounterValue, float64(metric.NoResultCount), metric.NodeName, metric.RuleID, diff --git a/config/grafana-template/grafanalib-emqx/metrics.py b/config/grafana-template/grafanalib-emqx/metrics.py index 2db96a5..a9b6d61 100644 --- a/config/grafana-template/grafanalib-emqx/metrics.py +++ b/config/grafana-template/grafanalib-emqx/metrics.py @@ -601,8 +601,8 @@ "expr": "sum by(rule) (emqx_rule_exec_pass_count{cluster=\"$cluster\", node=~\".*\"})", }, { - "legendFormat": "Exec Failed last 15m", - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\".*\"}[15m]))", + "legendFormat": "Exec Exception last 15m", + "expr": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\".*\"}[15m]))", "thresholds": { "mode": "absolute", "steps": [ @@ -618,8 +618,8 @@ } }, { - "legendFormat": "Exec No Result", - "expr": "sum by(rule) (emqx_rule_exec_no_result_count{cluster=\"$cluster\", node=~\".*\"})", + "legendFormat": "Exec No Result 15m", + "expr": "sum by(rule) (irate(emqx_rule_exec_no_result_count{cluster=\"$cluster\", node=~\".*\"}[15m]))", "thresholds": { "mode": "absolute", "steps": [