
Commit

Merge pull request #104 from Chia-Network/plot-metrics
cmmarslender committed Aug 21, 2023
2 parents 609ae83 + f13896b commit ef3b03a
Showing 4 changed files with 178 additions and 49 deletions.
2 changes: 1 addition & 1 deletion go.mod
@@ -3,7 +3,7 @@ module github.com/chia-network/chia-exporter
go 1.18

require (
github.com/chia-network/go-chia-libs v0.4.0
github.com/chia-network/go-chia-libs v0.5.0
github.com/chia-network/go-modules v0.0.4
github.com/oschwald/maxminddb-golang v1.12.0
github.com/prometheus/client_golang v1.16.0
4 changes: 2 additions & 2 deletions go.sum
@@ -43,8 +43,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chia-network/go-chia-libs v0.4.0 h1:KmBIJAtniqaFqyB+IKLPxEEKEtx89kBgP3tC4lgIT80=
github.com/chia-network/go-chia-libs v0.4.0/go.mod h1:cLTizmlrAoyfL+PGLS5G7MT+Q288wtUHCneIDrIp7Mc=
github.com/chia-network/go-chia-libs v0.5.0 h1:j4skq96iOIawGWqXebk87HBYJlUZbBIhh9zr/LLGxKs=
github.com/chia-network/go-chia-libs v0.5.0/go.mod h1:cLTizmlrAoyfL+PGLS5G7MT+Q288wtUHCneIDrIp7Mc=
github.com/chia-network/go-modules v0.0.4 h1:XlCcuT4j1krLvsFT1Y49Un5xORwcTc8jjE4SHih7OTI=
github.com/chia-network/go-modules v0.0.4/go.mod h1:JP8mG/9ieE76VcGIbzD5G3/4YDmvNhRryiQwp8GQr1U=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
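
The dependency bump from go-chia-libs v0.4.0 to v0.5.0 is what the plot metrics below build on: the new code reads protocols.Plot values whose CompressionLevel field is optional. A minimal sketch of that accessor pattern, using only the calls that appear later in this diff (CompressionLevel.OrElse); everything else in the snippet, including the zero-value Plot, is an illustrative assumption rather than part of the commit:

package main

import (
	"fmt"

	"github.com/chia-network/go-chia-libs/pkg/protocols"
)

// compressionLevelOf mirrors harvester.go below: CompressionLevel is optional,
// so a missing value is treated as 0 (uncompressed).
func compressionLevelOf(p protocols.Plot) uint8 {
	return p.CompressionLevel.OrElse(uint8(0))
}

func main() {
	var plot protocols.Plot // zero value: no compression level recorded
	fmt.Println(compressionLevelOf(plot)) // prints 0
}
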
123 changes: 113 additions & 10 deletions internal/metrics/farmer.go
@@ -2,7 +2,7 @@ package metrics

import (
"encoding/json"
"time"
"fmt"

"github.com/chia-network/go-chia-libs/pkg/rpc"
"github.com/chia-network/go-chia-libs/pkg/types"
@@ -24,17 +24,36 @@ type FarmerServiceMetrics struct {
// Connection Metrics
connectionCount *prometheus.GaugeVec

// Keep a local copy of the plot count, so we can do other actions when the value changes
// Tracked per node ID, since we get checkins from all harvesters here
totalPlotsValue map[types.Bytes32]uint64

// Also have to keep track of the node ID to hostname mapping, since not all responses include the friendly hostname
nodeIDToHostname map[types.Bytes32]string

// Partial/Pooling Metrics
submittedPartials *prometheus.CounterVec
currentDifficulty *prometheus.GaugeVec
pointsAckSinceStart *prometheus.GaugeVec

// Proof Metrics
proofsFound *wrappedPrometheus.LazyCounter

// Remote Harvester Plot Counts
plotFilesize *prometheus.GaugeVec
plotCount *prometheus.GaugeVec
totalFoundProofs *prometheus.CounterVec
lastFoundProofs *prometheus.GaugeVec
totalEligiblePlots *prometheus.CounterVec
lastEligiblePlots *prometheus.GaugeVec
lastLookupTime *prometheus.GaugeVec
}

// InitMetrics sets all the metrics properties
func (s *FarmerServiceMetrics) InitMetrics() {
s.totalPlotsValue = map[types.Bytes32]uint64{}
s.nodeIDToHostname = map[types.Bytes32]string{}

// Connection Metrics
s.connectionCount = s.metrics.newGaugeVec(chiaServiceFarmer, "connection_count", "Number of active connections for each type of peer", []string{"node_type"})

@@ -46,24 +65,34 @@ func (s *FarmerServiceMetrics) InitMetrics() {

// Proof Metrics
s.proofsFound = s.metrics.newCounter(chiaServiceFarmer, "proofs_found", "Number of proofs found since the exporter has been running")

// Remote harvester plot counts
plotLabels := []string{"host", "node_id", "size", "type", "compression"}
s.plotFilesize = s.metrics.newGaugeVec(chiaServiceFarmer, "plot_filesize", "Filesize of plots separated by harvester", plotLabels)
s.plotCount = s.metrics.newGaugeVec(chiaServiceFarmer, "plot_count", "Number of plots separated by harvester", plotLabels)
s.totalFoundProofs = s.metrics.newCounterVec(chiaServiceFarmer, "total_found_proofs", "Counter of total found proofs since the exporter started", []string{"host", "node_id"})
s.lastFoundProofs = s.metrics.newGaugeVec(chiaServiceFarmer, "last_found_proofs", "Number of proofs found for the last farmer_info event", []string{"host", "node_id"})
s.totalEligiblePlots = s.metrics.newCounterVec(chiaServiceFarmer, "total_eligible_plots", "Counter of total eligible plots since the exporter started", []string{"host", "node_id"})
s.lastEligiblePlots = s.metrics.newGaugeVec(chiaServiceFarmer, "last_eligible_plots", "Number of eligible plots for the last farmer_info event", []string{"host", "node_id"})
s.lastLookupTime = s.metrics.newGaugeVec(chiaServiceFarmer, "last_lookup_time", "Lookup time for the last farmer_info event", []string{"host", "node_id"})
}

// InitialData is called on startup of the metrics server, to allow seeding metrics with current/initial data
func (s *FarmerServiceMetrics) InitialData() {}
func (s *FarmerServiceMetrics) InitialData() {
utils.LogErr(s.metrics.client.FarmerService.GetConnections(&rpc.GetConnectionsOptions{}))
}

// SetupPollingMetrics starts any metrics that happen on an interval
func (s *FarmerServiceMetrics) SetupPollingMetrics() {
go func() {
for {
utils.LogErr(s.metrics.client.FarmerService.GetConnections(&rpc.GetConnectionsOptions{}))
time.Sleep(15 * time.Second)
}
}()
}
func (s *FarmerServiceMetrics) SetupPollingMetrics() {}

// Disconnected clears/unregisters metrics when the connection drops
func (s *FarmerServiceMetrics) Disconnected() {
s.connectionCount.Reset()
s.plotFilesize.Reset()
s.plotCount.Reset()
s.lastFoundProofs.Reset()
s.lastEligiblePlots.Reset()
s.lastLookupTime.Reset()
}

// Reconnected is called when the service is reconnected after the websocket was disconnected
@@ -76,16 +105,90 @@ func (s *FarmerServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
switch resp.Command {
case "get_connections":
s.GetConnections(resp)
case "new_farming_info":
s.NewFarmingInfo(resp)
case "submitted_partial":
s.SubmittedPartial(resp)
case "proof":
s.Proof(resp)
case "harvester_removed":
fallthrough
case "add_connection":
fallthrough
case "close_connection":
utils.LogErr(s.metrics.client.FarmerService.GetConnections(&rpc.GetConnectionsOptions{}))
}
}

// GetConnections handler for get_connections events
func (s *FarmerServiceMetrics) GetConnections(resp *types.WebsocketResponse) {
connectionCountHelper(resp, s.connectionCount)
harvesters, _, err := s.metrics.httpClient.FarmerService.GetHarvesters(&rpc.FarmerGetHarvestersOptions{})
if err != nil {
log.Errorf("farmer: Error getting harvesters: %s\n", err.Error())
return
}

// Must be reset prior to setting new values in case all of a particular type, k-size, or c level are gone
s.plotFilesize.Reset()
s.plotCount.Reset()

for _, harvester := range harvesters.Harvesters {
// keep track of the node ID to host mapping
s.nodeIDToHostname[harvester.Connection.NodeID] = harvester.Connection.Host

_totalPlotCount := uint64(0)
plotSize, plotCount := PlotSizeCountHelper(harvester.Plots)

// Now we can set the gauges with the calculated total values
// Labels: "host", "node_id", "size", "type", "compression"
for kSize, cLevels := range plotSize {
for cLevel, fileSizes := range cLevels {
s.plotFilesize.WithLabelValues(harvester.Connection.Host, harvester.Connection.NodeID.String(), fmt.Sprintf("%d", kSize), "og", fmt.Sprintf("%d", cLevel)).Set(float64(fileSizes[PlotTypeOg]))
s.plotFilesize.WithLabelValues(harvester.Connection.Host, harvester.Connection.NodeID.String(), fmt.Sprintf("%d", kSize), "pool", fmt.Sprintf("%d", cLevel)).Set(float64(fileSizes[PlotTypePool]))
}
}

for kSize, cLevelsByType := range plotCount {
for cLevel, plotCountByType := range cLevelsByType {
_totalPlotCount += plotCountByType[PlotTypeOg]
_totalPlotCount += plotCountByType[PlotTypePool]

s.plotCount.WithLabelValues(harvester.Connection.Host, harvester.Connection.NodeID.String(), fmt.Sprintf("%d", kSize), "og", fmt.Sprintf("%d", cLevel)).Set(float64(plotCountByType[PlotTypeOg]))
s.plotCount.WithLabelValues(harvester.Connection.Host, harvester.Connection.NodeID.String(), fmt.Sprintf("%d", kSize), "pool", fmt.Sprintf("%d", cLevel)).Set(float64(plotCountByType[PlotTypePool]))
}
}

s.totalPlotsValue[harvester.Connection.NodeID] = _totalPlotCount
}
}

// NewFarmingInfo handles new_farming_info events
func (s *FarmerServiceMetrics) NewFarmingInfo(resp *types.WebsocketResponse) {
info := &types.EventFarmerNewFarmingInfo{}
err := json.Unmarshal(resp.Data, info)
if err != nil {
log.Errorf("Error unmarshalling: %s\n", err.Error())
return
}

nodeID := info.FarmingInfo.NodeID
hostname, foundHostname := s.nodeIDToHostname[nodeID]
if !foundHostname || s.totalPlotsValue[nodeID] != uint64(info.FarmingInfo.TotalPlots) {
log.Debugf("Missing node ID to host mapping or plot count doesn't match. Refreshing harvester info. New Plot Count: %d | Previous Plot Count: %d\n", info.FarmingInfo.TotalPlots, s.totalPlotsValue[nodeID])
// When plot counts change, we have to refresh information about the plots
utils.LogErr(s.metrics.client.FarmerService.GetConnections(&rpc.GetConnectionsOptions{}))
}

if foundHostname {
// Labels: "host", "node_id"
s.totalFoundProofs.WithLabelValues(hostname, nodeID.String()).Add(float64(info.FarmingInfo.Proofs))
s.lastFoundProofs.WithLabelValues(hostname, nodeID.String()).Set(float64(info.FarmingInfo.Proofs))
s.totalEligiblePlots.WithLabelValues(hostname, nodeID.String()).Add(float64(info.FarmingInfo.PassedFilter))
s.lastEligiblePlots.WithLabelValues(hostname, nodeID.String()).Set(float64(info.FarmingInfo.PassedFilter))
s.lastLookupTime.WithLabelValues(hostname, nodeID.String()).Set(float64(info.FarmingInfo.LookupTime))
}

}

// SubmittedPartial handles a received submitted_partial event
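
All of the per-harvester series added above (plot_filesize, plot_count, total_found_proofs, last_lookup_time, and so on) are built on the standard client_golang labeled-vector pattern that the exporter's newGaugeVec/newCounterVec helpers wrap. A self-contained sketch of that pattern with the same label set; the namespace and subsystem strings and the label values are invented for illustration, not taken from the exporter:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
)

func main() {
	// One gauge vector; one time series per unique combination of label values.
	plotCount := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: "chia",
		Subsystem: "farmer",
		Name:      "plot_count",
		Help:      "Number of plots separated by harvester",
	}, []string{"host", "node_id", "size", "type", "compression"})
	prometheus.MustRegister(plotCount)

	// Set a value for a specific harvester / k-size / plot type / compression level.
	plotCount.WithLabelValues("harvester-1", "0xabc123", "32", "pool", "5").Set(120)

	// Read the sample back just to show the labeled series exists
	// (in the exporter it is scraped over HTTP instead).
	var m dto.Metric
	if err := plotCount.WithLabelValues("harvester-1", "0xabc123", "32", "pool", "5").Write(&m); err == nil {
		fmt.Println(m.GetGauge().GetValue()) // 120
	}
}
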
98 changes: 62 additions & 36 deletions internal/metrics/harvester.go
@@ -5,6 +5,7 @@ import (
"fmt"
"time"

"github.com/chia-network/go-chia-libs/pkg/protocols"
"github.com/chia-network/go-chia-libs/pkg/rpc"
"github.com/chia-network/go-chia-libs/pkg/types"
"github.com/prometheus/client_golang/prometheus"
@@ -44,9 +45,10 @@ func (s *HarvesterServiceMetrics) InitMetrics() {
// Connection Metrics
s.connectionCount = s.metrics.newGaugeVec(chiaServiceHarvester, "connection_count", "Number of active connections for each type of peer", []string{"node_type"})

plotLabels := []string{"size", "type", "compression"}
s.totalPlots = s.metrics.newGauge(chiaServiceHarvester, "total_plots", "Total number of plots on this harvester")
s.plotFilesize = s.metrics.newGaugeVec(chiaServiceHarvester, "plot_filesize", "Total filesize of plots on this harvester, by K size", []string{"size", "type"})
s.plotCount = s.metrics.newGaugeVec(chiaServiceHarvester, "plot_count", "Total count of plots on this harvester, by K size", []string{"size", "type"})
s.plotFilesize = s.metrics.newGaugeVec(chiaServiceHarvester, "plot_filesize", "Total filesize of plots on this harvester, by K size", plotLabels)
s.plotCount = s.metrics.newGaugeVec(chiaServiceHarvester, "plot_count", "Total count of plots on this harvester, by K size", plotLabels)

s.totalFoundProofs = s.metrics.newCounter(chiaServiceHarvester, "total_found_proofs", "Counter of total found proofs since the exporter started")
s.lastFoundProofs = s.metrics.newGauge(chiaServiceHarvester, "last_found_proofs", "Number of proofs found for the last farmer_info event")
@@ -155,55 +157,79 @@ func (s *HarvesterServiceMetrics) GetPlots(resp *types.WebsocketResponse) {
s.ProcessGetPlots(plots)
}

// PlotType is the type of plot (og or pool)
type PlotType uint8

const (
// PlotTypeOg is the original plot format, no plotNFT
PlotTypeOg = PlotType(0)
// PlotTypePool is the new plotNFT plot format
PlotTypePool = PlotType(1)
)

// ProcessGetPlots processes the `GetPlotsResponse` from get_plots so that we can use this with websockets or HTTP RPC requests
func (s *HarvesterServiceMetrics) ProcessGetPlots(plots *rpc.HarvesterGetPlotsResponse) {
// First, iterate through all the plots to get totals for each ksize
type plotType uint8
plotTypeOg := plotType(0)
plotTypePool := plotType(1)
plotSize, plotCount := PlotSizeCountHelper(plots.Plots.OrEmpty())

plotSize := map[uint8]map[plotType]uint64{}
plotCount := map[uint8]map[plotType]uint64{}
// Now we can set the gauges with the calculated total values
// Labels: "size", "type", "compression"
for kSize, cLevels := range plotSize {
for cLevel, fileSizes := range cLevels {
s.plotFilesize.WithLabelValues(fmt.Sprintf("%d", kSize), "og", fmt.Sprintf("%d", cLevel)).Set(float64(fileSizes[PlotTypeOg]))
s.plotFilesize.WithLabelValues(fmt.Sprintf("%d", kSize), "pool", fmt.Sprintf("%d", cLevel)).Set(float64(fileSizes[PlotTypePool]))
}
}

for _, plot := range plots.Plots.OrEmpty() {
for kSize, cLevelsByType := range plotCount {
for cLevel, plotCountByType := range cLevelsByType {
s.plotCount.WithLabelValues(fmt.Sprintf("%d", kSize), "og", fmt.Sprintf("%d", cLevel)).Set(float64(plotCountByType[PlotTypeOg]))
s.plotCount.WithLabelValues(fmt.Sprintf("%d", kSize), "pool", fmt.Sprintf("%d", cLevel)).Set(float64(plotCountByType[PlotTypePool]))
}
}

totalPlotCount := len(plots.Plots.OrEmpty())
s.totalPlots.Set(float64(totalPlotCount))

s.totalPlotsValue = uint64(totalPlotCount)
}

// PlotSizeCountHelper returns information about plot sizes and counts for the given set of plots
// Return is (plotSize, plotCount), each keyed by k-size, then compression level, then PlotType
func PlotSizeCountHelper(plots []protocols.Plot) (map[uint8]map[uint8]map[PlotType]uint64, map[uint8]map[uint8]map[PlotType]uint64) {
// First, iterate through all the plots to get totals for each ksize
// map[ksize]map[clevel]map[PlotType]uint64
plotSize := map[uint8]map[uint8]map[PlotType]uint64{}
plotCount := map[uint8]map[uint8]map[PlotType]uint64{}

for _, plot := range plots {
cLevel := plot.CompressionLevel.OrElse(uint8(0))
kSize := plot.Size

if _, ok := plotSize[kSize]; !ok {
plotSize[kSize] = map[plotType]uint64{
plotTypeOg: 0,
plotTypePool: 0,
}
// It's safe to assume that if plotSize isn't set, plotCount isn't either, since they are created together
plotSize[kSize] = map[uint8]map[PlotType]uint64{}
plotCount[kSize] = map[uint8]map[PlotType]uint64{}
}

if _, ok := plotCount[kSize]; !ok {
plotCount[kSize] = map[plotType]uint64{
plotTypeOg: 0,
plotTypePool: 0,
if _, ok := plotSize[kSize][cLevel]; !ok {
plotSize[kSize][cLevel] = map[PlotType]uint64{
PlotTypeOg: 0,
PlotTypePool: 0,
}
plotCount[kSize][cLevel] = map[PlotType]uint64{
PlotTypeOg: 0,
PlotTypePool: 0,
}
}

if plot.PoolContractPuzzleHash.IsPresent() {
plotSize[kSize][plotTypePool] += plot.FileSize
plotCount[kSize][plotTypePool]++
plotSize[kSize][cLevel][PlotTypePool] += plot.FileSize
plotCount[kSize][cLevel][PlotTypePool]++
} else {
plotSize[kSize][plotTypeOg] += plot.FileSize
plotCount[kSize][plotTypeOg]++
plotSize[kSize][cLevel][PlotTypeOg] += plot.FileSize
plotCount[kSize][cLevel][PlotTypeOg]++
}
}

// Now we can set the gauges with the calculated total values
for kSize, fileSizes := range plotSize {
s.plotFilesize.WithLabelValues(fmt.Sprintf("%d", kSize), "og").Set(float64(fileSizes[plotTypeOg]))
s.plotFilesize.WithLabelValues(fmt.Sprintf("%d", kSize), "pool").Set(float64(fileSizes[plotTypePool]))
}

for kSize, plotCountByType := range plotCount {
s.plotCount.WithLabelValues(fmt.Sprintf("%d", kSize), "og").Set(float64(plotCountByType[plotTypeOg]))
s.plotCount.WithLabelValues(fmt.Sprintf("%d", kSize), "pool").Set(float64(plotCountByType[plotTypePool]))
}

totalPlotCount := len(plots.Plots.OrEmpty())
s.totalPlots.Set(float64(totalPlotCount))

s.totalPlotsValue = uint64(totalPlotCount)
return plotSize, plotCount
}
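
With this change PlotSizeCountHelper returns doubly nested maps, keyed first by k-size and then by compression level, so callers (ProcessGetPlots here and GetConnections in farmer.go above) walk two levels before reaching the per-PlotType totals. A small, self-contained illustration of consuming that return shape; the counts are made up:

package main

import "fmt"

// PlotType mirrors the type introduced in harvester.go above.
type PlotType uint8

const (
	PlotTypeOg   = PlotType(0)
	PlotTypePool = PlotType(1)
)

func main() {
	// Same shape as the second return value of PlotSizeCountHelper:
	// map[kSize]map[compressionLevel]map[PlotType]uint64
	plotCount := map[uint8]map[uint8]map[PlotType]uint64{
		32: {
			0: {PlotTypeOg: 10, PlotTypePool: 40}, // uncompressed plots
			5: {PlotTypeOg: 0, PlotTypePool: 25},  // C5 compressed plots
		},
	}

	total := uint64(0)
	for kSize, cLevels := range plotCount {
		for cLevel, byType := range cLevels {
			total += byType[PlotTypeOg] + byType[PlotTypePool]
			fmt.Printf("k=%d c=%d og=%d pool=%d\n", kSize, cLevel, byType[PlotTypeOg], byType[PlotTypePool])
		}
	}
	fmt.Println("total plots:", total) // 75
}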
