Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HCP Telemetry] Periodic Refresh for Dynamic Telemetry Configuration #18168

Merged
merged 42 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
00cee32
OTElExporter now uses an EndpointProvider to discover the endpoint
Achooo Jul 18, 2023
77f09b8
OTELSink uses a ConfigProvider to obtain filters and labels configura…
Achooo Jul 18, 2023
ff4d85f
improve tests for otel_sink
Achooo Jul 18, 2023
0264d5b
Regex logic is moved into client for a method on the TelemetryConfig …
Achooo Jul 18, 2023
f12219a
Create a telemetry_config_provider and update deps to use it
Achooo Jul 18, 2023
8b4bf1d
Fix conversion
Achooo Jul 18, 2023
5b4c997
fix import newline
Achooo Jul 18, 2023
7550818
Add logger to hcp client and move telemetry_config out of the client.…
Achooo Jul 25, 2023
3f1c53d
Add a telemetry_config.go to refactor client.go
Achooo Jul 25, 2023
8dce916
Update deps
Achooo Jul 25, 2023
2313673
update hcp deps test
Achooo Jul 25, 2023
196a4ef
Modify telemetry_config_providers
Achooo Jul 25, 2023
3b031a2
Check for nil filters
Achooo Jul 25, 2023
cb869b1
PR review updates
Achooo Jul 26, 2023
38d6bca
Fix comments and move around pieces
Achooo Jul 26, 2023
1feea25
Fix comments
Achooo Jul 26, 2023
0d25ba4
Remove context from client struct
Achooo Jul 26, 2023
a58baa7
Moved ctx out of sink struct and fixed filters, added a test
Achooo Jul 26, 2023
f3be3ec
Remove named imports, use errors.New if not fformatting
Achooo Jul 26, 2023
a353527
Remove HCP dependencies in telemetry package
Achooo Jul 26, 2023
de55bf9
Add success metric and move lock only to grab the t.cfgHahs
Achooo Jul 26, 2023
eacf20e
Update hash
Achooo Jul 27, 2023
4ed2b74
fix nits
Achooo Jul 27, 2023
e3839b3
Create an equals method and add tests
Achooo Jul 27, 2023
b867e30
Improve telemetry_config_provider.go tests
Achooo Jul 27, 2023
d4b788a
Add race test
Achooo Jul 27, 2023
932633e
Add missing godoc
Achooo Jul 27, 2023
696966f
Remove mock for MetricsClient
Achooo Jul 27, 2023
1d58bf6
Avoid goroutine test panics
Achooo Jul 28, 2023
fea50d4
trying to kick CI lint issues by upgrading mod
Achooo Jul 28, 2023
bfebfb1
imprve test code and add hasher for testing
Achooo Jul 28, 2023
54eb748
Use structure logging for filters, fix error constants, and default t…
Achooo Jul 31, 2023
9677781
removed hashin and modify logic to simplify
Achooo Jul 31, 2023
c23798c
Improve race test and fix PR feedback by removing hash equals and avo…
Achooo Jul 31, 2023
235acac
Ran make go-mod-tidy
Achooo Jul 31, 2023
1ef0795
Use errtypes in the test
Achooo Jul 31, 2023
2e10100
Add changelog
Achooo Jul 31, 2023
952129c
add safety check for exporter endpoint
Achooo Jul 31, 2023
b46d5b9
remove require.Contains by using error types, fix structure logging, …
Achooo Aug 1, 2023
7ea6280
Fixed race test to have changing config values
Achooo Aug 1, 2023
d3522ee
Send success metric before modifying config
Achooo Aug 1, 2023
aaec9a6
Avoid the defer and move the success metric under
Achooo Aug 1, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/18168.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
hcp: Add dynamic configuration support for the export of server metrics to HCP.
```
80 changes: 6 additions & 74 deletions agent/hcp/client/client.go
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes:

  • Init client with context to add a logger
  • Refactored out the TelemetryConfig, MetricsConfig, new RefreshConfigand the conversions into the telemetry_config.go file

Why this refactor?

  1. This file was growing larger and larger
  2. Modified the endpoint, label and filters validation and parsing to be done when converting the response. This reduces the code over in deps.go and telemetry_config_provider.go. It also avoids code duplication.

Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,6 @@ type Client interface {
DiscoverServers(ctx context.Context) ([]string, error)
}

// MetricsConfig holds metrics specific configuration for the TelemetryConfig.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to new file telemetry_config.go

// The endpoint field overrides the TelemetryConfig endpoint.
type MetricsConfig struct {
Filters []string
Endpoint string
}

// TelemetryConfig contains configuration for telemetry data forwarded by Consul servers
// to the HCP Telemetry gateway.
type TelemetryConfig struct {
Endpoint string
Labels map[string]string
MetricsConfig *MetricsConfig
}

type BootstrapConfig struct {
Name string
BootstrapExpect int
Expand Down Expand Up @@ -112,10 +97,14 @@ func (c *hcpClient) FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig,

resp, err := c.tgw.AgentTelemetryConfig(params, nil)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to fetch from HCP: %w", err)
}

if err := validateAgentTelemetryConfigPayload(resp); err != nil {
return nil, fmt.Errorf("invalid response payload: %w", err)
}

return convertTelemetryConfig(resp)
return convertAgentTelemetryResponse(ctx, resp, c.cfg)
}

func (c *hcpClient) FetchBootstrap(ctx context.Context) (*BootstrapConfig, error) {
Expand Down Expand Up @@ -272,60 +261,3 @@ func (c *hcpClient) DiscoverServers(ctx context.Context) ([]string, error) {

return servers, nil
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved conversions to new file telemetry_config.go

// convertTelemetryConfig validates the AgentTelemetryConfig payload and converts it into a TelemetryConfig object.
func convertTelemetryConfig(resp *hcptelemetry.AgentTelemetryConfigOK) (*TelemetryConfig, error) {
if resp.Payload == nil {
return nil, fmt.Errorf("missing payload")
}

if resp.Payload.TelemetryConfig == nil {
return nil, fmt.Errorf("missing telemetry config")
}

payloadConfig := resp.Payload.TelemetryConfig
var metricsConfig MetricsConfig
if payloadConfig.Metrics != nil {
metricsConfig.Endpoint = payloadConfig.Metrics.Endpoint
metricsConfig.Filters = payloadConfig.Metrics.IncludeList
}
return &TelemetryConfig{
Endpoint: payloadConfig.Endpoint,
Labels: payloadConfig.Labels,
MetricsConfig: &metricsConfig,
}, nil
}

// Enabled verifies if telemetry is enabled by ensuring a valid endpoint has been retrieved.
// It returns full metrics endpoint and true if a valid endpoint was obtained.
func (t *TelemetryConfig) Enabled() (string, bool) {
endpoint := t.Endpoint
if override := t.MetricsConfig.Endpoint; override != "" {
endpoint = override
}

if endpoint == "" {
return "", false
}

// The endpoint from Telemetry Gateway is a domain without scheme, and without the metrics path, so they must be added.
return endpoint + metricsGatewayPath, true
}

// DefaultLabels returns a set of <key, value> string pairs that must be added as attributes to all exported telemetry data.
func (t *TelemetryConfig) DefaultLabels(cfg config.CloudConfig) map[string]string {
labels := make(map[string]string)
nodeID := string(cfg.NodeID)
if nodeID != "" {
labels["node_id"] = nodeID
}
if cfg.NodeName != "" {
labels["node_name"] = cfg.NodeName
}

for k, v := range t.Labels {
labels[k] = v
}

return labels
}
240 changes: 81 additions & 159 deletions agent/hcp/client/client_test.go
Copy link
Contributor Author

@Achooo Achooo Jul 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed most tests in this file to put them in telemetry_config_test.go
Fixed the actual client test which was not mocking the TGW client properly, and test the FetchTelemetryConfig function including validations and conversion.

Original file line number Diff line number Diff line change
Expand Up @@ -2,200 +2,122 @@ package client

import (
"context"
"fmt"
"net/url"
"regexp"
"testing"
"time"

"github.com/hashicorp/consul/agent/hcp/config"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service"
"github.com/go-openapi/runtime"
hcptelemetry "github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service"
"github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/models"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)

func TestFetchTelemetryConfig(t *testing.T) {
t.Parallel()
for name, test := range map[string]struct {
metricsEndpoint string
expect func(*MockClient)
disabled bool
}{
"success": {
expect: func(mockClient *MockClient) {
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{
Endpoint: "https://test.com",
MetricsConfig: &MetricsConfig{
Endpoint: "",
},
}, nil)
},
metricsEndpoint: "https://test.com/v1/metrics",
},
"overrideMetricsEndpoint": {
expect: func(mockClient *MockClient) {
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{
Endpoint: "https://test.com",
MetricsConfig: &MetricsConfig{
Endpoint: "https://test.com",
},
}, nil)
},
metricsEndpoint: "https://test.com/v1/metrics",
},
"disabledWithEmptyEndpoint": {
expect: func(mockClient *MockClient) {
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{
Endpoint: "",
MetricsConfig: &MetricsConfig{
Endpoint: "",
},
}, nil)
},
disabled: true,
},
} {
test := test
t.Run(name, func(t *testing.T) {
t.Parallel()

mock := NewMockClient(t)
test.expect(mock)

telemetryCfg, err := mock.FetchTelemetryConfig(context.Background())
require.NoError(t, err)

if test.disabled {
endpoint, ok := telemetryCfg.Enabled()
require.False(t, ok)
require.Empty(t, endpoint)
return
}
type mockTGW struct {
mockResponse *hcptelemetry.AgentTelemetryConfigOK
mockError error
}

endpoint, ok := telemetryCfg.Enabled()
func (m *mockTGW) AgentTelemetryConfig(params *hcptelemetry.AgentTelemetryConfigParams, authInfo runtime.ClientAuthInfoWriter, opts ...hcptelemetry.ClientOption) (*hcptelemetry.AgentTelemetryConfigOK, error) {
return m.mockResponse, m.mockError
}
func (m *mockTGW) GetLabelValues(params *hcptelemetry.GetLabelValuesParams, authInfo runtime.ClientAuthInfoWriter, opts ...hcptelemetry.ClientOption) (*hcptelemetry.GetLabelValuesOK, error) {
return hcptelemetry.NewGetLabelValuesOK(), nil
}
func (m *mockTGW) QueryRangeBatch(params *hcptelemetry.QueryRangeBatchParams, authInfo runtime.ClientAuthInfoWriter, opts ...hcptelemetry.ClientOption) (*hcptelemetry.QueryRangeBatchOK, error) {
return hcptelemetry.NewQueryRangeBatchOK(), nil
}
func (m *mockTGW) SetTransport(transport runtime.ClientTransport) {}

require.True(t, ok)
require.Equal(t, test.metricsEndpoint, endpoint)
})
}
type expectedTelemetryCfg struct {
endpoint string
labels map[string]string
filters string
refreshInterval time.Duration
}

func TestConvertTelemetryConfig(t *testing.T) {
func TestFetchTelemetryConfig(t *testing.T) {
t.Parallel()
for name, test := range map[string]struct {
resp *consul_telemetry_service.AgentTelemetryConfigOK
expectedTelemetryCfg *TelemetryConfig
wantErr string
for name, tc := range map[string]struct {
mockResponse *hcptelemetry.AgentTelemetryConfigOK
mockError error
wantErr string
expected *expectedTelemetryCfg
}{
"success": {
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{
TelemetryConfig: &models.HashicorpCloudConsulTelemetry20230414TelemetryConfig{
Endpoint: "https://test.com",
Labels: map[string]string{"test": "test"},
},
},
},
expectedTelemetryCfg: &TelemetryConfig{
Endpoint: "https://test.com",
Labels: map[string]string{"test": "test"},
MetricsConfig: &MetricsConfig{},
"errorsWithFetchFailure": {
mockError: fmt.Errorf("failed to fetch from HCP"),
mockResponse: nil,
wantErr: "failed to fetch from HCP",
},
"errorsWithInvalidPayload": {
mockResponse: &hcptelemetry.AgentTelemetryConfigOK{
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{},
},
mockError: nil,
wantErr: "invalid response payload",
},
"successWithMetricsConfig": {
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
"success:": {
mockResponse: &hcptelemetry.AgentTelemetryConfigOK{
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{
RefreshConfig: &models.HashicorpCloudConsulTelemetry20230414RefreshConfig{
RefreshInterval: "1s",
},
TelemetryConfig: &models.HashicorpCloudConsulTelemetry20230414TelemetryConfig{
Endpoint: "https://test.com",
Labels: map[string]string{"test": "test"},
Labels: map[string]string{"test": "123"},
Metrics: &models.HashicorpCloudConsulTelemetry20230414TelemetryMetricsConfig{
Endpoint: "https://metrics-test.com",
IncludeList: []string{"consul.raft.apply"},
IncludeList: []string{"consul", "test"},
},
},
},
},
expectedTelemetryCfg: &TelemetryConfig{
Endpoint: "https://test.com",
Labels: map[string]string{"test": "test"},
MetricsConfig: &MetricsConfig{
Endpoint: "https://metrics-test.com",
Filters: []string{"consul.raft.apply"},
},
},
},
"errorsWithNilPayload": {
resp: &consul_telemetry_service.AgentTelemetryConfigOK{},
wantErr: "missing payload",
},
"errorsWithNilTelemetryConfig": {
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{},
expected: &expectedTelemetryCfg{
endpoint: "https://test.com/v1/metrics",
labels: map[string]string{"test": "123"},
filters: "consul|test",
refreshInterval: 1 * time.Second,
},
wantErr: "missing telemetry config",
},
} {
test := test
tc := tc
t.Run(name, func(t *testing.T) {
t.Parallel()
telemetryCfg, err := convertTelemetryConfig(test.resp)
if test.wantErr != "" {
c := &hcpClient{
tgw: &mockTGW{
mockError: tc.mockError,
mockResponse: tc.mockResponse,
},
}

telemetryCfg, err := c.FetchTelemetryConfig(context.Background())

if tc.wantErr != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tc.wantErr)
require.Nil(t, telemetryCfg)
require.Contains(t, err.Error(), test.wantErr)
return
}

urlEndpoint, err := url.Parse(tc.expected.endpoint)
require.NoError(t, err)
require.Equal(t, test.expectedTelemetryCfg, telemetryCfg)
})
}
}

func Test_DefaultLabels(t *testing.T) {
for name, tc := range map[string]struct {
cfg config.CloudConfig
expectedLabels map[string]string
}{
"Success": {
cfg: config.CloudConfig{
NodeID: types.NodeID("nodeyid"),
NodeName: "nodey",
},
expectedLabels: map[string]string{
"node_id": "nodeyid",
"node_name": "nodey",
},
},
regexFilters, err := regexp.Compile(tc.expected.filters)
require.NoError(t, err)

"NoNodeID": {
cfg: config.CloudConfig{
NodeID: types.NodeID(""),
NodeName: "nodey",
},
expectedLabels: map[string]string{
"node_name": "nodey",
},
},
"NoNodeName": {
cfg: config.CloudConfig{
NodeID: types.NodeID("nodeyid"),
NodeName: "",
},
expectedLabels: map[string]string{
"node_id": "nodeyid",
},
},
"Empty": {
cfg: config.CloudConfig{
NodeID: "",
NodeName: "",
},
expectedLabels: map[string]string{},
},
} {
t.Run(name, func(t *testing.T) {
tCfg := &TelemetryConfig{}
labels := tCfg.DefaultLabels(tc.cfg)
require.Equal(t, labels, tc.expectedLabels)
expectedCfg := &TelemetryConfig{
MetricsConfig: &MetricsConfig{
Endpoint: urlEndpoint,
Filters: regexFilters,
Labels: tc.expected.labels,
},
RefreshConfig: &RefreshConfig{
RefreshInterval: tc.expected.refreshInterval,
},
}

require.NoError(t, err)
require.Equal(t, expectedCfg, telemetryCfg)
})
}
}
Loading