Skip to content

Commit

Permalink
Update OpenTelemetry configurations and remove unused modules
Browse files Browse the repository at this point in the history
Revised otel-collector and docker-compose configurations to streamline tracing and metrics collection, replacing Datadog connectors with OpenTelemetry. Removed unused scraping modules and unnecessary dependencies to improve setup efficiency and resource utilization.
  • Loading branch information
sfmskywalker committed Sep 25, 2024
1 parent 758b607 commit a62de77
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 108 deletions.
98 changes: 40 additions & 58 deletions docker/docker-compose-datadog.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
version: '3.7'

services:
services:
postgres:
image: postgres:latest
command: -c 'max_connections=2000'
Expand All @@ -13,17 +11,6 @@ services:
ports:
- "5432:5432"

cockroachdb:
image: cockroachdb/cockroach:v22.1.0
command: start-single-node --insecure
ports:
- "26257:26257" # CockroachDB SQL port
- "8080:8080" # CockroachDB UI port
volumes:
- cockroachdb-data:/cockroach/cockroach-data
environment:
- COCKROACH_DATABASE=elsa

rabbitmq:
image: "rabbitmq:3-management"
ports:
Expand All @@ -44,40 +31,22 @@ services:
- postgres
- rabbitmq
- redis
- datadog-agent
- otel-collector
environment:
DD_AGENT_HOST: datadog-agent
DD_ENV: development
DD_TRACE_DEBUG: true
DD_TRACE_OTEL_ENABLED: true
DD_SERVICE: "elsa-server-local"
DD_VERSION: "3.2.1-blueberry"

# Enable priority sampling
DD_TRACE_SAMPLING_PRIORITY: "true"

# Global rate limiting of traces (number of spans per second)
DD_TRACE_RATE_LIMIT: 100

# Global sample rate for all traces (applies to spans that do not match a specific sampling rule)
DD_TRACE_SAMPLE_RATE: 1.0 # Keep 100% of the traces globally (adjust as needed)

# Sampling rules for controlling sampling of specific services and errors
DD_TRACE_SAMPLING_RULES: >
[
{
"service": "elsa-server-local",
"name": "WorkflowExecution",
"sample_rate": 1.0,
"condition": {"tags": {"hasIncidents": "true"}}
},
{
"service": "elsa-server-local",
"name": "ActivityExecution",
"sample_rate": 1.0,
"condition": {"tags": {"hasIncidents": "true"}}
}
]
# OpenTelemetry environment variables
OTEL_EXPORTER_OTLP_ENDPOINT: "http://otel-collector:4317" # Point to OpenTelemetry Collector
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc" # Use gRPC for OTLP
OTEL_TRACES_EXPORTER: "otlp"
OTEL_METRICS_EXPORTER: "otlp"
# Same exporter name as traces and metrics above; the previous value "otpl" was a typo.
OTEL_LOGS_EXPORTER: "otlp"
OTEL_RESOURCE_ATTRIBUTES: "service.name=elsa-server-local,service.version=3.2.1-blueberry,deployment.environment=development"
OTEL_DOTNET_AUTO_TRACES_ADDITIONAL_SOURCES: "Elsa.Workflows"
OTEL_DOTNET_AUTO_INSTRUMENTATION_ENABLED: "true"
OTEL_LOG_LEVEL: "debug"
OTEL_DOTNET_AUTO_RESOURCE_DETECTOR_ENABLED: "true"
OTEL_DOTNET_AUTO_LOGS_CONSOLE_EXPORTER_ENABLED: "true"
OTEL_DOTNET_AUTO_METRICS_CONSOLE_EXPORTER_ENABLED: "true"
OTEL_DOTNET_AUTO_TRACES_CONSOLE_EXPORTER_ENABLED: "true"

ASPNETCORE_ENVIRONMENT: Development
PYTHONNET_PYDLL: /opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/bin/python3.11
Expand All @@ -100,28 +69,41 @@ services:
ports:
- "14000:8080"

# OpenTelemetry Collector (contrib image). The application's
# OTEL_EXPORTER_OTLP_ENDPOINT targets this service on port 4317.
otel-collector:
image: otel/opentelemetry-collector-contrib:latest
volumes:
# Mount the collector configuration that sits next to this compose file.
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
# Start the collector with the mounted config file.
# NOTE(review): the leading "-" presumably disables the
# component.UseLocalHostAsDefaultHost feature gate - confirm.
command: [ "--config", "/etc/otel-collector-config.yaml", "--feature-gates", "-component.UseLocalHostAsDefaultHost" ]
environment:
# Placeholder value; NOTE(review): presumably read by the datadog exporter
# in the collector config - confirm, and supply the real key out of band.
DD_API_KEY: "secret api key"
DD_SITE: "datadoghq.eu"
ports:
# NOTE(review): 13133 is presumably the collector health-check port - confirm.
- "13133:13133"
# OTLP gRPC receiver (0.0.0.0:4317 in otel-collector-config.yaml).
- "4317:4317"
# OTLP HTTP receiver (0.0.0.0:4318 in otel-collector-config.yaml).
- "4318:4318"

datadog-agent:
image: datadog/agent:7.57.1
image: datadog/agent:latest
environment:
DD_API_KEY: "YOUR_API_KEY"
DD_API_KEY: "secret api key"
DD_SITE: "datadoghq.eu"
DD_HOSTNAME: "otel-collector"
DD_LOGS_ENABLED: "true"
DD_OTLP_CONFIG_LOGS_ENABLED: "true"
DD_LOGS_CONFIG_CONTAINER_COLLECT_ALL: "true"
DD_APM_ENABLED: "true"
DD_REMOTE_CONFIGURATION_ENABLED: "true"
DD_APM_NON_LOCAL_TRAFFIC: "true"

DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT: 0.0.0.0:4317 # The Datadog Agent expects traces from OpenTelemetry Collector
DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_HTTP_ENDPOINT: 0.0.0.0:4318

# Service autodiscovery
DD_AC_INCLUDE: "name:postgres,name:rabbitmq,name:redis,name:elsa-server"
DD_AC_EXCLUDE: "name:datadog-agent"

volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /proc/:/host/proc/:ro
- /sys/fs/cgroup/:/host/sys/fs/cgroup:ro

ports:
- "8126:8126"
- "14317:4317"
- "14318:4318"

volumes:
postgres-data:
cockroachdb-data:
postgres-data:
66 changes: 18 additions & 48 deletions docker/otel-collector-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,25 @@ receivers:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
hostmetrics:
collection_interval: 10s
scrapers:
paging:
metrics:
system.paging.utilization:
enabled: true
cpu:
metrics:
system.cpu.utilization:
enabled: true
disk:
filesystem:
metrics:
system.filesystem.utilization:
enabled: true
load:
memory:
network:
processes:
docker_stats:
metrics:
container.network.io.usage.rx_packets:
enabled: true
container.network.io.usage.tx_packets:
enabled: true
container.cpu.usage.system:
enabled: true
container.memory.rss:
enabled: true
container.blockio.io_serviced_recursive:
enabled: true

processors:
batch:
send_batch_max_size: 100
send_batch_size: 10
send_batch_size: 10 # Increased batch size for efficiency
timeout: 1s

connectors:
datadog/connector:
# Tail sampling settings with a single policy keyed on the hasIncidents attribute.
tail_sampling:
decision_wait: 10s
num_traces: 10000 # Increased from 100 to handle more traces
expected_new_traces_per_sec: 100 # Increased from 10
decision_cache:
sampled_cache_size: 100000
# NOTE(review): presumably only traces carrying hasIncidents == true are
# sampled by this policy - confirm against the tail sampling processor docs.
policies: [
{
name: incidents-policy,
type: boolean_attribute,
boolean_attribute: { key: hasIncidents, value: true }
}
]

exporters:
debug:
Expand All @@ -58,22 +36,14 @@ exporters:
service:
pipelines:
metrics:
receivers: [ hostmetrics, otlp, datadog/connector ]
receivers: [ otlp ]
processors: [ batch ]
exporters: [ datadog ]
traces:
receivers: [ otlp ]
processors: [ batch ]
exporters: [ datadog/connector ]
traces/sampling:
# This pipeline has a Datadog connector, a batch processor and a Datadog exporter.
# It receives all traces from the Datadog connector and sends them to Datadog.
# Add any sampling here, so that the generated trace metrics account for all traces.
receivers: [ datadog/connector ]
# Add any sampling here
processors: [ ]
exporters: [ datadog ]
processors: [ tail_sampling, batch ] # Sample first, then batch, so batching happens after sampling has dropped traces
exporters: [ debug, datadog ] # Directly exporting to debug and datadog
logs:
receivers: [ otlp ]
processors: [ batch ]
exporters: [ datadog ]
exporters: [ debug, datadog ]
2 changes: 0 additions & 2 deletions src/bundles/Elsa.Server.Web/Elsa.Server.Web.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@
<PackageReference Include="DistributedLock.Redis"/>
<PackageReference Include="FluentStorage.Azure.Blobs"/>
<PackageReference Include="Grpc.Net.Client" />
<PackageReference Include="OpenTelemetry.AutoInstrumentation" />
<PackageReference Include="OpenTelemetry.Instrumentation.StackExchangeRedis" />
<PackageReference Include="Proto.Persistence.Sqlite"/>
<PackageReference Include="Proto.Persistence.SqlServer"/>
</ItemGroup>
Expand Down

0 comments on commit a62de77

Please sign in to comment.