From f997fe8b9f81fa3852c622a8137c4e3be4f9466b Mon Sep 17 00:00:00 2001 From: Louis Barthonet Date: Tue, 20 Aug 2024 13:40:39 +0300 Subject: [PATCH] chore: add passive sampling option to tracing When we have many services in the same flow, we want to defer the sampling logic to the parent service (which will defer to the root service in the end). The PassiveSampler is really good for that, as it will only sample traces that have one external reference. This assumes that the parent service will send the trace info to stitch, if we should keep it, and won't, if we should discard it. This is what some of our services do already today, and it would really help us onboard to foundations! --- foundations/src/telemetry/settings/tracing.rs | 63 ++++++++++++++----- foundations/src/telemetry/tracing/init.rs | 15 +++-- foundations/src/telemetry/tracing/internal.rs | 4 +- .../src/telemetry/tracing/rate_limit.rs | 4 +- foundations/tests/tracing.rs | 15 +++-- 5 files changed, 72 insertions(+), 29 deletions(-) diff --git a/foundations/src/telemetry/settings/tracing.rs b/foundations/src/telemetry/settings/tracing.rs index b311b7f..5cc6b9d 100644 --- a/foundations/src/telemetry/settings/tracing.rs +++ b/foundations/src/telemetry/settings/tracing.rs @@ -24,15 +24,8 @@ pub struct TracingSettings { /// The output for the collected traces. pub output: TracesOutput, - /// Sampling ratio. - /// - /// This can be any fractional value between `0.0` and `1.0`. - /// Where `1.0` means "sample everything", and `0.0` means "don't sample anything". - #[serde(default = "TracingSettings::default_sampling_ratio")] - pub sampling_ratio: f64, - - /// Settings for rate limiting emission of traces - pub rate_limit: RateLimitingSettings, + /// The strategy used to sample traces. + pub sampling_strategy: SamplingStrategy, } #[cfg(not(feature = "settings"))] @@ -41,8 +34,7 @@ impl Default for TracingSettings { Self { enabled: TracingSettings::default_enabled(), output: Default::default(), - sampling_ratio: TracingSettings::default_sampling_ratio(), - rate_limit: Default::default(), + sampling_strategy: Default::default(), } } } @@ -51,10 +43,6 @@ impl TracingSettings { fn default_enabled() -> bool { true } - - fn default_sampling_ratio() -> f64 { - 1.0 - } } /// The output for the collected traces. @@ -125,3 +113,48 @@ impl JaegerThriftUdpOutputSettings { server_addr } } + +/// Settings used when active sampling is enabled. +#[cfg_attr(feature = "settings", settings(crate_path = "crate"))] +#[cfg_attr(not(feature = "settings"), derive(Clone, Debug, serde::Deserialize))] +pub struct ActiveSamplingSettings { + /// Sampling ratio. + /// + /// This can be any fractional value between `0.0` and `1.0`. + /// Where `1.0` means "sample everything", and `0.0` means "don't sample anything". + #[serde(default = "ActiveSamplingSettings::default_sampling_ratio")] + pub sampling_ratio: f64, + + /// Settings for rate limiting emission of traces + pub rate_limit: RateLimitingSettings, +} + +impl ActiveSamplingSettings { + fn default_sampling_ratio() -> f64 { + 1.0 + } +} + +/// The sampling strategy used for tracing purposes. +#[cfg_attr( + feature = "settings", + settings(crate_path = "crate", impl_default = false) +)] +#[cfg_attr(not(feature = "settings"), derive(Clone, Debug, serde::Deserialize))] +pub enum SamplingStrategy { + /// This only samples traces which have one or more references. + /// + /// Backed by [cf_rustracing::sampler::PassiveSampler] + Passive, + + /// Active sampling. + /// + /// Backed by [crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler] + Active(ActiveSamplingSettings), +} + +impl Default for SamplingStrategy { + fn default() -> Self { + Self::Active(Default::default()) + } +} diff --git a/foundations/src/telemetry/tracing/init.rs b/foundations/src/telemetry/tracing/init.rs index 438e5aa..3f2fde3 100644 --- a/foundations/src/telemetry/tracing/init.rs +++ b/foundations/src/telemetry/tracing/init.rs @@ -1,7 +1,7 @@ use super::internal::{SharedSpan, Tracer}; use super::output_jaeger_thrift_udp; use crate::telemetry::scope::ScopeStack; -use crate::telemetry::settings::{TracesOutput, TracingSettings}; +use crate::telemetry::settings::{SamplingStrategy, TracesOutput, TracingSettings}; use crate::{BootstrapResult, ServiceInfo}; use cf_rustracing_jaeger::span::SpanReceiver; use futures_util::future::BoxFuture; @@ -10,6 +10,7 @@ use once_cell::sync::{Lazy, OnceCell}; #[cfg(feature = "telemetry-otlp-grpc")] use super::output_otlp_grpc; +use cf_rustracing::sampler::{PassiveSampler, Sampler}; #[cfg(feature = "testing")] use std::borrow::Cow; @@ -18,7 +19,7 @@ use crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler; static HARNESS: OnceCell = OnceCell::new(); static NOOP_HARNESS: Lazy = Lazy::new(|| { - let (noop_tracer, _) = Tracer::new(Default::default()); + let (noop_tracer, _) = Tracer::new(RateLimitingProbabilisticSampler::default().boxed()); TracingHarness { tracer: noop_tracer, @@ -60,9 +61,13 @@ impl TracingHarness { pub(crate) fn create_tracer_and_span_rx( settings: &TracingSettings, ) -> BootstrapResult<(Tracer, SpanReceiver)> { - Ok(Tracer::new(RateLimitingProbabilisticSampler::new( - settings, - )?)) + let sampler = match &settings.sampling_strategy { + SamplingStrategy::Passive => PassiveSampler.boxed(), + SamplingStrategy::Active(settings) => { + RateLimitingProbabilisticSampler::new(settings)?.boxed() + } + }; + Ok(Tracer::new(sampler)) } // NOTE: does nothing if tracing has already been initialized in this process. diff --git a/foundations/src/telemetry/tracing/internal.rs b/foundations/src/telemetry/tracing/internal.rs index 113fde0..84c5891 100644 --- a/foundations/src/telemetry/tracing/internal.rs +++ b/foundations/src/telemetry/tracing/internal.rs @@ -2,14 +2,14 @@ use super::init::TracingHarness; use super::StartTraceOptions; use rand::{self, Rng}; -use crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler; +use cf_rustracing::sampler::BoxSampler; use cf_rustracing::tag::Tag; use cf_rustracing_jaeger::span::{Span, SpanContext, SpanContextState}; use std::borrow::Cow; use std::error::Error; use std::sync::Arc; -pub(crate) type Tracer = cf_rustracing::Tracer; +pub(crate) type Tracer = cf_rustracing::Tracer, SpanContextState>; #[derive(Debug, Clone)] pub(crate) struct SharedSpan { diff --git a/foundations/src/telemetry/tracing/rate_limit.rs b/foundations/src/telemetry/tracing/rate_limit.rs index 5808369..99e9cde 100644 --- a/foundations/src/telemetry/tracing/rate_limit.rs +++ b/foundations/src/telemetry/tracing/rate_limit.rs @@ -1,4 +1,4 @@ -use crate::telemetry::settings::TracingSettings; +use crate::telemetry::settings::ActiveSamplingSettings; use cf_rustracing::sampler::Sampler; use cf_rustracing::span::CandidateSpan; use cf_rustracing::{sampler::ProbabilisticSampler, Result}; @@ -26,7 +26,7 @@ impl Default for RateLimitingProbabilisticSampler { impl RateLimitingProbabilisticSampler { /// If `sampling_rate` is not in the range `0.0...1.0`, /// it will return an error with the kind `ErrorKind::InvalidInput`. - pub(crate) fn new(settings: &TracingSettings) -> Result { + pub(crate) fn new(settings: &ActiveSamplingSettings) -> Result { let rate_limiter = if settings.rate_limit.enabled { settings .rate_limit diff --git a/foundations/tests/tracing.rs b/foundations/tests/tracing.rs index 51f60b4..a200f01 100644 --- a/foundations/tests/tracing.rs +++ b/foundations/tests/tracing.rs @@ -1,4 +1,6 @@ -use foundations::telemetry::settings::{RateLimitingSettings, TracingSettings}; +use foundations::telemetry::settings::{ + ActiveSamplingSettings, RateLimitingSettings, SamplingStrategy, TracingSettings, +}; use foundations::telemetry::tracing; use foundations::telemetry::TestTelemetryContext; use foundations_macros::with_test_telemetry; @@ -18,10 +20,13 @@ fn test_rate_limiter(mut ctx: TestTelemetryContext) { assert_eq!(ctx.traces(Default::default()).len(), 10); ctx.set_tracing_settings(TracingSettings { - rate_limit: RateLimitingSettings { - enabled: true, - max_events_per_second: 5, - }, + sampling_strategy: SamplingStrategy::Active(ActiveSamplingSettings { + rate_limit: RateLimitingSettings { + enabled: true, + max_events_per_second: 5, + }, + ..Default::default() + }), ..Default::default() });