Skip to content

Commit

Permalink
chore: add passive sampling option to tracing
Browse files Browse the repository at this point in the history
When we have many services in the same flow, we want to defer the sampling
logic to the parent service (which will defer to the root service in the end).

The PassiveSampler is really good for that, as it will only sample traces that
have one external reference. This assumes that the parent service will send the
trace info to stitch, if we should keep it, and won't, if we should discard it.
This is what some of our services do already today, and it would really help us
onboard to foundations!
  • Loading branch information
lbarthon committed Aug 22, 2024
1 parent afd9094 commit f997fe8
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 29 deletions.
63 changes: 48 additions & 15 deletions foundations/src/telemetry/settings/tracing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,8 @@ pub struct TracingSettings {
/// The output for the collected traces.
pub output: TracesOutput,

/// Sampling ratio.
///
/// This can be any fractional value between `0.0` and `1.0`.
/// Where `1.0` means "sample everything", and `0.0` means "don't sample anything".
#[serde(default = "TracingSettings::default_sampling_ratio")]
pub sampling_ratio: f64,

/// Settings for rate limiting emission of traces
pub rate_limit: RateLimitingSettings,
/// The strategy used to sample traces.
pub sampling_strategy: SamplingStrategy,
}

#[cfg(not(feature = "settings"))]
Expand All @@ -41,8 +34,7 @@ impl Default for TracingSettings {
Self {
enabled: TracingSettings::default_enabled(),
output: Default::default(),
sampling_ratio: TracingSettings::default_sampling_ratio(),
rate_limit: Default::default(),
sampling_strategy: Default::default(),
}
}
}
Expand All @@ -51,10 +43,6 @@ impl TracingSettings {
fn default_enabled() -> bool {
true
}

fn default_sampling_ratio() -> f64 {
1.0
}
}

/// The output for the collected traces.
Expand Down Expand Up @@ -125,3 +113,48 @@ impl JaegerThriftUdpOutputSettings {
server_addr
}
}

/// Settings used when active sampling is enabled.
#[cfg_attr(feature = "settings", settings(crate_path = "crate"))]
#[cfg_attr(not(feature = "settings"), derive(Clone, Debug, serde::Deserialize))]
pub struct ActiveSamplingSettings {
/// Sampling ratio.
///
/// This can be any fractional value between `0.0` and `1.0`.
/// Where `1.0` means "sample everything", and `0.0` means "don't sample anything".
#[serde(default = "ActiveSamplingSettings::default_sampling_ratio")]
pub sampling_ratio: f64,

/// Settings for rate limiting emission of traces
pub rate_limit: RateLimitingSettings,
}

impl ActiveSamplingSettings {
fn default_sampling_ratio() -> f64 {
1.0
}
}

/// The sampling strategy used for tracing purposes.
#[cfg_attr(
feature = "settings",
settings(crate_path = "crate", impl_default = false)
)]
#[cfg_attr(not(feature = "settings"), derive(Clone, Debug, serde::Deserialize))]
pub enum SamplingStrategy {
/// This only samples traces which have one or more references.
///
/// Backed by [cf_rustracing::sampler::PassiveSampler]
Passive,

/// Active sampling.
///
/// Backed by [crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler]
Active(ActiveSamplingSettings),
}

impl Default for SamplingStrategy {
fn default() -> Self {
Self::Active(Default::default())
}
}
15 changes: 10 additions & 5 deletions foundations/src/telemetry/tracing/init.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::internal::{SharedSpan, Tracer};
use super::output_jaeger_thrift_udp;
use crate::telemetry::scope::ScopeStack;
use crate::telemetry::settings::{TracesOutput, TracingSettings};
use crate::telemetry::settings::{SamplingStrategy, TracesOutput, TracingSettings};
use crate::{BootstrapResult, ServiceInfo};
use cf_rustracing_jaeger::span::SpanReceiver;
use futures_util::future::BoxFuture;
Expand All @@ -10,6 +10,7 @@ use once_cell::sync::{Lazy, OnceCell};
#[cfg(feature = "telemetry-otlp-grpc")]
use super::output_otlp_grpc;

use cf_rustracing::sampler::{PassiveSampler, Sampler};
#[cfg(feature = "testing")]
use std::borrow::Cow;

Expand All @@ -18,7 +19,7 @@ use crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler;
static HARNESS: OnceCell<TracingHarness> = OnceCell::new();

static NOOP_HARNESS: Lazy<TracingHarness> = Lazy::new(|| {
let (noop_tracer, _) = Tracer::new(Default::default());
let (noop_tracer, _) = Tracer::new(RateLimitingProbabilisticSampler::default().boxed());

TracingHarness {
tracer: noop_tracer,
Expand Down Expand Up @@ -60,9 +61,13 @@ impl TracingHarness {
pub(crate) fn create_tracer_and_span_rx(
settings: &TracingSettings,
) -> BootstrapResult<(Tracer, SpanReceiver)> {
Ok(Tracer::new(RateLimitingProbabilisticSampler::new(
settings,
)?))
let sampler = match &settings.sampling_strategy {
SamplingStrategy::Passive => PassiveSampler.boxed(),
SamplingStrategy::Active(settings) => {
RateLimitingProbabilisticSampler::new(settings)?.boxed()
}
};
Ok(Tracer::new(sampler))
}

// NOTE: does nothing if tracing has already been initialized in this process.
Expand Down
4 changes: 2 additions & 2 deletions foundations/src/telemetry/tracing/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ use super::init::TracingHarness;
use super::StartTraceOptions;
use rand::{self, Rng};

use crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler;
use cf_rustracing::sampler::BoxSampler;
use cf_rustracing::tag::Tag;
use cf_rustracing_jaeger::span::{Span, SpanContext, SpanContextState};
use std::borrow::Cow;
use std::error::Error;
use std::sync::Arc;

pub(crate) type Tracer = cf_rustracing::Tracer<RateLimitingProbabilisticSampler, SpanContextState>;
pub(crate) type Tracer = cf_rustracing::Tracer<BoxSampler<SpanContextState>, SpanContextState>;

#[derive(Debug, Clone)]
pub(crate) struct SharedSpan {
Expand Down
4 changes: 2 additions & 2 deletions foundations/src/telemetry/tracing/rate_limit.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::telemetry::settings::TracingSettings;
use crate::telemetry::settings::ActiveSamplingSettings;
use cf_rustracing::sampler::Sampler;
use cf_rustracing::span::CandidateSpan;
use cf_rustracing::{sampler::ProbabilisticSampler, Result};
Expand Down Expand Up @@ -26,7 +26,7 @@ impl Default for RateLimitingProbabilisticSampler {
impl RateLimitingProbabilisticSampler {
/// If `sampling_rate` is not in the range `0.0...1.0`,
/// it will return an error with the kind `ErrorKind::InvalidInput`.
pub(crate) fn new(settings: &TracingSettings) -> Result<Self> {
pub(crate) fn new(settings: &ActiveSamplingSettings) -> Result<Self> {
let rate_limiter = if settings.rate_limit.enabled {
settings
.rate_limit
Expand Down
15 changes: 10 additions & 5 deletions foundations/tests/tracing.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use foundations::telemetry::settings::{RateLimitingSettings, TracingSettings};
use foundations::telemetry::settings::{
ActiveSamplingSettings, RateLimitingSettings, SamplingStrategy, TracingSettings,
};
use foundations::telemetry::tracing;
use foundations::telemetry::TestTelemetryContext;
use foundations_macros::with_test_telemetry;
Expand All @@ -18,10 +20,13 @@ fn test_rate_limiter(mut ctx: TestTelemetryContext) {
assert_eq!(ctx.traces(Default::default()).len(), 10);

ctx.set_tracing_settings(TracingSettings {
rate_limit: RateLimitingSettings {
enabled: true,
max_events_per_second: 5,
},
sampling_strategy: SamplingStrategy::Active(ActiveSamplingSettings {
rate_limit: RateLimitingSettings {
enabled: true,
max_events_per_second: 5,
},
..Default::default()
}),
..Default::default()
});

Expand Down

0 comments on commit f997fe8

Please sign in to comment.