From e997632fb4db42bd3c24f727d079d1056f60dc78 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 23 Nov 2023 13:42:54 +0000 Subject: [PATCH] Mark ddtrace threads as fork-safe **What does this PR do?** This PR applies to all relevant ddtrace threads a pattern pioneered by rails: set the `:fork_safe` thread variable to indicate that it's OK if ddtrace threads are started before a Ruby app calls fork. **Motivation:** This PR fixes the issue reported in #3203. The puma webserver in clustering mode has a check where it will warn when it detects that threads were started before the webserver has fork'd the child processes, see: https://github.com/puma/puma/blob/32e011ab9e029c757823efb068358ed255fb7ef4/lib/puma/cluster.rb#L353-L359 This is not a problem for ddtrace (as discussed in #3203), but the warning is annoying. While looking into the issue again today, I spotted that actually there's a way to tell puma that a thread is fine and is it's not a problem if it's started before the webserver has fork'd: puma checks if the thread has a `:fork_safe` thread-variable set to `true`. **Additional Notes:** We have a bunch of places in ddtrace where we create threads... I briefly played with the idea of creating a superclass of all ddtrace Threads, but decided to not get into a bigger refactoring for such a small issue. Maybe next time...? **How to test the change?** Test coverage included. Furthermore, you can try running a Ruby webapp in puma before/after the change to confirm the warning is gone. Fixes #3203 --- lib/datadog/core/remote/worker.rb | 1 + lib/datadog/core/workers/async.rb | 1 + .../profiling/collectors/cpu_and_wall_time_worker.rb | 1 + lib/datadog/profiling/collectors/idle_sampling_helper.rb | 1 + lib/datadog/tracing/workers.rb | 1 + spec/datadog/core/remote/worker_spec.rb | 6 ++++++ spec/datadog/core/workers/async_spec.rb | 8 +++++++- .../profiling/collectors/cpu_and_wall_time_worker_spec.rb | 6 ++++++ .../profiling/collectors/idle_sampling_helper_spec.rb | 6 ++++++ spec/datadog/tracing/workers_spec.rb | 8 +++++++- 10 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/remote/worker.rb b/lib/datadog/core/remote/worker.rb index e6114e3dbef..da6f9379d75 100644 --- a/lib/datadog/core/remote/worker.rb +++ b/lib/datadog/core/remote/worker.rb @@ -30,6 +30,7 @@ def start thread = Thread.new { poll(@interval) } thread.name = self.class.name unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.3') + thread.thread_variable_set(:fork_safe, true) @thr = thread @started = true diff --git a/lib/datadog/core/workers/async.rb b/lib/datadog/core/workers/async.rb index 9f1943f11ef..c29cb6f3aec 100644 --- a/lib/datadog/core/workers/async.rb +++ b/lib/datadog/core/workers/async.rb @@ -147,6 +147,7 @@ def start_worker # rubocop:enable Lint/RescueException end @worker.name = self.class.name unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.3') + @worker.thread_variable_set(:fork_safe, true) nil end diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index 0185b6b63d0..1b2315034c9 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -78,6 +78,7 @@ def start(on_failure_proc: nil) end end @worker_thread.name = self.class.name # Repeated from above to make sure thread gets named asap + @worker_thread.thread_variable_set(:fork_safe, true) end true diff --git a/lib/datadog/profiling/collectors/idle_sampling_helper.rb b/lib/datadog/profiling/collectors/idle_sampling_helper.rb index c01d7dc01a8..d1fa8258ff0 100644 --- a/lib/datadog/profiling/collectors/idle_sampling_helper.rb +++ b/lib/datadog/profiling/collectors/idle_sampling_helper.rb @@ -43,6 +43,7 @@ def start end end @worker_thread.name = self.class.name # Repeated from above to make sure thread gets named asap + @worker_thread.thread_variable_set(:fork_safe, true) end true diff --git a/lib/datadog/tracing/workers.rb b/lib/datadog/tracing/workers.rb index c950a8b7e95..91dc60b34cd 100644 --- a/lib/datadog/tracing/workers.rb +++ b/lib/datadog/tracing/workers.rb @@ -69,6 +69,7 @@ def start Datadog.logger.debug { "Starting thread for: #{self}" } @worker = Thread.new { perform } @worker.name = self.class.name unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.3') + @worker.thread_variable_set(:fork_safe, true) nil end diff --git a/spec/datadog/core/remote/worker_spec.rb b/spec/datadog/core/remote/worker_spec.rb index 1be708b97db..cee11cb24e2 100644 --- a/spec/datadog/core/remote/worker_spec.rb +++ b/spec/datadog/core/remote/worker_spec.rb @@ -60,6 +60,12 @@ expect(Thread.list.map(&:name)).to include(described_class.to_s) end end + + it 'marks the worker thread as fork-safe (to avoid fork-safety warnings in webservers)' do + worker.start + + expect(worker.instance_variable_get(:@thr).thread_variable_get(:fork_safe)).to be true + end end describe '#stop' do diff --git a/spec/datadog/core/workers/async_spec.rb b/spec/datadog/core/workers/async_spec.rb index 11f33abeef1..880c57892e4 100644 --- a/spec/datadog/core/workers/async_spec.rb +++ b/spec/datadog/core/workers/async_spec.rb @@ -451,7 +451,7 @@ end end - describe 'thread naming' do + describe 'thread naming and fork-safety marker' do after { worker.terminate } context 'on Ruby < 2.3' do @@ -488,6 +488,12 @@ expect(worker.send(:worker).name).to eq worker_class.to_s end end + + it 'marks the worker thread as fork-safe (to avoid fork-safety warnings in webservers)' do + worker.perform + + expect(worker.send(:worker).thread_variable_get(:fork_safe)).to be true + end end end end diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index e48655979d4..3ffe3227ca2 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -71,6 +71,12 @@ expect(Thread.list.map(&:name)).to include(described_class.name) end + it 'marks the new thread as fork-safe' do + start + + expect(cpu_and_wall_time_worker.instance_variable_get(:@worker_thread).thread_variable_get(:fork_safe)).to be true + end + it 'does not create a second thread if start is called again' do start diff --git a/spec/datadog/profiling/collectors/idle_sampling_helper_spec.rb b/spec/datadog/profiling/collectors/idle_sampling_helper_spec.rb index a18d2486641..b24ea6836f2 100644 --- a/spec/datadog/profiling/collectors/idle_sampling_helper_spec.rb +++ b/spec/datadog/profiling/collectors/idle_sampling_helper_spec.rb @@ -26,6 +26,12 @@ start end + it 'marks the new thread as fork-safe' do + start + + expect(idle_sampling_helper.instance_variable_get(:@worker_thread).thread_variable_get(:fork_safe)).to be true + end + it 'does not create a second thread if start is called again' do start diff --git a/spec/datadog/tracing/workers_spec.rb b/spec/datadog/tracing/workers_spec.rb index 45199566f0c..33334dda77b 100644 --- a/spec/datadog/tracing/workers_spec.rb +++ b/spec/datadog/tracing/workers_spec.rb @@ -39,7 +39,7 @@ end end - describe 'thread naming' do + describe 'thread naming and fork-safety marker' do context 'on Ruby < 2.3' do before do skip 'Only applies to old Rubies' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3') @@ -65,6 +65,12 @@ expect(worker.instance_variable_get(:@worker).name).to eq described_class.name end end + + it 'marks the worker thread as fork-safe (to avoid fork-safety warnings in webservers)' do + worker.start + + expect(worker.instance_variable_get(:@worker).thread_variable_get(:fork_safe)).to be true + end end describe '#start' do