Skip to content

Commit

Permalink
Use a separate thread for tiered compilation background work
Browse files Browse the repository at this point in the history
- Makes it easier to manage how much time is spent performing background work like rejitting, and allows yielding more frequently with just Sleep without incurring thread pool overhead, which is useful in CPU-limited cases
- A min/max range is determined for how long background work will be done before yielding the thread. The max is the same as before, 50 ms. For now the min is `processor count` ms (capped to the max), such that in CPU-limited cases the thread would yield more frequently in order to not monopolize too much of the limited CPU resources for background work, and in cases with a larger number of processors where the background work is typically less intrusive to foreground work it would yield less frequently.
- At the same time, progress should be made on background work such that steady-state perf would be reached in reasonable time. Yielding too frequently can slow down the background work too much. The sleep duration is measured to identify oversubscribed situations, in which the thread yields less frequently so that faster progress is made on the background work.
- Due to less time spent rejitting in some CPU-limited cases, steady-state performance may be reached a bit later in favor of fewer spikes along the way
- When the portable thread pool is enabled, a side effect of using a managed worker thread for tiering background work was that several GC-heavy microbenchmarks regressed. Tiering was the only thing using the thread pool in those tests and stack-walking the managed thread was slower due to the presence of GC refs. It's not too concerning, the benchmarks are just measuring something different from before, but in any case this change also resolves that issue. Fixes dotnet#44211.
  • Loading branch information
kouvel committed Jan 19, 2021
1 parent 72e2109 commit a1906f4
Show file tree
Hide file tree
Showing 9 changed files with 561 additions and 469 deletions.
3 changes: 2 additions & 1 deletion src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 1,
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_QuickJit, W("TC_QuickJit"), 1, "For methods that would be jitted, enable using quick JIT when appropriate.")
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 0, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_AggressiveTiering, W("TC_AggressiveTiering"), 0, "Transition through tiers aggressively.")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_BackgroundWorkerTimeoutMs, W("TC_BackgroundWorkerTimeoutMs"), 4000, "How long in milliseconds the background worker thread may remain idle before exiting.")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_CallCountThreshold, W("TC_CallCountThreshold"), 30, "Number of times a method must be called in tier 0 after which it is promoted to the next tier.")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_CallCountingDelayMs, W("TC_CallCountingDelayMs"), 100, "A perpetual delay in milliseconds that is applied call counting in tier 0 and jitting at higher tiers, while there is startup-like activity.")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_DelaySingleProcMultiplier, W("TC_DelaySingleProcMultiplier"), 10, "Multiplier for TC_CallCountingDelayMs that is applied on a single-processor machine or when the process is affinitized to a single processor.")
Expand All @@ -614,7 +615,7 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_DeleteCallCountingStubsAfter, W("TC_DeleteC
#else
RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_DeleteCallCountingStubsAfter, W("TC_DeleteCallCountingStubsAfter"), 4096, "Deletes call counting stubs after this many have completed. Zero to disable deleting.")
#endif
#endif
#endif // FEATURE_TIERED_COMPILATION

///
/// On-Stack Replacement
Expand Down
252 changes: 121 additions & 131 deletions src/coreclr/vm/callcounting.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/coreclr/vm/callcounting.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,15 +270,15 @@ class CallCountingManager
NativeCodeVersion activeCodeVersion,
PCODE codeEntryPoint,
bool wasMethodCalled,
bool *scheduleTieringBackgroundWorkRef);
bool *createTieringBackgroundWorker);
static PCODE OnCallCountThresholdReached(TransitionBlock *transitionBlock, TADDR stubIdentifyingToken);
static COUNT_T GetCountOfCodeVersionsPendingCompletion();
static void CompleteCallCounting();

public:
static void StopAndDeleteAllCallCountingStubs();
private:
static void StopAllCallCounting(TieredCompilationManager *tieredCompilationManager, bool *scheduleTieringBackgroundWorkRef);
static void StopAllCallCounting(TieredCompilationManager *tieredCompilationManager);
static void DeleteAllCallCountingStubs();
void TrimCollections();
#endif // !DACCESS_COMPILE
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/vm/codeversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1754,7 +1754,7 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(
#endif

bool done = false;
bool scheduleTieringBackgroundWork = false;
bool createTieringBackgroundWorker = false;
NativeCodeVersion newActiveVersion;
do
{
Expand Down Expand Up @@ -1816,10 +1816,10 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(
}
#ifdef FEATURE_TIERED_COMPILATION
else if (
!CallCountingManager::SetCodeEntryPoint(activeVersion, pCode, true, &scheduleTieringBackgroundWork))
!CallCountingManager::SetCodeEntryPoint(activeVersion, pCode, true, &createTieringBackgroundWorker))
{
_ASSERTE(!g_pConfig->TieredCompilation_UseCallCountingStubs());
_ASSERTE(!scheduleTieringBackgroundWork);
_ASSERTE(!createTieringBackgroundWorker);
*doBackpatchRef = doPublish = false;
}
#endif
Expand All @@ -1842,19 +1842,19 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(
{
_ASSERTE(doPublish);
_ASSERTE(!handleCallCounting);
_ASSERTE(!scheduleTieringBackgroundWork);
_ASSERTE(!createTieringBackgroundWorker);

// The code entry point is set before recording the method for call counting to avoid a race. Otherwise, the
// tiering delay may expire and enable call counting for the method before the entry point is set here, in which
// case calls to the method would not be counted anymore.
GetAppDomain()->GetTieredCompilationManager()->HandleCallCountingForFirstCall(pMethodDesc);
}
else if (scheduleTieringBackgroundWork)
else if (createTieringBackgroundWorker)
{
_ASSERTE(doPublish);
_ASSERTE(handleCallCounting);
_ASSERTE(!handleCallCountingForFirstCall);
GetAppDomain()->GetTieredCompilationManager()->ScheduleBackgroundWork(); // requires GC_TRIGGERS
TieredCompilationManager::CreateBackgroundWorker(); // requires GC_TRIGGERS
}
#endif

Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/vm/eeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ HRESULT EEConfig::Init()
fTieredCompilation_CallCounting = false;
fTieredCompilation_UseCallCountingStubs = false;
tieredCompilation_CallCountThreshold = 1;
tieredCompilation_BackgroundWorkerTimeoutMs = 0;
tieredCompilation_CallCountingDelayMs = 0;
tieredCompilation_DeleteCallCountingStubsAfter = 0;
#endif
Expand Down Expand Up @@ -880,6 +881,9 @@ fTrackDynamicMethodDebugInfo = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_
CLRConfig::UNSUPPORTED_TC_QuickJitForLoops);
}

tieredCompilation_BackgroundWorkerTimeoutMs =
CLRConfig::GetConfigValue(CLRConfig::INTERNAL_TC_BackgroundWorkerTimeoutMs);

fTieredCompilation_CallCounting = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_TC_CallCounting) != 0;

DWORD tieredCompilation_ConfiguredCallCountThreshold =
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/vm/eeconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class EEConfig
bool TieredCompilation(void) const { LIMITED_METHOD_CONTRACT; return fTieredCompilation; }
bool TieredCompilation_QuickJit() const { LIMITED_METHOD_CONTRACT; return fTieredCompilation_QuickJit; }
bool TieredCompilation_QuickJitForLoops() const { LIMITED_METHOD_CONTRACT; return fTieredCompilation_QuickJitForLoops; }
DWORD TieredCompilation_BackgroundWorkerTimeoutMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_BackgroundWorkerTimeoutMs; }
bool TieredCompilation_CallCounting() const { LIMITED_METHOD_CONTRACT; return fTieredCompilation_CallCounting; }
UINT16 TieredCompilation_CallCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_CallCountThreshold; }
DWORD TieredCompilation_CallCountingDelayMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_CallCountingDelayMs; }
Expand Down Expand Up @@ -733,6 +734,7 @@ class EEConfig
bool fTieredCompilation_CallCounting;
bool fTieredCompilation_UseCallCountingStubs;
UINT16 tieredCompilation_CallCountThreshold;
DWORD tieredCompilation_BackgroundWorkerTimeoutMs;
DWORD tieredCompilation_CallCountingDelayMs;
DWORD tieredCompilation_DeleteCallCountingStubsAfter;
#endif
Expand Down
4 changes: 0 additions & 4 deletions src/coreclr/vm/synch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,6 @@ BOOL CLREventBase::Reset()

_ASSERTE(Thread::Debug_AllowCallout());

// We do not allow Reset on AutoEvent
_ASSERTE (!IsAutoEvent() ||
!"Can not call Reset on AutoEvent");

{
return ResetEvent(m_handle);
}
Expand Down
Loading

0 comments on commit a1906f4

Please sign in to comment.