diff --git a/src/coreclr/src/nativeaot/Bootstrap/CppCodeGen.h b/src/coreclr/src/nativeaot/Bootstrap/CppCodeGen.h new file mode 100644 index 0000000000000..91709981afd8e --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/CppCodeGen.h @@ -0,0 +1,71 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// CppCodeGen.h : Facilities for the C++ code generation backend + +#ifndef __CPP_CODE_GEN_H +#define __CPP_CODE_GEN_H + +#define _CRT_SECURE_NO_WARNINGS + +#ifdef _MSC_VER +// Warnings disabled for generated cpp code +#pragma warning(disable:4200) // zero-sized array +#pragma warning(disable:4101) // unreferenced local variable +#pragma warning(disable:4102) // unreferenced label +#pragma warning(disable:4244) // possible loss of data +#pragma warning(disable:4717) // recursive on all control paths +#pragma warning(disable:4307) // integral constant overflow +#endif + +#ifdef _MSC_VER +#define INT64VAL(x) (x##i64) +#else +#define INT64VAL(x) (x##LL) +#endif + +#ifdef _MSC_VER +#define CORERT_UNREACHABLE __assume(0) +#else +#define CORERT_UNREACHABLE __builtin_unreachable() +#endif + +#ifdef _MSC_VER +#define CORERT_THREAD __declspec(thread) +#else +#define CORERT_THREAD __thread +#endif + +// Use the bit representation of uint64_t `v` as the bit representation of a double. +inline double __uint64_to_double(uint64_t v) +{ + union + { + uint64_t u64; + double d; + } val; + val.u64 = v; + return val.d; +} + +struct ReversePInvokeFrame +{ + void* m_savedPInvokeTransitionFrame; + void* m_savedThread; +}; + +struct PInvokeTransitionFrame +{ + void* m_RIP; + void* m_pThread; // unused by stack crawler, this is so GetThread is only called once per method + // can be an invalid pointer in universal transition cases (which never need to call GetThread) + uint32_t m_Flags; // PInvokeTransitionFrameFlags +}; + +// Should be synchronized with System.Private.CoreLib/src/System/Runtime/CompilerServices/StaticClassConstructionContext.cs +struct StaticClassConstructionContext +{ + void* m_cctorMethodAddress; + uint32_t m_initialized; +}; +#endif diff --git a/src/coreclr/src/nativeaot/Bootstrap/common.cpp b/src/coreclr/src/nativeaot/Bootstrap/common.cpp new file mode 100644 index 0000000000000..9efc4b2ad89b6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/common.cpp @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// common.cpp : source file that includes just the standard includes +// testNative.pch will be the pre-compiled header +// common.obj will contain the pre-compiled type information + +#include "common.h" + +// TODO: reference any additional headers you need in common.H +// and not in this file diff --git a/src/coreclr/src/nativeaot/Bootstrap/common.h b/src/coreclr/src/nativeaot/Bootstrap/common.h new file mode 100644 index 0000000000000..697bfb544d404 --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/common.h @@ -0,0 +1,106 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
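// (Editor sketch, not part of this change.) The __uint64_to_double helper in
// CppCodeGen.h above reinterprets a raw bit pattern through a union, so no numeric
// conversion takes place. For example, 0x3FF0000000000000 is the IEEE-754 encoding
// of 1.0 (sign 0, biased exponent 0x3FF, zero mantissa):
double one = __uint64_to_double(0x3FF0000000000000ULL);   // yields exactly 1.0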
+ +// common.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#ifndef __COMMON_H +#define __COMMON_H + +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifndef _WIN32 +#include +#endif + +using namespace std; + +class MethodTable; +class Object; + +#ifdef _MSC_VER +#define __NORETURN __declspec(noreturn) +#else +#define __NORETURN __attribute((noreturn)) +#endif + +int __initialize_runtime(); +void __shutdown_runtime(); + +extern "C" Object * __allocate_object(MethodTable * pMT); +extern "C" Object * __allocate_array(size_t elements, MethodTable * pMT); +extern "C" Object * __castclass(MethodTable * pMT, void * obj); +extern "C" Object * __isinst(MethodTable * pMT, void * obj); +extern "C" __NORETURN void __throw_exception(void * pEx); +extern "C" void __debug_break(); + +Object * __load_string_literal(const char * string); + +extern "C" void __range_check_fail(); + +inline void __range_check(void * a, size_t elem) +{ + if (elem >= *((size_t*)a + 1)) + __range_check_fail(); +} + +Object * __get_commandline_args(int argc, char * argv[]); + +// POD version of EEType to use for static initialization +struct RawEEType +{ + uint16_t m_componentSize; + uint16_t m_flags; + uint32_t m_baseSize; + MethodTable * m_pBaseType; + uint16_t m_usNumVtableSlots; + uint16_t m_usNumInterfaces; + uint32_t m_uHashCode; +}; + +struct ReversePInvokeFrame; + +void __reverse_pinvoke(ReversePInvokeFrame* pRevFrame); +void __reverse_pinvoke_return(ReversePInvokeFrame* pRevFrame); + +struct PInvokeTransitionFrame; + +void __pinvoke(PInvokeTransitionFrame* pFrame); +void __pinvoke_return(PInvokeTransitionFrame* pFrame); + +typedef size_t UIntNative; + +inline bool IS_ALIGNED(UIntNative val, UIntNative alignment) +{ + //ASSERT(0 == (alignment & (alignment - 1))); + return 0 == (val & (alignment - 1)); +} + +template +inline bool IS_ALIGNED(T* val, UIntNative alignment) +{ + //ASSERT(0 == (alignment & (alignment - 1))); + return IS_ALIGNED(reinterpret_cast(val), alignment); +} + +#define RAW_MIN_OBJECT_SIZE (3*sizeof(void*)) + +#define AlignBaseSize(s) ((s < RAW_MIN_OBJECT_SIZE) ? RAW_MIN_OBJECT_SIZE : ((s + (sizeof(void*)-1) & ~(sizeof(void*)-1)))) + +#define ARRAY_BASE (2*sizeof(void*)) + +#endif // __COMMON_H diff --git a/src/coreclr/src/nativeaot/Bootstrap/main.cpp b/src/coreclr/src/nativeaot/Bootstrap/main.cpp new file mode 100644 index 0000000000000..00dac83b5775a --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/main.cpp @@ -0,0 +1,464 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +#include "sal.h" +#include "gcenv.structs.h" +#include "gcenv.base.h" + +#include + +#ifndef CPPCODEGEN + +// +// This is the mechanism whereby multiple linked modules contribute their global data for initialization at +// startup of the application. +// +// ILC creates sections in the output obj file to mark the beginning and end of merged global data. +// It defines sentinel symbols that are used to get the addresses of the start and end of global data +// at runtime. The section names are platform-specific to match platform-specific linker conventions. 
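// (Editor sketch, not part of this change.) The sentinel symbols simply bracket the
// merged section, so the per-module pointers can be walked as an ordinary half-open
// range. InitializeModules, called later in this file, receives exactly this range;
// the hypothetical helper below only illustrates the layout, and skipping the null
// MSVC book-end placeholders is an assumption about the consumer.
static int CountModuleHeaders()
{
    int count = 0;
    // Everything between the book-ends is a pointer to a module's ReadyToRun header.
    for (void ** ppCurrent = __modules_a; ppCurrent != __modules_z; ppCurrent++)
    {
        if (*ppCurrent != nullptr)  // skip the null book-end entries emitted for MSVC
            count++;
    }
    return count;
}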
+// +#if defined(_MSC_VER) + +#pragma section(".modules$A", read) +#pragma section(".modules$Z", read) +extern "C" __declspec(allocate(".modules$A")) void * __modules_a[]; +extern "C" __declspec(allocate(".modules$Z")) void * __modules_z[]; + +__declspec(allocate(".modules$A")) void * __modules_a[] = { nullptr }; +__declspec(allocate(".modules$Z")) void * __modules_z[] = { nullptr }; + +// +// Each obj file compiled from managed code has a .modules$I section containing a pointer to its ReadyToRun +// data (which points at eager class constructors, frozen strings, etc). +// +// The #pragma ... /merge directive folds the book-end sections and all .modules$I sections from all input +// obj files into .rdata in alphabetical order. +// +#pragma comment(linker, "/merge:.modules=.rdata") + +// +// Unboxing stubs need to be merged, folded and sorted. They are delimited by two special sections (.unbox$A +// and .unbox$Z). All unboxing stubs are in .unbox$M sections. +// +#pragma comment(linker, "/merge:.unbox=.text") + +char _bookend_a; +char _bookend_z; + +// +// Generate bookends for the managed code section. +// We give them unique bodies to prevent folding. +// + +#pragma code_seg(".managedcode$A") +void* __managedcode_a() { return &_bookend_a; } +#pragma code_seg(".managedcode$Z") +void* __managedcode_z() { return &_bookend_z; } +#pragma code_seg() + +// +// Generate bookends for the unboxing stub section. +// We give them unique bodies to prevent folding. +// + +#pragma code_seg(".unbox$A") +void* __unbox_a() { return &_bookend_a; } +#pragma code_seg(".unbox$Z") +void* __unbox_z() { return &_bookend_z; } +#pragma code_seg() + +#else // _MSC_VER + +#if defined(__APPLE__) + +extern void * __modules_a[] __asm("section$start$__DATA$__modules"); +extern void * __modules_z[] __asm("section$end$__DATA$__modules"); +extern char __managedcode_a __asm("section$start$__TEXT$__managedcode"); +extern char __managedcode_z __asm("section$end$__TEXT$__managedcode"); +extern char __unbox_a __asm("section$start$__TEXT$__unbox"); +extern char __unbox_z __asm("section$end$__TEXT$__unbox"); + +#else // __APPLE__ + +extern "C" void * __start___modules[]; +extern "C" void * __stop___modules[]; +static void * (&__modules_a)[] = __start___modules; +static void * (&__modules_z)[] = __stop___modules; + +extern "C" char __start___managedcode; +extern "C" char __stop___managedcode; +static char& __managedcode_a = __start___managedcode; +static char& __managedcode_z = __stop___managedcode; + +extern "C" char __start___unbox; +extern "C" char __stop___unbox; +static char& __unbox_a = __start___unbox; +static char& __unbox_z = __stop___unbox; + +#endif // __APPLE__ + +#endif // _MSC_VER + +#endif // !CPPCODEGEN + +// Do not warn that extern C methods throw exceptions. This is temporary +// as long as we have unimplemented/throwing APIs in this file. 
+#pragma warning(disable:4297) + +#ifdef CPPCODEGEN + +extern "C" Object * RhNewObject(MethodTable * pMT); +extern "C" Object * RhNewArray(MethodTable * pMT, int32_t elements); +extern "C" void * RhTypeCast_IsInstanceOf(MethodTable * pMT, void* pObject); +extern "C" void * RhTypeCast_CheckCast(MethodTable * pMT, void* pObject); +extern "C" void RhpStelemRef(void * pArray, int index, void * pObj); +extern "C" void * RhpLdelemaRef(void * pArray, int index, MethodTable * pMT); +extern "C" __NORETURN void RhpThrowEx(void * pEx); +extern "C" void RhDebugBreak(); + +extern "C" Object * __allocate_object(MethodTable * pMT) +{ + return RhNewObject(pMT); +} + +extern "C" Object * __allocate_array(size_t elements, MethodTable * pMT) +{ + return RhNewArray(pMT, (int32_t)elements); // TODO: type mismatch +} + +extern "C" Object * __castclass(MethodTable * pTargetMT, void* obj) +{ + return (Object *)RhTypeCast_CheckCast(pTargetMT, obj); +} + +extern "C" Object * __isinst(MethodTable * pTargetMT, void* obj) +{ + return (Object *)RhTypeCast_IsInstanceOf(pTargetMT, obj); +} + +extern "C" void __stelem_ref(void * pArray, unsigned idx, void * obj) +{ + RhpStelemRef(pArray, idx, obj); +} + +extern "C" void* __ldelema_ref(void * pArray, unsigned idx, MethodTable * type) +{ + return RhpLdelemaRef(pArray, idx, type); +} + +extern "C" void __throw_exception(void * pEx) +{ + RhpThrowEx(pEx); +} + +extern "C" void __debug_break() +{ + RhDebugBreak(); +} + +void __range_check_fail() +{ + throw "ThrowRangeOverflowException"; +} + +extern "C" void RhpReversePInvoke2(ReversePInvokeFrame* pRevFrame); +extern "C" void RhpReversePInvokeReturn2(ReversePInvokeFrame* pRevFrame); + +void __reverse_pinvoke(ReversePInvokeFrame* pRevFrame) +{ + RhpReversePInvoke2(pRevFrame); +} + +void __reverse_pinvoke_return(ReversePInvokeFrame* pRevFrame) +{ + RhpReversePInvokeReturn2(pRevFrame); +} + +extern "C" void RhpPInvoke2(PInvokeTransitionFrame* pFrame); +extern "C" void RhpPInvokeReturn2(PInvokeTransitionFrame* pFrame); + +void __pinvoke(PInvokeTransitionFrame* pFrame) +{ + RhpPInvoke2(pFrame); +} + +void __pinvoke_return(PInvokeTransitionFrame* pFrame) +{ + RhpPInvokeReturn2(pFrame); +} + +namespace System_Private_CoreLib { namespace System { + + class Object { + public: + MethodTable * get_EEType() { return *(MethodTable **)this; } + }; + + class Array : public Object { + public: + int32_t GetArrayLength() { + return *(int32_t *)((void **)this + 1); + } + void * GetArrayData() { + return (void **)this + 2; + } + }; + + class String : public Object { public: + static MethodTable * __getMethodTable(); + }; + + class String__Array : public Object { public: + static MethodTable * __getMethodTable(); + }; + + class EETypePtr { public: + intptr_t m_value; + }; + +}; }; + +Object * __load_string_literal(const char * string) +{ + // TODO: Cache/intern string literals + // TODO: Unicode string literals + + size_t len = strlen(string); + + Object * pString = RhNewArray(System_Private_CoreLib::System::String::__getMethodTable(), (int32_t)len); + + uint16_t * p = (uint16_t *)((char*)pString + sizeof(intptr_t) + sizeof(int32_t)); + for (size_t i = 0; i < len; i++) + p[i] = string[i]; + return pString; +} + +#if defined(HOST_WASM) +// Exception wrapper type that allows us to differentiate managed and native exceptions +class ManagedExceptionWrapper : exception +{ +public: + ManagedExceptionWrapper(void* pManagedException) + { + m_pManagedException = pManagedException; + } + +public: + void* m_pManagedException; +}; +#endif + +extern "C" void 
RhpThrowEx(void * pEx) +{ +#if defined(HOST_WASM) + throw ManagedExceptionWrapper(pEx); +#else + throw "RhpThrowEx"; +#endif +} + +extern "C" void RhpThrowHwEx() +{ + throw "RhpThrowHwEx"; +} + +#if defined(HOST_WASM) +// returns the Leave target +extern "C" uint32_t LlvmCatchFunclet(void* pHandlerIP, void* pvRegDisplay); +extern "C" uint32_t RhpCallCatchFunclet(void * exceptionObj, void* pHandlerIP, void* pvRegDisplay, void *exInfo) +{ + return LlvmCatchFunclet(pHandlerIP, pvRegDisplay); +} + +extern "C" uint32_t LlvmFilterFunclet(void* pHandlerIP, void* pvRegDisplay); +extern "C" uint32_t RhpCallFilterFunclet(void* exceptionObj, void * pHandlerIP, void* shadowStack) +{ + return LlvmFilterFunclet(pHandlerIP, shadowStack); +} +#else +extern "C" uint32_t RhpCallCatchFunclet(void *, void*, void*, void*) +{ + throw "RhpCallCatchFunclet"; +} +extern "C" void* RhpCallFilterFunclet(void*, void*, void*) +{ + throw "RhpCallFilterFunclet"; +} +#endif + +#if defined(HOST_WASM) +extern "C" void LlvmFinallyFunclet(void *finallyHandler, void *shadowStack); +extern "C" void RhpCallFinallyFunclet(void *finallyHandler, void *shadowStack) +{ + LlvmFinallyFunclet(finallyHandler, shadowStack); +} +#else +extern "C" void RhpCallFinallyFunclet(void *, void*) +{ + throw "RhpCallFinallyFunclet"; +} +#endif + +extern "C" void RhpUniversalTransition() +{ + throw "RhpUniversalTransition"; +} +extern "C" void RhpUniversalTransition_DebugStepTailCall() +{ + throw "RhpUniversalTransition_DebugStepTailCall"; +} +extern "C" void ConstrainedCallSupport_GetStubs(void*, void*) +{ + throw "ConstrainedCallSupport_GetStubs"; +} + +extern "C" void* RtRHeaderWrapper(); +#endif // CPPCODEGEN + +// This works around System.Private.Interop's references to Interop.Native. +// This won't be needed once we stop dragging in S.P.Interop for basic p/invoke support. 
+extern "C" void CCWAddRef() +{ + throw "CCWAddRef"; +} + +extern "C" void __fail_fast() +{ + // TODO: FailFast + printf("Call to an unimplemented runtime method; execution cannot continue.\n"); + printf("Method: __fail_fast\n"); + exit(-1); +} + +extern "C" bool RhInitialize(); +extern "C" void RhpEnableConservativeStackReporting(); +extern "C" void RhpShutdown(); +extern "C" void RhSetRuntimeInitializationCallback(int (*fPtr)()); + +#ifndef CPPCODEGEN + +extern "C" bool RhRegisterOSModule(void * pModule, + void * pvManagedCodeStartRange, uint32_t cbManagedCodeRange, + void * pvUnboxingStubsStartRange, uint32_t cbUnboxingStubsRange, + void ** pClasslibFunctions, uint32_t nClasslibFunctions); + +extern "C" void* PalGetModuleHandleFromPointer(void* pointer); + +#endif // !CPPCODEGEN + +extern "C" void GetRuntimeException(); +extern "C" void FailFast(); +extern "C" void AppendExceptionStackFrame(); +extern "C" void GetSystemArrayEEType(); +extern "C" void OnFirstChanceException(); + +typedef void(*pfn)(); + +static const pfn c_classlibFunctions[] = { + &GetRuntimeException, + &FailFast, + nullptr, // &UnhandledExceptionHandler, + &AppendExceptionStackFrame, + nullptr, // &CheckStaticClassConstruction, + &GetSystemArrayEEType, + &OnFirstChanceException, + nullptr, // &DebugFuncEvalHelper, + nullptr, // &DebugFuncEvalAbortHelper, +}; + +extern "C" void InitializeModules(void* osModule, void ** modules, int count, void ** pClasslibFunctions, int nClasslibFunctions); + +#ifndef CORERT_DLL +#define CORERT_ENTRYPOINT __managed__Main +#if defined(_WIN32) +extern "C" int __managed__Main(int argc, wchar_t* argv[]); +#else +extern "C" int __managed__Main(int argc, char* argv[]); +#endif +#else +#define CORERT_ENTRYPOINT __managed__Startup +extern "C" void __managed__Startup(); +#endif // !CORERT_DLL + +static int InitializeRuntime() +{ + if (!RhInitialize()) + return -1; + +#if defined(CPPCODEGEN) || defined(HOST_WASM) + RhpEnableConservativeStackReporting(); +#endif // CPPCODEGEN + +#ifndef CPPCODEGEN + void * osModule = PalGetModuleHandleFromPointer((void*)&CORERT_ENTRYPOINT); + + // TODO: pass struct with parameters instead of the large signature of RhRegisterOSModule + if (!RhRegisterOSModule( + osModule, + (void*)&__managedcode_a, (uint32_t)((char *)&__managedcode_z - (char*)&__managedcode_a), + (void*)&__unbox_a, (uint32_t)((char *)&__unbox_z - (char*)&__unbox_a), + (void **)&c_classlibFunctions, _countof(c_classlibFunctions))) + { + return -1; + } +#endif // !CPPCODEGEN + +#ifndef CPPCODEGEN + InitializeModules(osModule, __modules_a, (int)((__modules_z - __modules_a)), (void **)&c_classlibFunctions, _countof(c_classlibFunctions)); +#elif defined HOST_WASM + InitializeModules(nullptr, (void**)RtRHeaderWrapper(), 1, (void **)&c_classlibFunctions, _countof(c_classlibFunctions)); +#else // !CPPCODEGEN + InitializeModules(nullptr, (void**)RtRHeaderWrapper(), 2, (void **)&c_classlibFunctions, _countof(c_classlibFunctions)); +#endif // !CPPCODEGEN + +#ifdef CORERT_DLL + // Run startup method immediately for a native library + __managed__Startup(); +#endif // CORERT_DLL + + return 0; +} + +#ifndef CORERT_DLL +#if defined(_WIN32) +int __cdecl wmain(int argc, wchar_t* argv[]) +#else +int main(int argc, char* argv[]) +#endif +{ + int initval = InitializeRuntime(); + if (initval != 0) + return initval; + + int retval; +#ifdef CPPCODEGEN + try +#endif + { + retval = __managed__Main(argc, argv); + } +#ifdef CPPCODEGEN + catch (const char* &e) + { + printf("Call to an unimplemented runtime method; execution 
cannot continue.\n"); + printf("Method: %s\n", e); + retval = -1; + } +#endif + RhpShutdown(); + + return retval; +} +#endif // !CORERT_DLL + +#ifdef CORERT_DLL +static struct InitializeRuntimePointerHelper +{ + InitializeRuntimePointerHelper() + { + RhSetRuntimeInitializationCallback(&InitializeRuntime); + } +} initializeRuntimePointerHelper; +#endif // CORERT_DLL diff --git a/src/coreclr/src/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/src/nativeaot/Runtime/AsmOffsets.h new file mode 100644 index 0000000000000..0860aad4c8c2e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/AsmOffsets.h @@ -0,0 +1,121 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.cpp to validate that our +// assembly-code offsets always match their C++ counterparts. + +// You must #define PLAT_ASM_OFFSET and PLAT_ASM_SIZEOF before you #include this file + +#ifdef HOST_64BIT +#define ASM_OFFSET(offset32, offset64, cls, member) PLAT_ASM_OFFSET(offset64, cls, member) +#define ASM_SIZEOF(sizeof32, sizeof64, cls ) PLAT_ASM_SIZEOF(sizeof64, cls) +#define ASM_CONST(const32, const64, expr) PLAT_ASM_CONST(const64, expr) +#else +#define ASM_OFFSET(offset32, offset64, cls, member) PLAT_ASM_OFFSET(offset32, cls, member) +#define ASM_SIZEOF(sizeof32, sizeof64, cls ) PLAT_ASM_SIZEOF(sizeof32, cls) +#define ASM_CONST(const32, const64, expr) PLAT_ASM_CONST(const32, expr) +#endif + +// NOTE: the values MUST be in hex notation WITHOUT the 0x prefix + +// 32-bit,64-bit, constant symbol +ASM_CONST( 14c08, 14c08, RH_LARGE_OBJECT_SIZE) +ASM_CONST( 400, 800, CLUMP_SIZE) +ASM_CONST( a, b, LOG2_CLUMP_SIZE) + +// 32-bit,64-bit, class, member +ASM_OFFSET( 0, 0, Object, m_pEEType) + +ASM_OFFSET( 4, 8, Array, m_Length) + +ASM_OFFSET( 4, 8, String, m_Length) +ASM_OFFSET( 8, C, String, m_FirstChar) +ASM_CONST( 2, 2, STRING_COMPONENT_SIZE) +ASM_CONST( E, 16, STRING_BASE_SIZE) +ASM_CONST(3FFFFFDF,3FFFFFDF,MAX_STRING_LENGTH) + +ASM_OFFSET( 0, 0, EEType, m_usComponentSize) +ASM_OFFSET( 2, 2, EEType, m_usFlags) +ASM_OFFSET( 4, 4, EEType, m_uBaseSize) +ASM_OFFSET( 14, 18, EEType, m_VTable) + +ASM_OFFSET( 0, 0, Thread, m_rgbAllocContextBuffer) +ASM_OFFSET( 28, 38, Thread, m_ThreadStateFlags) +ASM_OFFSET( 2c, 40, Thread, m_pTransitionFrame) +ASM_OFFSET( 30, 48, Thread, m_pHackPInvokeTunnel) +ASM_OFFSET( 40, 68, Thread, m_ppvHijackedReturnAddressLocation) +ASM_OFFSET( 44, 70, Thread, m_pvHijackedReturnAddress) +#ifdef HOST_64BIT +ASM_OFFSET( 0, 78, Thread, m_uHijackedReturnValueFlags) +#endif +ASM_OFFSET( 48, 80, Thread, m_pExInfoStackHead) +ASM_OFFSET( 4c, 88, Thread, m_threadAbortException) + +ASM_SIZEOF( 14, 20, EHEnum) + +ASM_OFFSET( 0, 0, gc_alloc_context, alloc_ptr) +ASM_OFFSET( 4, 8, gc_alloc_context, alloc_limit) + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +ASM_OFFSET( 4, 8, InterfaceDispatchCell, m_pCache) +#ifndef HOST_64BIT +ASM_OFFSET( 8, 0, InterfaceDispatchCache, m_pCell) +#endif +ASM_OFFSET( 10, 20, InterfaceDispatchCache, m_rgEntries) +ASM_SIZEOF( 8, 10, InterfaceDispatchCacheEntry) +#endif + +ASM_OFFSET( 4, 8, StaticClassConstructionContext, m_initialized) + +#ifdef FEATURE_DYNAMIC_CODE +ASM_OFFSET( 0, 0, CallDescrData, pSrc) +ASM_OFFSET( 4, 8, CallDescrData, numStackSlots) +ASM_OFFSET( 8, C, CallDescrData, fpReturnSize) +ASM_OFFSET( C, 10, CallDescrData, pArgumentRegisters) +ASM_OFFSET( 10, 18, CallDescrData, pFloatArgumentRegisters) +ASM_OFFSET( 14, 20, CallDescrData, pTarget) +ASM_OFFSET( 18, 28, 
CallDescrData, pReturnBuffer) +#endif + +// Undefine macros that are only used in this header for convenience. +#undef ASM_OFFSET +#undef ASM_SIZEOF +#undef ASM_CONST + +// Define platform specific offsets +#include "AsmOffsetsCpu.h" + +//#define USE_COMPILE_TIME_CONSTANT_FINDER // Uncomment this line to use the constant finder +#if defined(__cplusplus) && defined(USE_COMPILE_TIME_CONSTANT_FINDER) +// This class causes the compiler to emit an error with the constant we're interested in +// in the error message. This is useful if a size or offset changes. To use, comment out +// the compile-time assert that is firing, enable the constant finder, add the appropriate +// constant to find to BogusFunction(), and build. +// +// Here's a sample compiler error: +// In file included from corert/src/Native/Runtime/AsmOffsetsVerify.cpp:38: +// corert/src/Native/Runtime/Full/../AsmOffsets.h:117:61: error: calling a private constructor of class +// 'AsmOffsets::FindCompileTimeConstant<25>' +// FindCompileTimeConstant bogus_variable; +// ^ +// corert/src/Native/Runtime/Full/../AsmOffsets.h:111:5: note: declared private here +// FindCompileTimeConstant(); +// ^ +template +class FindCompileTimeConstant +{ +private: + FindCompileTimeConstant(); +}; + +void BogusFunction() +{ + // Sample usage to generate the error + FindCompileTimeConstant bogus_variable; + FindCompileTimeConstant bogus_variable2; + FindCompileTimeConstant bogus_variable3; + FindCompileTimeConstant bogus_variable4; + FindCompileTimeConstant bogus_variable5; +} +#endif // defined(__cplusplus) && defined(USE_COMPILE_TIME_CONSTANT_FINDER) diff --git a/src/coreclr/src/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/src/nativeaot/Runtime/AsmOffsetsVerify.cpp new file mode 100644 index 0000000000000..6f4b87104e8d2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "rhassert.h" +#include "RedhawkWarnings.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "TargetPtrs.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "CachedInterfaceDispatch.h" +#include "shash.h" +#include "CallDescr.h" + +class AsmOffsets +{ + static_assert(sizeof(Thread::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "Thread::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); + + // Some assembly helpers for arrays and strings are shared and use the fact that arrays and strings have similar layouts) + static_assert(offsetof(Array, m_Length) == offsetof(String, m_Length), "The length field of String and Array have different offsets"); + static_assert(sizeof(((Array*)0)->m_Length) == sizeof(((String*)0)->m_Length), "The length field of String and Array have different sizes"); + +#define PLAT_ASM_OFFSET(offset, cls, member) \ + static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) > 0x##offset), "Bad asm offset for '" #cls "." #member "', the actual offset is smaller than 0x" #offset "."); \ + static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) < 0x##offset), "Bad asm offset for '" #cls "." 
#member "', the actual offset is larger than 0x" #offset "."); + +#define PLAT_ASM_SIZEOF(size, cls ) \ + static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) > 0x##size), "Bad asm size for '" #cls "', the actual size is smaller than 0x" #size "."); \ + static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) < 0x##size), "Bad asm size for '" #cls "', the actual size is larger than 0x" #size "."); + +#define PLAT_ASM_CONST(constant, expr) \ + static_assert(((expr) == 0x##constant) || ((expr) > 0x##constant), "Bad asm constant for '" #expr "', the actual value is smaller than 0x" #constant "."); \ + static_assert(((expr) == 0x##constant) || ((expr) < 0x##constant), "Bad asm constant for '" #expr "', the actual value is larger than 0x" #constant "."); + +#include "AsmOffsets.h" + +}; + +#ifdef _MSC_VER +namespace { char WorkaroundLNK4221Warning; }; +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.cpp b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.cpp new file mode 100644 index 0000000000000..1a9a15aafda89 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.cpp @@ -0,0 +1,543 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ==--== +// +// Shared (non-architecture specific) portions of a mechanism to perform interface dispatch using an alternate +// mechanism to VSD that does not require runtime generation of code. +// +// ============================================================================ +#include "common.h" +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "RedhawkWarnings.h" +#include "TargetPtrs.h" +#include "eetype.h" +#include "Range.h" +#include "allocheap.h" +#include "rhbinder.h" +#include "ObjectLayout.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "eetype.inl" + +#include "CachedInterfaceDispatch.h" + +// We always allocate cache sizes with a power of 2 number of entries. We have a maximum size we support, +// defined below. +#define CID_MAX_CACHE_SIZE_LOG2 6 +#define CID_MAX_CACHE_SIZE (1 << CID_MAX_CACHE_SIZE_LOG2) + +//#define FEATURE_CID_STATS 1 + +#ifdef FEATURE_CID_STATS + +// Some counters used for debugging and profiling the algorithms. +extern "C" +{ + UInt32 CID_g_cLoadVirtFunc = 0; + UInt32 CID_g_cCacheMisses = 0; + UInt32 CID_g_cCacheSizeOverflows = 0; + UInt32 CID_g_cCacheOutOfMemory = 0; + UInt32 CID_g_cCacheReallocates = 0; + UInt32 CID_g_cCacheAllocates = 0; + UInt32 CID_g_cCacheDiscards = 0; + UInt32 CID_g_cInterfaceDispatches = 0; + UInt32 CID_g_cbMemoryAllocated = 0; + UInt32 CID_g_rgAllocatesBySize[CID_MAX_CACHE_SIZE_LOG2 + 1] = { 0 }; +}; + +#define CID_COUNTER_INC(_counter_name) CID_g_c##_counter_name++ + +#else + +#define CID_COUNTER_INC(_counter_name) + +#endif // FEATURE_CID_STATS + +// Helper function for updating two adjacent pointers (which are aligned on a double pointer-sized boundary) +// atomically. +// +// This is used to update interface dispatch cache entries and also the stub/cache pair in +// interface dispatch indirection cells. 
The cases have slightly different semantics: cache entry updates +// (fFailOnNonNull == true) require that the existing values in the location are both NULL whereas indirection +// cell updates have no such restriction. In both cases we'll try the update once; on failure we'll return the +// new value of the second pointer and on success we'll the old value of the second pointer. +// +// This suits the semantics of both callers. For indirection cell updates the caller needs to know the address +// of the cache that can now be scheduled for release and the cache pointer is the second one in the pair. For +// cache entry updates the caller only needs a success/failure indication: on success the return value will be +// NULL and on failure non-NULL. +static void * UpdatePointerPairAtomically(void * pPairLocation, + void * pFirstPointer, + void * pSecondPointer, + bool fFailOnNonNull) +{ +#if defined(HOST_64BIT) + // The same comments apply to the AMD64 version. The CompareExchange looks a little different since the + // API was refactored in terms of Int64 to avoid creating a 128-bit integer type. + + Int64 rgComparand[2] = { 0 , 0 }; + if (!fFailOnNonNull) + { + rgComparand[0] = *(Int64 volatile *)pPairLocation; + rgComparand[1] = *((Int64 volatile *)pPairLocation + 1); + } + + UInt8 bResult = PalInterlockedCompareExchange128((Int64*)pPairLocation, (Int64)pSecondPointer, (Int64)pFirstPointer, rgComparand); + if (bResult == 1) + { + // Success, return old value of second pointer (rgComparand is updated by + // PalInterlockedCompareExchange128 with the old pointer values in this case). + return (void*)rgComparand[1]; + } + + // Failure, return the new second pointer value. + return pSecondPointer; +#else + // Stuff the two pointers into a 64-bit value as the proposed new value for the CompareExchange64 below. + Int64 iNewValue = (Int64)((UInt64)(UIntNative)pFirstPointer | ((UInt64)(UIntNative)pSecondPointer << 32)); + + // Read the old value in the location. If fFailOnNonNull is set we just assume this was zero and we'll + // fail below if that's not the case. + Int64 iOldValue = fFailOnNonNull ? 0 : *(Int64 volatile *)pPairLocation; + + Int64 iUpdatedOldValue = PalInterlockedCompareExchange64((Int64*)pPairLocation, iNewValue, iOldValue); + if (iUpdatedOldValue == iOldValue) + { + // Successful update. Return the previous value of the second pointer. For cache entry updates + // (fFailOnNonNull == true) this is guaranteed to be NULL in this case and the result being being + // NULL in the success case is all the caller cares about. For indirection cell updates the second + // pointer represents the old cache and the caller needs this data so they can schedule the cache + // for deletion once it becomes safe to do so. + return (void*)(UInt32)(iOldValue >> 32); + } + + // The update failed due to a racing update to the same location. Return the new value of the second + // pointer (either a new cache that lost the race or a non-NULL pointer in the cache entry update case). + return pSecondPointer; +#endif // HOST_64BIT +} + +// Helper method for updating an interface dispatch cache entry atomically. See comments by the usage of +// this method for the details of why we need this. If a racing update is detected false is returned and the +// update abandoned. This is necessary since it's not safe to update a valid cache entry (one with a non-NULL +// m_pInstanceType field) outside of a GC. 
+static bool UpdateCacheEntryAtomically(InterfaceDispatchCacheEntry *pEntry, + EEType * pInstanceType, + void * pTargetCode) +{ + C_ASSERT(sizeof(InterfaceDispatchCacheEntry) == (sizeof(void*) * 2)); + C_ASSERT(offsetof(InterfaceDispatchCacheEntry, m_pInstanceType) < offsetof(InterfaceDispatchCacheEntry, m_pTargetCode)); + + return UpdatePointerPairAtomically(pEntry, pInstanceType, pTargetCode, true) == NULL; +} + +// Helper method for updating an interface dispatch indirection cell's stub and cache pointer atomically. +// Returns the value of the cache pointer that is not referenced by the cell after this operation. This can be +// NULL on the initial cell update, the value of the old cache pointer or the value of the new cache pointer +// supplied (in the case where another thread raced with us for the update and won). In any case, if the +// returned pointer is non-NULL it represents a cache that should be scheduled for release. +static InterfaceDispatchCache * UpdateCellStubAndCache(InterfaceDispatchCell * pCell, + void * pStub, + UIntNative newCacheValue) +{ + C_ASSERT(offsetof(InterfaceDispatchCell, m_pStub) == 0); + C_ASSERT(offsetof(InterfaceDispatchCell, m_pCache) == sizeof(void*)); + + UIntNative oldCacheValue = (UIntNative)UpdatePointerPairAtomically(pCell, pStub, (void*)newCacheValue, false); + + if (InterfaceDispatchCell::IsCache(oldCacheValue)) + { + return (InterfaceDispatchCache *)oldCacheValue; + } + else + { + return nullptr; + } +} + +// +// Cache allocation logic. +// +// We use the existing AllocHeap mechanism as our base allocator for cache blocks. This is because it can +// provide the required 16-byte alignment with no padding or heap header costs. The downside is that there is +// no deallocation support (which would be hard to implement without implementing a cache block compaction +// scheme, which is certainly possible but not necessarily needed at this point). +// +// Instead, much like the original VSD algorithm, we keep discarded cache blocks and use them to satisfy new +// allocation requests before falling back on AllocHeap. +// +// We can't re-use discarded cache blocks immediately since there may be code that is still using them. +// Instead we link them into a global list and then at the next GC (when no code can hold a reference to these +// any more) we can place them on one of several free lists based on their size. +// + +#if defined(HOST_AMD64) || defined(HOST_ARM64) + +// Head of the list of discarded cache blocks that can't be re-used just yet. +InterfaceDispatchCache * g_pDiscardedCacheList; // for AMD64 and ARM64, m_pCell is not used and we can link the discarded blocks themselves + +#else // defined(HOST_AMD64) || defined(HOST_ARM64) + +struct DiscardedCacheBlock +{ + DiscardedCacheBlock * m_pNext; // for x86 and ARM, we are short of registers, thus need the m_pCell back pointers + InterfaceDispatchCache * m_pCache; // and thus need this auxiliary list +}; + +// Head of the list of discarded cache blocks that can't be re-used just yet. +static DiscardedCacheBlock * g_pDiscardedCacheList = NULL; + +// Free list of DiscardedCacheBlock items +static DiscardedCacheBlock * g_pDiscardedCacheFree = NULL; + +#endif // defined(HOST_AMD64) || defined(HOST_ARM64) + +// Free lists for each cache size up to the maximum. We allocate from these in preference to new memory. +static InterfaceDispatchCache * g_rgFreeLists[CID_MAX_CACHE_SIZE_LOG2 + 1]; + +// Lock protecting both g_pDiscardedCacheList and g_rgFreeLists. 
We don't use the OS SLIST support here since +// it imposes too much space overhead on list entries on 64-bit (each is actually 16 bytes). +static CrstStatic g_sListLock; + +// The base memory allocator. +static AllocHeap * g_pAllocHeap = NULL; + +// Each cache size has an associated stub used to perform lookup over that cache. +extern "C" void RhpInterfaceDispatch1(); +extern "C" void RhpInterfaceDispatch2(); +extern "C" void RhpInterfaceDispatch4(); +extern "C" void RhpInterfaceDispatch8(); +extern "C" void RhpInterfaceDispatch16(); +extern "C" void RhpInterfaceDispatch32(); +extern "C" void RhpInterfaceDispatch64(); + +extern "C" void RhpVTableOffsetDispatch(); + +typedef void (*InterfaceDispatchStub)(); + +static void * g_rgDispatchStubs[CID_MAX_CACHE_SIZE_LOG2 + 1] = { + (void *)&RhpInterfaceDispatch1, + (void *)&RhpInterfaceDispatch2, + (void *)&RhpInterfaceDispatch4, + (void *)&RhpInterfaceDispatch8, + (void *)&RhpInterfaceDispatch16, + (void *)&RhpInterfaceDispatch32, + (void *)&RhpInterfaceDispatch64, +}; + +// Map a cache size into a linear index. +static UInt32 CacheSizeToIndex(UInt32 cCacheEntries) +{ + switch (cCacheEntries) + { + case 1: + return 0; + case 2: + return 1; + case 4: + return 2; + case 8: + return 3; + case 16: + return 4; + case 32: + return 5; + case 64: + return 6; + default: + UNREACHABLE(); + } +} + +// Allocates and initializes new cache of the given size. If given a previous version of the cache (guaranteed +// to be smaller) it will also pre-populate the new cache with the contents of the old. Additionally the +// address of the interface dispatch stub associated with this size of cache is returned. +static UIntNative AllocateCache(UInt32 cCacheEntries, InterfaceDispatchCache * pExistingCache, const DispatchCellInfo *pNewCellInfo, void ** ppStub) +{ + if (pNewCellInfo->CellType == DispatchCellType::VTableOffset) + { + ASSERT(pNewCellInfo->VTableOffset < InterfaceDispatchCell::IDC_MaxVTableOffsetPlusOne); + *ppStub = (void *)&RhpVTableOffsetDispatch; + ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->VTableOffset)); + return pNewCellInfo->VTableOffset; + } + + ASSERT((cCacheEntries >= 1) && (cCacheEntries <= CID_MAX_CACHE_SIZE)); + ASSERT((pExistingCache == NULL) || (pExistingCache->m_cEntries < cCacheEntries)); + + InterfaceDispatchCache * pCache = NULL; + + // Transform cache size back into a linear index. + UInt32 idxCacheSize = CacheSizeToIndex(cCacheEntries); + + // Attempt to allocate the head of the free list of the correct cache size. + if (g_rgFreeLists[idxCacheSize] != NULL) + { + CrstHolder lh(&g_sListLock); + + pCache = g_rgFreeLists[idxCacheSize]; + if (pCache != NULL) + { + g_rgFreeLists[idxCacheSize] = pCache->m_pNextFree; + CID_COUNTER_INC(CacheReallocates); + } + } + + if (pCache == NULL) + { + // No luck with the free list, allocate the cache from via the AllocHeap. + pCache = (InterfaceDispatchCache*)g_pAllocHeap->AllocAligned(sizeof(InterfaceDispatchCache) + + (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries), + sizeof(void*) * 2); + if (pCache == NULL) + return NULL; + + CID_COUNTER_INC(CacheAllocates); +#ifdef FEATURE_CID_STATS + CID_g_cbMemoryAllocated += sizeof(InterfaceDispatchCacheEntry) * cCacheEntries; + CID_g_rgAllocatesBySize[idxCacheSize]++; +#endif + } + + // We have a cache block, now initialize it. + pCache->m_pNextFree = NULL; + pCache->m_cEntries = cCacheEntries; + pCache->m_cacheHeader.Initialize(pNewCellInfo); + + // Copy over entries from previous version of the cache (if any) and zero the rest. 
+ if (pExistingCache) + { + memcpy(pCache->m_rgEntries, + pExistingCache->m_rgEntries, + sizeof(InterfaceDispatchCacheEntry) * pExistingCache->m_cEntries); + memset(&pCache->m_rgEntries[pExistingCache->m_cEntries], + 0, + (cCacheEntries - pExistingCache->m_cEntries) * sizeof(InterfaceDispatchCacheEntry)); + } + else + { + memset(pCache->m_rgEntries, + 0, + cCacheEntries * sizeof(InterfaceDispatchCacheEntry)); + } + + // Pass back the stub the corresponds to this cache size. + *ppStub = g_rgDispatchStubs[idxCacheSize]; + + return (UIntNative)pCache; +} + +// Discards a cache by adding it to a list of caches that may still be in use but will be made available for +// re-allocation at the next GC. +static void DiscardCache(InterfaceDispatchCache * pCache) +{ + CID_COUNTER_INC(CacheDiscards); + + CrstHolder lh(&g_sListLock); + +#if defined(HOST_AMD64) || defined(HOST_ARM64) + + // on AMD64 and ARM64, we can thread the list through the blocks directly + pCache->m_pNextFree = g_pDiscardedCacheList; + g_pDiscardedCacheList = pCache; + +#else // defined(HOST_AMD64) || defined(HOST_ARM64) + + // on other architectures, we cannot overwrite pCache->m_pNextFree yet + // because it shares storage with m_pCell which may still be used as a back + // pointer to the dispatch cell. + + // instead, allocate an auxiliary node (with its own auxiliary free list) + DiscardedCacheBlock * pDiscardedCacheBlock = g_pDiscardedCacheFree; + if (pDiscardedCacheBlock != NULL) + g_pDiscardedCacheFree = pDiscardedCacheBlock->m_pNext; + else + pDiscardedCacheBlock = (DiscardedCacheBlock *)g_pAllocHeap->Alloc(sizeof(DiscardedCacheBlock)); + + if (pDiscardedCacheBlock != NULL) // if we did NOT get the memory, we leak the discarded block + { + pDiscardedCacheBlock->m_pNext = g_pDiscardedCacheList; + pDiscardedCacheBlock->m_pCache = pCache; + + g_pDiscardedCacheList = pDiscardedCacheBlock; + } +#endif // defined(HOST_AMD64) || defined(HOST_ARM64) +} + +// Called during a GC to empty the list of discarded caches (which we can now guarantee aren't being accessed) +// and sort the results into the free lists we maintain for each cache size. +void ReclaimUnusedInterfaceDispatchCaches() +{ + // No need for any locks, we're not racing with any other threads any more. + + // Walk the list of discarded caches. +#if defined(HOST_AMD64) || defined(HOST_ARM64) + + // on AMD64, this is threaded directly through the cache blocks + InterfaceDispatchCache * pCache = g_pDiscardedCacheList; + while (pCache) + { + InterfaceDispatchCache * pNextCache = pCache->m_pNextFree; + + // Transform cache size back into a linear index. + UInt32 idxCacheSize = CacheSizeToIndex(pCache->m_cEntries); + + // Insert the cache onto the head of the correct free list. + pCache->m_pNextFree = g_rgFreeLists[idxCacheSize]; + g_rgFreeLists[idxCacheSize] = pCache; + + pCache = pNextCache; + } + +#else // defined(HOST_AMD64) || defined(HOST_ARM64) + + // on other architectures, we use an auxiliary list instead + DiscardedCacheBlock * pDiscardedCacheBlock = g_pDiscardedCacheList; + while (pDiscardedCacheBlock) + { + InterfaceDispatchCache * pCache = pDiscardedCacheBlock->m_pCache; + + // Transform cache size back into a linear index. + UInt32 idxCacheSize = CacheSizeToIndex(pCache->m_cEntries); + + // Insert the cache onto the head of the correct free list. 
+ pCache->m_pNextFree = g_rgFreeLists[idxCacheSize]; + g_rgFreeLists[idxCacheSize] = pCache; + + // Insert the container to its own free list + DiscardedCacheBlock * pNextDiscardedCacheBlock = pDiscardedCacheBlock->m_pNext; + pDiscardedCacheBlock->m_pNext = g_pDiscardedCacheFree; + g_pDiscardedCacheFree = pDiscardedCacheBlock; + pDiscardedCacheBlock = pNextDiscardedCacheBlock; + } + +#endif // defined(HOST_AMD64) || defined(HOST_ARM64) + + // We processed all the discarded entries, so we can simply NULL the list head. + g_pDiscardedCacheList = NULL; +} + +// One time initialization of interface dispatch. +bool InitializeInterfaceDispatch() +{ + g_pAllocHeap = new AllocHeap(); + if (g_pAllocHeap == NULL) + return false; + + if (!g_pAllocHeap->Init()) + return false; + + g_sListLock.Init(CrstInterfaceDispatchGlobalLists, CRST_DEFAULT); + + return true; +} + +COOP_PINVOKE_HELPER(PTR_Code, RhpUpdateDispatchCellCache, (InterfaceDispatchCell * pCell, PTR_Code pTargetCode, EEType* pInstanceType, DispatchCellInfo *pNewCellInfo)) +{ + // Attempt to update the cache with this new mapping (if we have any cache at all, the initial state + // is none). + InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); + UInt32 cOldCacheEntries = 0; + if (pCache != NULL) + { + InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; + for (UInt32 i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) + { + if (pCacheEntry->m_pInstanceType == NULL) + { + if (UpdateCacheEntryAtomically(pCacheEntry, pInstanceType, pTargetCode)) + return (PTR_Code)pTargetCode; + } + } + + cOldCacheEntries = pCache->m_cEntries; + } + + // Failed to update an existing cache, we need to allocate a new cache. The old one, if any, might + // still be in use so we can't simply reclaim it. Instead we keep it around until the next GC at which + // point we know no code is holding a reference to it. Particular cache sizes are associated with a + // (globally shared) stub which implicitly knows the size of the cache. + + if (cOldCacheEntries == CID_MAX_CACHE_SIZE) + { + // We already reached the maximum cache size we wish to allocate. For now don't attempt to cache + // the mapping we just did: there's no safe way to update the existing cache right now if it + // doesn't have an empty entries. There are schemes that would let us do this at the next GC point + // but it's not clear whether we should do this or re-tune the cache max size, we need to measure + // this. + CID_COUNTER_INC(CacheSizeOverflows); + return (PTR_Code)pTargetCode; + } + + UInt32 cNewCacheEntries = cOldCacheEntries ? cOldCacheEntries * 2 : 1; + void *pStub; + UIntNative newCacheValue = AllocateCache(cNewCacheEntries, pCache, pNewCellInfo, &pStub); + if (newCacheValue == 0) + { + CID_COUNTER_INC(CacheOutOfMemory); + return (PTR_Code)pTargetCode; + } + + if (InterfaceDispatchCell::IsCache(newCacheValue)) + { + pCache = (InterfaceDispatchCache*)newCacheValue; +#if !defined(HOST_AMD64) && !defined(HOST_ARM64) + // Set back pointer to interface dispatch cell for non-AMD64 and non-ARM64 + // for AMD64 and ARM64, we have enough registers to make this trick unnecessary + pCache->m_pCell = pCell; +#endif // !defined(HOST_AMD64) && !defined(HOST_ARM64) + + // Add entry to the first unused slot. 
+ InterfaceDispatchCacheEntry * pCacheEntry = &pCache->m_rgEntries[cOldCacheEntries]; + pCacheEntry->m_pInstanceType = pInstanceType; + pCacheEntry->m_pTargetCode = pTargetCode; + } + + // Publish the new cache by atomically updating both the cache and stub pointers in the indirection + // cell. This returns us a cache to discard which may be NULL (no previous cache), the previous cache + // value or the cache we just allocated (another thread performed an update first). + InterfaceDispatchCache * pDiscardedCache = UpdateCellStubAndCache(pCell, pStub, newCacheValue); + if (pDiscardedCache) + DiscardCache(pDiscardedCache); + + return (PTR_Code)pTargetCode; +} + +COOP_PINVOKE_HELPER(PTR_Code, RhpSearchDispatchCellCache, (InterfaceDispatchCell * pCell, EEType* pInstanceType)) +{ + // This function must be implemented in native code so that we do not take a GC while walking the cache + InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); + if (pCache != NULL) + { + InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; + for (UInt32 i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) + if (pCacheEntry->m_pInstanceType == pInstanceType) + return (PTR_Code)pCacheEntry->m_pTargetCode; + } + + return nullptr; +} + +// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented +// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed +// code due to its use of the GC state as a lock, and as lifetime control +COOP_PINVOKE_HELPER(void, RhpGetDispatchCellInfo, (InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo)) +{ + *pDispatchCellInfo = pCell->GetDispatchCellInfo(); +} + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.h b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.h new file mode 100644 index 0000000000000..173dddff5513a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.h @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ==--== +// +// Shared (non-architecture specific) portions of a mechanism to perform interface dispatch using an alternate +// mechanism to VSD that does not require runtime generation of code. +// +// ============================================================================ + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +bool InitializeInterfaceDispatch(); +void ReclaimUnusedInterfaceDispatchCaches(); + +// Interface dispatch caches contain an array of these entries. An instance of a cache is paired with a stub +// that implicitly knows how many entries are contained. These entries must be aligned to twice the alignment +// of a pointer due to the synchonization mechanism used to update them at runtime. +struct InterfaceDispatchCacheEntry +{ + EEType * m_pInstanceType; // Potential type of the object instance being dispatched on + void * m_pTargetCode; // Method to dispatch to if the actual instance type matches the above +}; + +// The interface dispatch cache itself. As well as the entries we include the cache size (since logic such as +// cache miss processing needs to determine this value in a synchronized manner, so it can't be contained in +// the owning interface dispatch indirection cell) and a list entry used to link the caches in one of a couple +// of lists related to cache reclamation. 
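// (Editor sketch, not part of this change.) A hypothetical helper that makes the
// flexible-array layout of the struct below concrete; AllocateCache in
// CachedInterfaceDispatch.cpp performs the equivalent computation when it calls
// AllocHeap::AllocAligned with 2 * sizeof(void*) alignment.
inline size_t InterfaceDispatchCacheByteSize(UInt32 cEntries)
{
    // Fixed header (cache header, free-list/back-pointer union, entry count)
    // followed by cEntries inline InterfaceDispatchCacheEntry slots.
    return sizeof(InterfaceDispatchCache) + cEntries * sizeof(InterfaceDispatchCacheEntry);
}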
+#pragma warning(push) +#pragma warning(disable:4200) // nonstandard extension used: zero-sized array in struct/union +struct InterfaceDispatchCell; +struct InterfaceDispatchCache +{ + InterfaceDispatchCacheHeader m_cacheHeader; + union + { + InterfaceDispatchCache * m_pNextFree; // next in free list +#ifndef HOST_AMD64 + InterfaceDispatchCell * m_pCell; // pointer back to interface dispatch cell - not used for AMD64 +#endif + }; + UInt32 m_cEntries; + InterfaceDispatchCacheEntry m_rgEntries[]; +}; +#pragma warning(pop) + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/src/nativeaot/Runtime/CallDescr.h b/src/coreclr/src/nativeaot/Runtime/CallDescr.h new file mode 100644 index 0000000000000..946b96d2c8e7c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CallDescr.h @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +struct CallDescrData +{ + uint8_t* pSrc; + int numStackSlots; + int fpReturnSize; + uint8_t* pArgumentRegisters; + uint8_t* pFloatArgumentRegisters; + void* pTarget; + void* pReturnBuffer; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/CommonMacros.h b/src/coreclr/src/nativeaot/Runtime/CommonMacros.h new file mode 100644 index 0000000000000..54cb099bb2f8c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CommonMacros.h @@ -0,0 +1,228 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __COMMONMACROS_H__ +#define __COMMONMACROS_H__ + +#include "rhassert.h" + +#define EXTERN_C extern "C" +#define FASTCALL __fastcall +#define STDCALL __stdcall +#define REDHAWK_API +#define REDHAWK_CALLCONV __fastcall + +#ifdef _MSC_VER + +#define MSVC_SAVE_WARNING_STATE() __pragma(warning(push)) +#define MSVC_DISABLE_WARNING(warn_num) __pragma(warning(disable: warn_num)) +#define MSVC_RESTORE_WARNING_STATE() __pragma(warning(pop)) + +#else + +#define MSVC_SAVE_WARNING_STATE() +#define MSVC_DISABLE_WARNING(warn_num) +#define MSVC_RESTORE_WARNING_STATE() + +#endif // _MSC_VER + +#ifndef COUNTOF +template +char (*COUNTOF_helper(_CountofType (&_Array)[_SizeOfArray]))[_SizeOfArray]; +#define COUNTOF(_Array) sizeof(*COUNTOF_helper(_Array)) +#endif // COUNTOF + +#ifndef offsetof +#define offsetof(s,m) (UIntNative)( (IntNative)&reinterpret_cast((((s *)0)->m)) ) +#endif // offsetof + +#ifndef FORCEINLINE +#define FORCEINLINE __forceinline +#endif + +#ifndef NOINLINE +#ifdef _MSC_VER +#define NOINLINE __declspec(noinline) +#else +#define NOINLINE __attribute__((noinline)) +#endif +#endif + +#ifndef __GCENV_BASE_INCLUDED__ + +// +// This macro returns val rounded up as necessary to be a multiple of alignment; alignment must be a power of 2 +// +inline UIntNative ALIGN_UP(UIntNative val, UIntNative alignment); +template +inline T* ALIGN_UP(T* val, UIntNative alignment); + +inline UIntNative ALIGN_DOWN(UIntNative val, UIntNative alignment); +template +inline T* ALIGN_DOWN(T* val, UIntNative alignment); + +#endif // !__GCENV_BASE_INCLUDED__ + +inline bool IS_ALIGNED(UIntNative val, UIntNative alignment); +template +inline bool IS_ALIGNED(T* val, UIntNative alignment); + +#ifndef DACCESS_COMPILE + +#ifndef ZeroMemory +#define ZeroMemory(_dst, _size) memset((_dst), 0, (_size)) +#endif + +//------------------------------------------------------------------------------------------------- +// min/max + +#ifndef min +#define min(_a, _b) ((_a) < (_b) ? 
(_a) : (_b)) +#endif +#ifndef max +#define max(_a, _b) ((_a) < (_b) ? (_b) : (_a)) +#endif + +#endif // !DACCESS_COMPILE + +//------------------------------------------------------------------------------------------------- +// Platform-specific defines + +#if defined(HOST_AMD64) + +#define LOG2_PTRSIZE 3 +#define POINTER_SIZE 8 + +#elif defined(HOST_X86) + +#define LOG2_PTRSIZE 2 +#define POINTER_SIZE 4 + +#elif defined(HOST_ARM) + +#define LOG2_PTRSIZE 2 +#define POINTER_SIZE 4 + +#elif defined(HOST_ARM64) + +#define LOG2_PTRSIZE 3 +#define POINTER_SIZE 8 + +#elif defined (HOST_WASM) + +#define LOG2_PTRSIZE 2 +#define POINTER_SIZE 4 + +#else +#error Unsupported target architecture +#endif + +#ifndef __GCENV_BASE_INCLUDED__ +#if defined(HOST_AMD64) + +#define DATA_ALIGNMENT 8 +#define OS_PAGE_SIZE 0x1000 + +#elif defined(HOST_X86) + +#define DATA_ALIGNMENT 4 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 +#endif + +#elif defined(HOST_ARM) + +#define DATA_ALIGNMENT 4 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 +#endif + +#elif defined(HOST_ARM64) + +#define DATA_ALIGNMENT 8 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 +#endif + +#elif defined(HOST_WASM) + +#define DATA_ALIGNMENT 4 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x4 +#endif + +#else +#error Unsupported target architecture +#endif +#endif // __GCENV_BASE_INCLUDED__ + +#if defined(TARGET_ARM) +#define THUMB_CODE 1 +#endif + +// +// Define an unmanaged function called from managed code that needs to execute in co-operative GC mode. (There +// should be very few of these, most such functions will be simply p/invoked). +// +#define COOP_PINVOKE_HELPER(_rettype, _method, _args) EXTERN_C REDHAWK_API _rettype __fastcall _method _args +#ifdef HOST_X86 +// We have helpers that act like memcpy and memset from the CRT, so they need to be __cdecl. 
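// (Editor sketch, not part of this change.) For a hypothetical helper name,
//     COOP_PINVOKE_HELPER(void, RhpExampleHelper, (Object * pObj))
// expands (REDHAWK_API being empty) to
//     extern "C" void __fastcall RhpExampleHelper (Object * pObj)
// while the __cdecl variant defined next differs only in the calling convention used on x86.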
+#define COOP_PINVOKE_CDECL_HELPER(_rettype, _method, _args) EXTERN_C REDHAWK_API _rettype __cdecl _method _args +#else +#define COOP_PINVOKE_CDECL_HELPER COOP_PINVOKE_HELPER +#endif + +#ifndef DACCESS_COMPILE +#define IN_DAC(x) +#define NOT_IN_DAC(x) x +#else +#define IN_DAC(x) x +#define NOT_IN_DAC(x) +#endif + +#define INLINE inline + +enum STARTUP_TIMELINE_EVENT_ID +{ + PROCESS_ATTACH_BEGIN = 0, + NONGC_INIT_COMPLETE, + GC_INIT_COMPLETE, + PROCESS_ATTACH_COMPLETE, + + NUM_STARTUP_TIMELINE_EVENTS +}; + +#ifdef PROFILE_STARTUP +extern unsigned __int64 g_startupTimelineEvents[NUM_STARTUP_TIMELINE_EVENTS]; +#define STARTUP_TIMELINE_EVENT(eventid) PalQueryPerformanceCounter((LARGE_INTEGER*)&g_startupTimelineEvents[eventid]); +#else // PROFILE_STARTUP +#define STARTUP_TIMELINE_EVENT(eventid) +#endif // PROFILE_STARTUP + +#ifndef C_ASSERT +#define C_ASSERT(e) static_assert(e, #e) +#endif // C_ASSERT + +#ifdef __llvm__ +#define DECLSPEC_THREAD __thread +#else // __llvm__ +#define DECLSPEC_THREAD __declspec(thread) +#endif // !__llvm__ + +#ifndef __GCENV_BASE_INCLUDED__ +#if !defined(_INC_WINDOWS) +#ifdef _WIN32 +// this must exactly match the typedef used by windows.h +typedef long HRESULT; +#else +typedef int32_t HRESULT; +#endif + +#define S_OK 0x0 +#define E_FAIL 0x80004005 + +#define UNREFERENCED_PARAMETER(P) (void)(P) +#endif // !defined(_INC_WINDOWS) +#endif // __GCENV_BASE_INCLUDED__ + +#endif // __COMMONMACROS_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/CommonMacros.inl b/src/coreclr/src/nativeaot/Runtime/CommonMacros.inl new file mode 100644 index 0000000000000..afc5032835ee1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CommonMacros.inl @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __GCENV_BASE_INCLUDED__ + +// +// This macro returns val rounded up as necessary to be a multiple of alignment; alignment must be a power of 2 +// +inline UIntNative ALIGN_UP( UIntNative val, UIntNative alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + ASSERT( 0 == (alignment & (alignment - 1)) ); + UIntNative result = (val + (alignment - 1)) & ~(alignment - 1); + ASSERT( result >= val ); // check for overflow + + return result; +} + +template +inline T* ALIGN_UP(T* val, UIntNative alignment) +{ + return reinterpret_cast(ALIGN_UP(reinterpret_cast(val), alignment)); +} + +inline UIntNative ALIGN_DOWN( UIntNative val, UIntNative alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + ASSERT( 0 == (alignment & (alignment - 1)) ); + UIntNative result = val & ~(alignment - 1); + return result; +} + +template +inline T* ALIGN_DOWN(T* val, UIntNative alignment) +{ + return reinterpret_cast(ALIGN_DOWN(reinterpret_cast(val), alignment)); +} + +#endif // !__GCENV_BASE_INCLUDED__ + +inline bool IS_ALIGNED(UIntNative val, UIntNative alignment) +{ + ASSERT(0 == (alignment & (alignment - 1))); + return 0 == (val & (alignment - 1)); +} + +template +inline bool IS_ALIGNED(T* val, UIntNative alignment) +{ + ASSERT(0 == (alignment & (alignment - 1))); + return IS_ALIGNED(reinterpret_cast(val), alignment); +} + +// Convert from a PCODE to the corresponding PINSTR. On many architectures this will be the identity function; +// on ARM, this will mask off the THUMB bit. 
+inline TADDR PCODEToPINSTR(PCODE pc) +{ +#ifdef TARGET_ARM + return dac_cast(pc & ~THUMB_CODE); +#else + return dac_cast(pc); +#endif +} + +// Convert from a PINSTR to the corresponding PCODE. On many architectures this will be the identity function; +// on ARM, this will raise the THUMB bit. +inline PCODE PINSTRToPCODE(TADDR addr) +{ +#ifdef TARGET_ARM + return dac_cast(addr | THUMB_CODE); +#else + return dac_cast(addr); +#endif +} diff --git a/src/coreclr/src/nativeaot/Runtime/Crst.cpp b/src/coreclr/src/nativeaot/Runtime/Crst.cpp new file mode 100644 index 0000000000000..1fe0fdb6817e0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Crst.cpp @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "holder.h" +#include "Crst.h" + +void CrstStatic::Init(CrstType eType, CrstFlags eFlags) +{ + UNREFERENCED_PARAMETER(eType); + UNREFERENCED_PARAMETER(eFlags); +#ifndef DACCESS_COMPILE +#if defined(_DEBUG) + m_uiOwnerId.Clear(); +#endif // _DEBUG + PalInitializeCriticalSectionEx(&m_sCritSec, 0, 0); +#endif // !DACCESS_COMPILE +} + +void CrstStatic::Destroy() +{ +#ifndef DACCESS_COMPILE + PalDeleteCriticalSection(&m_sCritSec); +#endif // !DACCESS_COMPILE +} + +// static +void CrstStatic::Enter(CrstStatic *pCrst) +{ +#ifndef DACCESS_COMPILE + PalEnterCriticalSection(&pCrst->m_sCritSec); +#if defined(_DEBUG) + pCrst->m_uiOwnerId.SetToCurrentThread(); +#endif // _DEBUG +#else + UNREFERENCED_PARAMETER(pCrst); +#endif // !DACCESS_COMPILE +} + +// static +void CrstStatic::Leave(CrstStatic *pCrst) +{ +#ifndef DACCESS_COMPILE +#if defined(_DEBUG) + pCrst->m_uiOwnerId.Clear(); +#endif // _DEBUG + PalLeaveCriticalSection(&pCrst->m_sCritSec); +#else + UNREFERENCED_PARAMETER(pCrst); +#endif // !DACCESS_COMPILE +} + +#if defined(_DEBUG) +bool CrstStatic::OwnedByCurrentThread() +{ +#ifndef DACCESS_COMPILE + return m_uiOwnerId.IsCurrentThread(); +#else + return false; +#endif +} + +EEThreadId CrstStatic::GetHolderThreadId() +{ + return m_uiOwnerId; +} +#endif // _DEBUG diff --git a/src/coreclr/src/nativeaot/Runtime/Crst.h b/src/coreclr/src/nativeaot/Runtime/Crst.h new file mode 100644 index 0000000000000..658b0186429d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Crst.h @@ -0,0 +1,127 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// ----------------------------------------------------------------------------------------------------------- +// +// Minimal Crst implementation based on CRITICAL_SECTION. Doesn't support much except for the basic locking +// functionality (in particular there is no rank violation checking). +// + +enum CrstType +{ + CrstHandleTable, + CrstDispatchCache, + CrstAllocHeap, + CrstGenericInstHashtab, + CrstMemAccessMgr, + CrstInterfaceDispatchGlobalLists, + CrstStressLog, + CrstRestrictedCallouts, + CrstGcStressControl, + CrstSuspendEE, + CrstCastCache, + CrstYieldProcessorNormalized, +}; + +enum CrstFlags +{ + CRST_DEFAULT = 0x0, + CRST_REENTRANCY = 0x0, + CRST_UNSAFE_SAMELEVEL = 0x0, + CRST_UNSAFE_ANYMODE = 0x0, + CRST_DEBUGGER_THREAD = 0x0, +}; + +// Static version of Crst with no default constructor (user must call Init() before use). 
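Crst.cpp above implements a thin wrapper over a PAL critical section that must be Init()'d before use, and the CrstHolder declared a little further down in this header applies the usual RAII enter-in-constructor / leave-in-destructor shape to it. A minimal standalone sketch (not part of the patch) of that holder pattern, with std::mutex standing in for the PAL critical section and LockSketch / LockHolderSketch as illustrative names:

#include <mutex>

class LockSketch
{
public:
    void Enter() { m_mutex.lock(); }
    void Leave() { m_mutex.unlock(); }

private:
    std::mutex m_mutex;
};

class LockHolderSketch
{
public:
    explicit LockHolderSketch(LockSketch* pLock) : m_pLock(pLock) { m_pLock->Enter(); }
    ~LockHolderSketch() { m_pLock->Leave(); }   // released even if the scope exits via an exception

    LockHolderSketch(const LockHolderSketch&) = delete;
    LockHolderSketch& operator=(const LockHolderSketch&) = delete;

private:
    LockSketch* m_pLock;
};

static LockSketch g_lock;

int main()
{
    {
        LockHolderSketch holder(&g_lock);       // Enter() in the constructor
        // ... critical-section work ...
    }                                           // Leave() in the destructor at scope exit
    return 0;
}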
+class CrstStatic +{ +public: + void Init(CrstType eType, CrstFlags eFlags = CRST_DEFAULT); + bool InitNoThrow(CrstType eType, CrstFlags eFlags = CRST_DEFAULT) { Init(eType, eFlags); return true; } + void Destroy(); + void Enter() { CrstStatic::Enter(this); } + void Leave() { CrstStatic::Leave(this); } + static void Enter(CrstStatic *pCrst); + static void Leave(CrstStatic *pCrst); +#if defined(_DEBUG) + bool OwnedByCurrentThread(); + EEThreadId GetHolderThreadId(); +#endif // _DEBUG + +private: + CRITICAL_SECTION m_sCritSec; +#if defined(_DEBUG) + EEThreadId m_uiOwnerId; +#endif // _DEBUG +}; + +// Non-static version that will initialize itself during construction. +class Crst : public CrstStatic +{ +public: + Crst(CrstType eType, CrstFlags eFlags = CRST_DEFAULT) + : CrstStatic() + { Init(eType, eFlags); } +}; + +// Holder for a Crst instance. +class CrstHolder +{ + CrstStatic * m_pLock; + +public: + CrstHolder(CrstStatic * pLock) + : m_pLock(pLock) + { + m_pLock->Enter(); + } + + ~CrstHolder() + { + m_pLock->Leave(); + } +}; + +class CrstHolderWithState +{ + CrstStatic * m_pLock; + bool m_fAcquired; + +public: + CrstHolderWithState(CrstStatic * pLock, bool fAcquire = true) + : m_pLock(pLock), m_fAcquired(fAcquire) + { + if (fAcquire) + m_pLock->Enter(); + } + + ~CrstHolderWithState() + { + if (m_fAcquired) + m_pLock->Leave(); + } + + void Acquire() + { + if (!m_fAcquired) + { + m_pLock->Enter(); + m_fAcquired = true; + } + } + + void Release() + { + if (m_fAcquired) + { + m_pLock->Leave(); + m_fAcquired = false; + } + } + + CrstStatic * GetValue() + { + return m_pLock; + } +}; diff --git a/src/coreclr/src/nativeaot/Runtime/Debug.h b/src/coreclr/src/nativeaot/Runtime/Debug.h new file mode 100644 index 0000000000000..79c9e5924eb36 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Debug.h @@ -0,0 +1,101 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#pragma once + +// The following definitions are required for interop with the VS Debugger +// Prior to making any changes to these, please reach out to the VS Debugger +// team to make sure that your changes are not going to prevent the debugger +// from working. + +enum FuncEvalEntryPointMode : uint32_t +{ + FixedAddress = 0, + VirtualMethodSlotOnly = 1, + InterfaceDispatch = 2, +}; + +enum FuncEvalMode : uint32_t +{ + CallParameterizedFunction = 1, + NewStringWithLength = 2, + NewParameterizedArray = 3, + NewParameterizedObjectNoConstructor = 4, + NewParameterizedObject = 5, +}; + +enum DebuggerGcProtectionRequestKind : uint16_t +{ + EnsureConservativeReporting = 1, + RemoveConservativeReporting = 2, + EnsureHandle = 3, + RemoveHandle = 4 +}; + +/** + * This structure represents a request from the debugger to perform a GC protection related work. + */ +struct DebuggerGcProtectionRequest +{ + DebuggerGcProtectionRequestKind kind; + union + { + uint16_t size; + uint16_t type; + }; + uint32_t identifier; + uint64_t address; + uint64_t payload; /* TODO, FuncEval, what would be a better name for this? 
*/ +}; + +enum DebuggerResponseKind : uint32_t +{ + FuncEvalCompleteWithReturn = 0, + FuncEvalCompleteWithException = 1, + FuncEvalParameterBufferReady = 2, + RequestBufferReady = 3, + ConservativeReportingBufferReady = 4, + HandleReady = 5, + FuncEvalCrossThreadDependency = 6, +}; + +struct DebuggerResponse +{ + DebuggerResponseKind kind; +}; + +struct DebuggerGcProtectionResponse +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t bufferAddress; +}; + +struct DebuggerGcProtectionHandleReadyResponse +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t payload; + uint64_t handle; +}; + +struct DebuggerFuncEvalCompleteWithReturnResponse +{ + DebuggerResponseKind kind; + uint32_t returnHandleIdentifier; + uint64_t returnAddress; +}; + +struct DebuggerFuncEvalParameterBufferReadyResponse +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t bufferAddress; +}; + +struct DebuggerFuncEvalCrossThreadDependencyNotification +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t payload; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/DebugEventSource.cpp b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.cpp new file mode 100644 index 0000000000000..21ed91ff9598e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.cpp @@ -0,0 +1,209 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "type_traits.hpp" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "DebugEventSource.h" + +#include "slist.inl" + +#include "DebugEvents.h" + +GVAL_IMPL_INIT(UInt32, g_DebuggerEventsFilter, 0); + +#ifndef DACCESS_COMPILE + +bool EventEnabled(DebugEventType eventType) +{ + return ((int)eventType > 0) && + ((g_DebuggerEventsFilter & (1 << ((int)eventType-1))) != 0); +} + +void DebugEventSource::SendModuleLoadEvent(void* pAddressInModule) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_LOAD_MODULE)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_LOAD_MODULE; + payload.ModuleLoadUnload.pModuleHeader = (CORDB_ADDRESS)pAddressInModule; + SendRawEvent(&payload); +} + +void DebugEventSource::SendExceptionThrownEvent(CORDB_ADDRESS faultingIP, CORDB_ADDRESS faultingFrameSP) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_THROWN)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_THROWN; + payload.Exception.ip = faultingIP; + payload.Exception.sp = faultingFrameSP; + SendRawEvent(&payload); +} + +void DebugEventSource::SendExceptionCatchHandlerFoundEvent(CORDB_ADDRESS handlerIP, CORDB_ADDRESS HandlerFrameSP) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND; + payload.Exception.ip = handlerIP; + payload.Exception.sp = HandlerFrameSP; + SendRawEvent(&payload); +} + +void DebugEventSource::SendExceptionUnhandledEvent() +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED; + payload.Exception.ip = (CORDB_ADDRESS)0; + payload.Exception.sp = (CORDB_ADDRESS)0; + SendRawEvent(&payload); +} + +void 
DebugEventSource::SendExceptionFirstPassFrameEnteredEvent(CORDB_ADDRESS ipInFrame, CORDB_ADDRESS frameSP) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER; + payload.Exception.ip = ipInFrame; + payload.Exception.sp = frameSP; + SendRawEvent(&payload); +} + +void DebugEventSource::SendCustomEvent(void* payload, int length) +{ + if (!EventEnabled(DEBUG_EVENT_TYPE_CUSTOM)) + return; + DebugEventPayload rawPayload; + rawPayload.type = DEBUG_EVENT_TYPE_CUSTOM; + rawPayload.Custom.payload = (CORDB_ADDRESS)payload; + rawPayload.Custom.length = length; + SendRawEvent(&rawPayload); +} + +//--------------------------------------------------------------------------------------- +// +// Sends a raw managed debug event to the debugger. +// +// Arguments: +// pPayload - managed debug event data +// +// +// Notes: +// The entire process will get frozen by the debugger once we send. The debugger +// needs to resume the process. It may detach as well. +// See CordbProcess::DecodeEvent in mscordbi for decoding this event. These methods must stay in sync. +// +//--------------------------------------------------------------------------------------- +void DebugEventSource::SendRawEvent(DebugEventPayload* pPayload) +{ +#ifdef _MSC_VER + // We get to send an array of void* as data with the notification. + // The debugger can then use ReadProcessMemory to read through this array. + UInt64 rgData [] = { + (UInt64) CLRDBG_EXCEPTION_DATA_CHECKSUM, + (UInt64) GetRuntimeInstance()->GetPalInstance(), + (UInt64) pPayload + }; + + // + // Physically send the event via an OS Exception. We're using exceptions as a notification + // mechanism on top of the OS native debugging pipeline. + // + __try + { + const UInt32 dwFlags = 0; // continuable (eg, Debugger can continue GH) + // RaiseException treats arguments as pointer sized values, but we encoded 3 QWORDS. + // On 32 bit platforms we have 6 elements, on 64 bit platforms we have 3 elements + RaiseException(CLRDBG_NOTIFICATION_EXCEPTION_CODE, dwFlags, 3*sizeof(UInt64)/sizeof(UInt32*), (UInt32*)rgData); + + // If debugger continues "GH" (DBG_CONTINUE), then we land here. + // This is the expected path for a well-behaved ICorDebug debugger. 
+ } + __except(1) + { + // We can get here if: + // An ICorDebug aware debugger enabled the debug events AND + // a) the debugger detached during the event OR + // b) the debugger continues "GN" (DBG_EXCEPTION_NOT_HANDLED) - this would be considered a badly written debugger + // + // there is no great harm in reaching here but it is a needless perf-cost + } +#endif // _MSC_VER +} + +//keep these synced with the enumeration in exceptionhandling.cs +enum ExceptionEventKind +{ + EEK_Thrown=1, + EEK_CatchHandlerFound=2, + EEK_Unhandled=4, + EEK_FirstPassFrameEntered=8 +}; + +//Called by the C# exception dispatch code with events to send to the debugger +EXTERN_C REDHAWK_API void __cdecl RhpSendExceptionEventToDebugger(ExceptionEventKind eventKind, void* ip, void* sp) +{ + CORDB_ADDRESS cordbIP = (CORDB_ADDRESS)ip; + CORDB_ADDRESS cordbSP = (CORDB_ADDRESS)sp; +#if HOST_ARM + // clear the THUMB-bit from IP + cordbIP &= ~1; +#endif + + if(eventKind == EEK_Thrown) + { + DebugEventSource::SendExceptionThrownEvent(cordbIP, cordbSP); + } + else if(eventKind == EEK_CatchHandlerFound) + { + DebugEventSource::SendExceptionCatchHandlerFoundEvent(cordbIP, cordbSP); + } + else if(eventKind == EEK_Unhandled) + { + DebugEventSource::SendExceptionUnhandledEvent(); + } + else if(eventKind == EEK_FirstPassFrameEntered) + { + DebugEventSource::SendExceptionFirstPassFrameEnteredEvent(cordbIP, cordbSP); + } +} + +// Called to cache the current events the debugger is listening for in the C# implemented exception layer +// Filtering in managed code prevents making unneeded p/invokes +COOP_PINVOKE_HELPER(ExceptionEventKind, RhpGetRequestedExceptionEvents, ()) +{ + int mask = 0; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_THROWN)) + mask |= EEK_Thrown; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND)) + mask |= EEK_CatchHandlerFound; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED)) + mask |= EEK_Unhandled; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER)) + mask |= EEK_FirstPassFrameEntered; + return (ExceptionEventKind)mask; +} + +//Called by the C# func eval code to hand shake with the debugger +COOP_PINVOKE_HELPER(void, RhpSendCustomEventToDebugger, (void* payload, int length)) +{ + DebugEventSource::SendCustomEvent(payload, length); +} + +#endif //!DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/DebugEventSource.h b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.h new file mode 100644 index 0000000000000..51ea208f6b7ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.h @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ----------------------------------------------------------------------------------------------------------- +// Support for emitting debug events with particular payloads that a managed-aware debugger can listen for. +// The events are generated using 1st chance SEH exceptions that the debugger should immediately continue +// so the exception never dispatches back into runtime code. However just in case the debugger disconnects +// or doesn't behave well we've got a backstop catch handler that will prevent it from escaping the code in +// DebugEventSource. +// ----------------------------------------------------------------------------------------------------------- + +#ifndef __DEBUG_EVENT_SOURCE_H_ +#define __DEBUG_EVENT_SOURCE_H_ + +// This global is set from out of process using the debugger. 
It controls which events are emitted. +GVAL_DECL(UInt32, g_DebuggerEventsFilter); + +typedef UInt64 CORDB_ADDRESS; + +#ifndef DACCESS_COMPILE + +struct DebugEventPayload; + +class DebugEventSource +{ +public: + static void SendModuleLoadEvent(void* addressInModule); + static void SendExceptionThrownEvent(CORDB_ADDRESS faultingIP, CORDB_ADDRESS faultingFrameSP); + static void SendExceptionCatchHandlerFoundEvent(CORDB_ADDRESS handlerIP, CORDB_ADDRESS HandlerFrameSP); + static void SendExceptionUnhandledEvent(); + static void SendExceptionFirstPassFrameEnteredEvent(CORDB_ADDRESS ipInFrame, CORDB_ADDRESS frameSP); + static void SendCustomEvent(void* payload, int length); +private: + static void SendRawEvent(DebugEventPayload* payload); +}; + + +#endif //!DACCESS_COMPILE + + +#endif // __DEBUG_EVENT_SOURCE_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.cpp b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.cpp new file mode 100644 index 0000000000000..cf9764b2c7f95 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.cpp @@ -0,0 +1,88 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "CommonTypes.h" +#include "DebugFuncEval.h" +#include "rhassert.h" +#include "RWLock.h" +#include "slist.h" +#include "RuntimeInstance.h" + +GVAL_IMPL_INIT(UInt32, g_FuncEvalMode, 0); +GVAL_IMPL_INIT(UInt32, g_FuncEvalParameterBufferSize, 0); +GVAL_IMPL_INIT(UInt64, g_MostRecentFuncEvalHijackInstructionPointer, 0); + +#ifndef DACCESS_COMPILE + +/* static */ UInt32 DebugFuncEval::GetFuncEvalParameterBufferSize() +{ + return g_FuncEvalParameterBufferSize; +} + +/* static */ UInt32 DebugFuncEval::GetFuncEvalMode() +{ + return g_FuncEvalMode; +} + +/* static */ UInt64 DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer() +{ + return g_MostRecentFuncEvalHijackInstructionPointer; +} + +/// +/// Retrieve the global FuncEval parameter buffer size. +/// +/// +/// During debugging, if a FuncEval is requested, +/// the func eval infrastructure needs to know how much buffer to allocate for the debugger to +/// write the parameter information in. The C# supporting code will call this API to obtain the +/// buffer size. By that time, the value should have been set through the UpdateFuncEvalParameterBufferSize() +/// method on the ISosRedhawk7 interface. +/// +EXTERN_C REDHAWK_API UInt32 __cdecl RhpGetFuncEvalParameterBufferSize() +{ + return DebugFuncEval::GetFuncEvalParameterBufferSize(); +} + +/// +/// Retrieve the global FuncEval mode. +/// +/// +/// During debugging, if a FuncEval is requested, +/// the func eval infrastructure needs to know what mode to execute the FuncEval request +/// The C# supporting code will call this API to obtain the mode. By that time, the value +/// should have been set through the UpdateFuncEvalMode() method on the ISosRedhawk7 interface. +/// +EXTERN_C REDHAWK_API UInt32 __cdecl RhpGetFuncEvalMode() +{ + return DebugFuncEval::GetFuncEvalMode(); +} + +/// +/// Initiate the func eval abort +/// +/// +/// This is the entry point of FuncEval abort +/// When the debugger decides to abort the FuncEval, it will create a remote thread calling this function. +/// This function will call back into the DebugFuncEvalAbortHelper to perform the abort. 
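RhpGetFuncEvalMode() above hands the raw g_FuncEvalMode value back to the C# func-eval support code; the values correspond to the FuncEvalMode enum declared earlier in Debug.h. A minimal standalone sketch (not part of the patch) of how a consumer might decode that raw UInt32; the DescribeMode helper and its strings are invented for illustration:

#include <cstdint>
#include <cstdio>

// Mirrors the FuncEvalMode values from Debug.h above.
enum class FuncEvalMode : uint32_t
{
    CallParameterizedFunction           = 1,
    NewStringWithLength                 = 2,
    NewParameterizedArray               = 3,
    NewParameterizedObjectNoConstructor = 4,
    NewParameterizedObject              = 5,
};

static const char* DescribeMode(uint32_t rawMode)
{
    switch (static_cast<FuncEvalMode>(rawMode))
    {
    case FuncEvalMode::CallParameterizedFunction:           return "call a function";
    case FuncEvalMode::NewStringWithLength:                 return "allocate a string";
    case FuncEvalMode::NewParameterizedArray:               return "allocate an array";
    case FuncEvalMode::NewParameterizedObjectNoConstructor: return "allocate an object without running a constructor";
    case FuncEvalMode::NewParameterizedObject:              return "allocate and construct an object";
    default:                                                return "unknown / no func eval pending";
    }
}

int main()
{
    std::printf("%s\n", DescribeMode(2));   // prints "allocate a string"
    return 0;
}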
+EXTERN_C REDHAWK_API void __cdecl RhpInitiateFuncEvalAbort(void* pointerFromDebugger) +{ + DebugFuncEvalAbortHelperFunctionType debugFuncEvalAbortHelperFunction = (DebugFuncEvalAbortHelperFunctionType)GetRuntimeInstance()->GetClasslibFunctionFromCodeAddress((void*)g_MostRecentFuncEvalHijackInstructionPointer, ClasslibFunctionId::DebugFuncEvalAbortHelper); + ASSERT(debugFuncEvalAbortHelperFunction != nullptr); + debugFuncEvalAbortHelperFunction((Int64)pointerFromDebugger); +} + +#else + +UInt64 DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer() +{ + return g_MostRecentFuncEvalHijackInstructionPointer; +} + +#endif //!DACCESS_COMPILE + +EXTERN_C void RhpDebugFuncEvalHelper(void*, void*); +GPTR_IMPL_INIT(PTR_VOID, g_RhpDebugFuncEvalHelperAddr, (void **)(&RhpDebugFuncEvalHelper)); + +GPTR_IMPL_INIT(PTR_VOID, g_RhpInitiateFuncEvalAbortAddr, (void**)&RhpInitiateFuncEvalAbort); diff --git a/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.h b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.h new file mode 100644 index 0000000000000..7ddac1cf95963 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.h @@ -0,0 +1,72 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ----------------------------------------------------------------------------------------------------------- +// Support for evaluating expression in the debuggee during debugging +// ----------------------------------------------------------------------------------------------------------- + +#ifndef __DEBUG_FUNC_EVAL_H__ +#define __DEBUG_FUNC_EVAL_H__ + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" + +#ifndef DACCESS_COMPILE + +typedef void(*DebugFuncEvalAbortHelperFunctionType)(UInt64); + +class DebugFuncEval +{ +public: + /// + /// Retrieve the global FuncEval parameter buffer size. + /// + /// + /// During debugging, if a FuncEval is requested, + /// the func eval infrastructure needs to know how much buffer to allocate for the debugger to + /// write the parameter information in. The C# supporting code will call this API to obtain the + /// buffer size. By that time, the value should have been set through the UpdateFuncEvalParameterSize() + /// method on the ISosRedhawk7 interface. + /// + static UInt32 GetFuncEvalParameterBufferSize(); + + /// + /// Retrieve the global FuncEval mode. + /// + /// + /// During debugging, if a FuncEval is requested, + /// the func eval infrastructure needs to know what mode to execute the FuncEval request + /// The C# supporting code will call this API to obtain the mode. By that time, the value + /// should have been set through the UpdateFuncEvalMode() method on the ISosRedhawk7 interface. 
+ /// + static UInt32 GetFuncEvalMode(); + + /// + /// Retrieve the most recent FuncEval Hijack instruction pointer + /// + /// + /// The most recent FuncEval Hijack instruction pointer is set through the debugger + /// It is used for the stack walker to understand the hijack frame + /// + static UInt64 GetMostRecentFuncEvalHijackInstructionPointer(); +}; + +#else + +class DebugFuncEval +{ +public: + /// + /// Retrieve the most recent FuncEval Hijack instruction pointer + /// + /// + /// The most recent FuncEval Hijack instruction pointer is set through the debugger + /// It is used for the stack walker to understand the hijack frame + /// + static UInt64 GetMostRecentFuncEvalHijackInstructionPointer(); +}; + +#endif //!DACCESS_COMPILE + +#endif // __DEBUG_FUNC_EVAL_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/DebuggerHook.cpp b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.cpp new file mode 100644 index 0000000000000..0138ac0d156a9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.cpp @@ -0,0 +1,235 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "gcrhinterface.h" +#include "DebuggerHook.h" +#include "DebugEventSource.h" + +GVAL_IMPL_INIT(UInt32, g_numGcProtectionRequests, 0); + +#ifndef DACCESS_COMPILE + +/* static */ DebuggerProtectedBufferListNode* DebuggerHook::s_debuggerProtectedBuffers = nullptr; + +/* static */ DebuggerOwnedHandleListNode* DebuggerHook::s_debuggerOwnedHandles = nullptr; + +/* static */ UInt32 DebuggerHook::s_debuggeeInitiatedHandleIdentifier = 2; + +/* static */ void DebuggerHook::OnBeforeGcCollection() +{ + if (g_numGcProtectionRequests > 0) + { + // The debugger has some requests with respect to GC protection. + // Here we are allocating a buffer to store them + DebuggerGcProtectionRequest* requests = new (nothrow) DebuggerGcProtectionRequest[g_numGcProtectionRequests]; + + // Notifying the debugger the buffer is ready to use + DebuggerGcProtectionResponse response; + response.kind = DebuggerResponseKind::RequestBufferReady; + response.bufferAddress = (uint64_t)requests; + DebugEventSource::SendCustomEvent((void*)&response, sizeof(response)); + + // ... debugger magic happen here ... + + // The debugger has filled the requests array + for (uint32_t i = 0; i < g_numGcProtectionRequests; i++) + { + if (requests[i].kind == DebuggerGcProtectionRequestKind::EnsureConservativeReporting) + { + // If the request requires extra memory, allocate for it + requests[i].address = (uint64_t)new (nothrow) uint8_t[requests[i].size]; + + // The debugger will handle the case when address is nullptr (we have to break our promise) + } + } + + // TODO, FuncEval, consider an optimization to eliminate this message when they is nothing required from the + // debugger side to fill + + response.kind = DebuggerResponseKind::ConservativeReportingBufferReady; + DebugEventSource::SendCustomEvent((void*)&response, sizeof(response)); + + // ... debugger magic happen here again ... 
+ + for (uint32_t i = 0; i < g_numGcProtectionRequests; i++) + { + DebuggerGcProtectionRequest* request = requests + i; + switch(request->kind) + { + case DebuggerGcProtectionRequestKind::EnsureConservativeReporting: + EnsureConservativeReporting(request); + break; + + case DebuggerGcProtectionRequestKind::RemoveConservativeReporting: + RemoveConservativeReporting(request); + break; + + case DebuggerGcProtectionRequestKind::EnsureHandle: + EnsureHandle(request); + break; + + case DebuggerGcProtectionRequestKind::RemoveHandle: + RemoveHandle(request); + break; + + default: + assert("Debugger is providing an invalid request kind." && false); + } + } + + g_numGcProtectionRequests = 0; + } +} + +/* static */ UInt32 DebuggerHook::RecordDebuggeeInitiatedHandle(void* objectHandle) +{ + DebuggerOwnedHandleListNode* head = new (nothrow) DebuggerOwnedHandleListNode(); + if (head == nullptr) + { + return 0; + } + + head->handle = objectHandle; + head->identifier = DebuggerHook::s_debuggeeInitiatedHandleIdentifier; + head->next = s_debuggerOwnedHandles; + s_debuggerOwnedHandles = head; + + s_debuggeeInitiatedHandleIdentifier += 2; + + return head->identifier; +} + +/* static */ void DebuggerHook::EnsureConservativeReporting(DebuggerGcProtectionRequest* request) +{ + DebuggerProtectedBufferListNode* tail = DebuggerHook::s_debuggerProtectedBuffers; + s_debuggerProtectedBuffers = new (std::nothrow) DebuggerProtectedBufferListNode(); + if (s_debuggerProtectedBuffers == nullptr) + { + s_debuggerProtectedBuffers = tail; + // TODO, FuncEval, we cannot handle the debugger request to protect a buffer (we have to break our promise) + // TODO, FuncEval, we need to figure out how to communicate this broken promise to the debugger + } + else + { + s_debuggerProtectedBuffers->address = request->address; + s_debuggerProtectedBuffers->size = request->size; + s_debuggerProtectedBuffers->identifier = request->identifier; + s_debuggerProtectedBuffers->next = tail; + } +} + +/* static */ void DebuggerHook::RemoveConservativeReporting(DebuggerGcProtectionRequest* request) +{ + DebuggerProtectedBufferListNode* prev = nullptr; + DebuggerProtectedBufferListNode* curr = DebuggerHook::s_debuggerProtectedBuffers; + while (true) + { + if (curr == nullptr) + { + assert("Debugger is trying to remove a conservative reporting entry which is no longer exist." 
&& false); + break; + } + if (curr->identifier == request->identifier) + { + DebuggerProtectedBufferListNode* toDelete = curr; + if (prev == nullptr) + { + // We are trying to remove the head of the linked list + DebuggerHook::s_debuggerProtectedBuffers = curr->next; + } + else + { + prev->next = curr->next; + } + + delete toDelete; + break; + } + else + { + prev = curr; + curr = curr->next; + } + } +} + +/* static */ void DebuggerHook::EnsureHandle(DebuggerGcProtectionRequest* request) +{ + DebuggerOwnedHandleListNode* tail = DebuggerHook::s_debuggerOwnedHandles; + s_debuggerOwnedHandles = new (std::nothrow) DebuggerOwnedHandleListNode(); + if (s_debuggerOwnedHandles == nullptr) + { + s_debuggerOwnedHandles = tail; + // TODO, FuncEval, we cannot handle the debugger request to protect a buffer (we have to break our promise) + // TODO, FuncEval, we need to figure out how to communicate this broken promise to the debugger + } + else + { + int handleType = (int)request->type; + void* handle = RedhawkGCInterface::CreateTypedHandle((void*)request->address, handleType); + + DebuggerGcProtectionHandleReadyResponse response; + response.kind = DebuggerResponseKind::HandleReady; + response.payload = request->payload; + response.handle = (uint64_t)handle; + DebugEventSource::SendCustomEvent((void*)&response, sizeof(response)); + + s_debuggerOwnedHandles->handle = handle; + s_debuggerOwnedHandles->identifier = request->identifier; + s_debuggerOwnedHandles->next = tail; + } +} + +/* static */ void DebuggerHook::RemoveHandle(DebuggerGcProtectionRequest* request) +{ + DebuggerOwnedHandleListNode* prev = nullptr; + DebuggerOwnedHandleListNode* curr = DebuggerHook::s_debuggerOwnedHandles; + while (true) + { + if (curr == nullptr) + { + assert("Debugger is trying to remove a gc handle entry which is no longer exist." && false); + break; + } + if (curr->identifier == request->identifier) + { + DebuggerOwnedHandleListNode* toDelete = curr; + RedhawkGCInterface::DestroyTypedHandle(toDelete->handle); + + if (prev == nullptr) + { + // We are trying to remove the head of the linked list + DebuggerHook::s_debuggerOwnedHandles = curr->next; + } + else + { + prev->next = curr->next; + } + + delete toDelete; + break; + } + else + { + prev = curr; + curr = curr->next; + } + } +} + +EXTERN_C REDHAWK_API UInt32 __cdecl RhpRecordDebuggeeInitiatedHandle(void* objectHandle) +{ + return DebuggerHook::RecordDebuggeeInitiatedHandle(objectHandle); +} + +EXTERN_C REDHAWK_API void __cdecl RhpVerifyDebuggerCleanup() +{ + assert(DebuggerHook::s_debuggerOwnedHandles == nullptr); + assert(DebuggerHook::s_debuggerProtectedBuffers == nullptr); +} + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/DebuggerHook.h b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.h new file mode 100644 index 0000000000000..86ef5066f50e3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.h @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
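The GC-protection bookkeeping in DebuggerHook.cpp above is built on two hand-rolled singly linked lists (s_debuggerProtectedBuffers and s_debuggerOwnedHandles): nodes are pushed at the head with new (nothrow), and removal walks the list with a prev/curr pair keyed by identifier. A minimal standalone sketch (not part of the patch) of that pattern; Node, Push, and RemoveById are illustrative names:

#include <cassert>
#include <cstdint>
#include <new>

struct Node
{
    uint32_t identifier;
    Node*    next;
};

static Node* g_head = nullptr;

static bool Push(uint32_t id)
{
    Node* node = new (std::nothrow) Node();
    if (node == nullptr)
        return false;                    // mirror the "broken promise" path: leave the list untouched
    node->identifier = id;
    node->next = g_head;                 // push at the head
    g_head = node;
    return true;
}

static void RemoveById(uint32_t id)
{
    Node* prev = nullptr;
    for (Node* curr = g_head; curr != nullptr; prev = curr, curr = curr->next)
    {
        if (curr->identifier != id)
            continue;
        (prev == nullptr ? g_head : prev->next) = curr->next;  // unlink head or interior node
        delete curr;
        return;
    }
    assert("identifier not found" && false);  // DebuggerHook asserts in the same situation
}

int main()
{
    Push(2); Push(4); Push(6);           // list is now 6 -> 4 -> 2
    RemoveById(4);                       // removes an interior node
    RemoveById(6);                       // removes the head
    assert(g_head != nullptr && g_head->identifier == 2);
    return 0;
}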
+// ----------------------------------------------------------------------------------------------------------- +// Support for evaluating expression in the debuggee during debugging +// ----------------------------------------------------------------------------------------------------------- + +#ifndef __DEBUGGER_HOOK_H__ +#define __DEBUGGER_HOOK_H__ + +#include "common.h" +#include "CommonTypes.h" +#ifdef DACCESS_COMPILE +#include "CommonMacros.h" +#endif +#include "daccess.h" +#include "Debug.h" + +#ifndef DACCESS_COMPILE + +struct DebuggerProtectedBufferListNode +{ + UInt64 address; + UInt16 size; + UInt32 identifier; + struct DebuggerProtectedBufferListNode* next; +}; + +struct DebuggerOwnedHandleListNode +{ + void* handle; + UInt32 identifier; + struct DebuggerOwnedHandleListNode* next; +}; + +class DebuggerHook +{ +public: + static void OnBeforeGcCollection(); + static UInt32 RecordDebuggeeInitiatedHandle(void* handle); + static DebuggerProtectedBufferListNode* s_debuggerProtectedBuffers; + static DebuggerOwnedHandleListNode* s_debuggerOwnedHandles; +private: + static void EnsureConservativeReporting(DebuggerGcProtectionRequest* request); + static void RemoveConservativeReporting(DebuggerGcProtectionRequest* request); + static void EnsureHandle(DebuggerGcProtectionRequest* request); + static void RemoveHandle(DebuggerGcProtectionRequest* request); + static UInt32 s_debuggeeInitiatedHandleIdentifier; +}; + +#endif //!DACCESS_COMPILE + +#endif // __DEBUGGER_HOOK_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/EHHelpers.cpp new file mode 100644 index 0000000000000..3df7fcecb196d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/EHHelpers.cpp @@ -0,0 +1,484 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#ifndef DACCESS_COMPILE +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "TypeManager.h" +#include "varint.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "holder.h" +#include "Crst.h" +#include "RuntimeInstance.h" +#include "event.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "stressLog.h" +#include "rhbinder.h" +#include "eetype.h" +#include "eetype.inl" + +COOP_PINVOKE_HELPER(Boolean, RhpEHEnumInitFromStackFrameIterator, ( + StackFrameIterator* pFrameIter, void ** pMethodStartAddressOut, EHEnum* pEHEnum)) +{ + ICodeManager * pCodeManager = pFrameIter->GetCodeManager(); + pEHEnum->m_pCodeManager = pCodeManager; + + return pCodeManager->EHEnumInit(pFrameIter->GetMethodInfo(), pMethodStartAddressOut, &pEHEnum->m_state); +} + +COOP_PINVOKE_HELPER(Boolean, RhpEHEnumNext, (EHEnum* pEHEnum, EHClause* pEHClause)) +{ + return pEHEnum->m_pCodeManager->EHEnumNext(&pEHEnum->m_state, pEHClause); +} + +// Unmanaged helper to locate one of two classlib-provided functions that the runtime needs to +// implement throwing of exceptions out of Rtm, and fail-fast. This may return NULL if the classlib +// found via the provided address does not have the necessary exports. 
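RhpEHEnumInitFromStackFrameIterator and RhpEHEnumNext above expose the code manager's EH clauses to the managed exception dispatcher through a classic init/next enumeration. A minimal standalone sketch (not part of the patch) of that idiom; EHEnumSketch, EHClauseSketch, and the clause offsets are invented for illustration and do not reflect any real method's EH table:

#include <cstdint>
#include <cstdio>
#include <vector>

struct EHClauseSketch
{
    uint32_t tryStartOffset;
    uint32_t tryEndOffset;
};

struct EHEnumSketch
{
    std::vector<EHClauseSketch> clauses; // stands in for the code manager's EH info
    size_t next = 0;
};

static bool EHEnumInitSketch(EHEnumSketch* pEnum)
{
    pEnum->clauses = { { 0x10, 0x40 }, { 0x50, 0x80 } };
    pEnum->next = 0;
    return !pEnum->clauses.empty();      // true means there is at least one clause to enumerate
}

static bool EHEnumNextSketch(EHEnumSketch* pEnum, EHClauseSketch* pClause)
{
    if (pEnum->next >= pEnum->clauses.size())
        return false;                    // enumeration finished
    *pClause = pEnum->clauses[pEnum->next++];
    return true;
}

int main()
{
    EHEnumSketch ehEnum;
    EHClauseSketch clause;
    if (EHEnumInitSketch(&ehEnum))
    {
        while (EHEnumNextSketch(&ehEnum, &clause))   // same shape as the managed dispatch loop
            std::printf("try region [0x%x, 0x%x)\n",
                        (unsigned)clause.tryStartOffset, (unsigned)clause.tryEndOffset);
    }
    return 0;
}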
+COOP_PINVOKE_HELPER(void *, RhpGetClasslibFunctionFromCodeAddress, (void * address, ClasslibFunctionId functionId)) +{ + return GetRuntimeInstance()->GetClasslibFunctionFromCodeAddress(address, functionId); +} + +// Unmanaged helper to locate one of two classlib-provided functions that the runtime needs to +// implement throwing of exceptions out of Rtm, and fail-fast. This may return NULL if the classlib +// found via the provided address does not have the necessary exports. +COOP_PINVOKE_HELPER(void *, RhpGetClasslibFunctionFromEEType, (EEType * pEEType, ClasslibFunctionId functionId)) +{ + return pEEType->GetTypeManagerPtr()->AsTypeManager()->GetClasslibFunction(functionId); +} + +COOP_PINVOKE_HELPER(void, RhpValidateExInfoStack, ()) +{ + Thread * pThisThread = ThreadStore::GetCurrentThread(); + pThisThread->ValidateExInfoStack(); +} + +COOP_PINVOKE_HELPER(void, RhpClearThreadDoNotTriggerGC, ()) +{ + Thread * pThisThread = ThreadStore::GetCurrentThread(); + + if (!pThisThread->IsDoNotTriggerGcSet()) + RhFailFast(); + + pThisThread->ClearDoNotTriggerGc(); +} + +COOP_PINVOKE_HELPER(void, RhpSetThreadDoNotTriggerGC, ()) +{ + Thread * pThisThread = ThreadStore::GetCurrentThread(); + + if (pThisThread->IsDoNotTriggerGcSet()) + RhFailFast(); + + pThisThread->SetDoNotTriggerGc(); +} + +COOP_PINVOKE_HELPER(Int32, RhGetModuleFileName, (HANDLE moduleHandle, _Out_ const TCHAR** pModuleNameOut)) +{ + return PalGetModuleFileName(pModuleNameOut, moduleHandle); +} + +COOP_PINVOKE_HELPER(void, RhpCopyContextFromExInfo, (void * pOSContext, Int32 cbOSContext, PAL_LIMITED_CONTEXT * pPalContext)) +{ + UNREFERENCED_PARAMETER(cbOSContext); + ASSERT(cbOSContext >= sizeof(CONTEXT)); + CONTEXT* pContext = (CONTEXT *)pOSContext; +#if defined(UNIX_AMD64_ABI) + pContext->Rip = pPalContext->IP; + pContext->Rsp = pPalContext->Rsp; + pContext->Rbp = pPalContext->Rbp; + pContext->Rdx = pPalContext->Rdx; + pContext->Rax = pPalContext->Rax; + pContext->Rbx = pPalContext->Rbx; + pContext->R12 = pPalContext->R12; + pContext->R13 = pPalContext->R13; + pContext->R14 = pPalContext->R14; + pContext->R15 = pPalContext->R15; +#elif defined(HOST_AMD64) + pContext->Rip = pPalContext->IP; + pContext->Rsp = pPalContext->Rsp; + pContext->Rbp = pPalContext->Rbp; + pContext->Rdi = pPalContext->Rdi; + pContext->Rsi = pPalContext->Rsi; + pContext->Rax = pPalContext->Rax; + pContext->Rbx = pPalContext->Rbx; + pContext->R12 = pPalContext->R12; + pContext->R13 = pPalContext->R13; + pContext->R14 = pPalContext->R14; + pContext->R15 = pPalContext->R15; +#elif defined(HOST_X86) + pContext->Eip = pPalContext->IP; + pContext->Esp = pPalContext->Rsp; + pContext->Ebp = pPalContext->Rbp; + pContext->Edi = pPalContext->Rdi; + pContext->Esi = pPalContext->Rsi; + pContext->Eax = pPalContext->Rax; + pContext->Ebx = pPalContext->Rbx; +#elif defined(HOST_ARM) + pContext->R0 = pPalContext->R0; + pContext->R4 = pPalContext->R4; + pContext->R5 = pPalContext->R5; + pContext->R6 = pPalContext->R6; + pContext->R7 = pPalContext->R7; + pContext->R8 = pPalContext->R8; + pContext->R9 = pPalContext->R9; + pContext->R10 = pPalContext->R10; + pContext->R11 = pPalContext->R11; + pContext->Sp = pPalContext->SP; + pContext->Lr = pPalContext->LR; + pContext->Pc = pPalContext->IP; +#elif defined(HOST_ARM64) + pContext->X0 = pPalContext->X0; + pContext->X1 = pPalContext->X1; + // TODO: Copy registers X2-X7 when we start supporting HVA's + pContext->X19 = pPalContext->X19; + pContext->X20 = pPalContext->X20; + pContext->X21 = pPalContext->X21; + pContext->X22 = 
pPalContext->X22; + pContext->X23 = pPalContext->X23; + pContext->X24 = pPalContext->X24; + pContext->X25 = pPalContext->X25; + pContext->X26 = pPalContext->X26; + pContext->X27 = pPalContext->X27; + pContext->X28 = pPalContext->X28; + pContext->Fp = pPalContext->FP; + pContext->Sp = pPalContext->SP; + pContext->Lr = pPalContext->LR; + pContext->Pc = pPalContext->IP; +#elif defined(HOST_WASM) + // No registers, no work to do yet +#else +#error Not Implemented for this architecture -- RhpCopyContextFromExInfo +#endif +} + +#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) +struct DISPATCHER_CONTEXT +{ + UIntNative ControlPc; + // N.B. There is more here (so this struct isn't the right size), but we ignore everything else +}; + +#ifdef HOST_X86 +struct EXCEPTION_REGISTRATION_RECORD +{ + UIntNative Next; + UIntNative Handler; +}; +#endif // HOST_X86 + +EXTERN_C void __cdecl RhpFailFastForPInvokeExceptionPreemp(IntNative PInvokeCallsiteReturnAddr, + void* pExceptionRecord, void* pContextRecord); +EXTERN_C void REDHAWK_CALLCONV RhpFailFastForPInvokeExceptionCoop(IntNative PInvokeCallsiteReturnAddr, + void* pExceptionRecord, void* pContextRecord); +Int32 __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs); + +EXTERN_C Int32 __stdcall RhpPInvokeExceptionGuard(PEXCEPTION_RECORD pExceptionRecord, + UIntNative EstablisherFrame, + PCONTEXT pContextRecord, + DISPATCHER_CONTEXT * pDispatcherContext) +{ + UNREFERENCED_PARAMETER(EstablisherFrame); +#ifdef APP_LOCAL_RUNTIME + UNREFERENCED_PARAMETER(pDispatcherContext); + // + // When running on Windows 8.1 RTM, we cannot register our vectored exception handler, because that + // version of MRT100.dll does not support it. However, the binder sets this function as the personality + // routine for every reverse p/invoke, so we can handle hardware exceptions from managed code here. + // + EXCEPTION_POINTERS pointers; + pointers.ExceptionRecord = pExceptionRecord; + pointers.ContextRecord = pContextRecord; + + if (RhpVectoredExceptionHandler(&pointers) == EXCEPTION_CONTINUE_EXECUTION) + return ExceptionContinueExecution; +#endif //APP_LOCAL_RUNTIME + + Thread * pThread = ThreadStore::GetCurrentThread(); + + // If the thread is currently in the "do not trigger GC" mode, we must not allocate, we must not reverse pinvoke, or + // return from a pinvoke. All of these things will deadlock with the GC and they all become increasingly likely as + // exception dispatch kicks off. So we just nip this in the bud as early as possible with a FailFast. The most + // likely case where this occurs is in our GC-callouts for Jupiter lifetime management -- in that case, we have + // managed code that calls to native code (without pinvoking) which might have a bug that causes an AV. + if (pThread->IsDoNotTriggerGcSet()) + RhFailFast(); + + // We promote exceptions that were not converted to managed exceptions to a FailFast. However, we have to + // be careful because we got here via OS SEH infrastructure and, therefore, don't know what GC mode we're + // currently in. As a result, since we're calling back into managed code to handle the FailFast, we must + // correctly call either a UnmanagedCallersOnly or a RuntimeExport version of the same method. + if (pThread->IsCurrentThreadInCooperativeMode()) + { + // Cooperative mode -- Typically, RhpVectoredExceptionHandler will handle this because the faulting IP will be + // in managed code. But sometimes we AV on a bad call indirect or something similar. 
In that situation, we can + // use the dispatcher context or exception registration record to find the relevant classlib. +#ifdef HOST_X86 + IntNative classlibBreadcrumb = ((EXCEPTION_REGISTRATION_RECORD*)EstablisherFrame)->Handler; +#else + IntNative classlibBreadcrumb = pDispatcherContext->ControlPc; +#endif + RhpFailFastForPInvokeExceptionCoop(classlibBreadcrumb, pExceptionRecord, pContextRecord); + } + else + { + // Preemptive mode -- the classlib associated with the last pinvoke owns the fail fast behavior. + IntNative pinvokeCallsiteReturnAddr = (IntNative)pThread->GetCurrentThreadPInvokeReturnAddress(); + RhpFailFastForPInvokeExceptionPreemp(pinvokeCallsiteReturnAddr, pExceptionRecord, pContextRecord); + } + + return 0; +} +#else +EXTERN_C Int32 RhpPInvokeExceptionGuard() +{ + ASSERT_UNCONDITIONALLY("RhpPInvokeExceptionGuard NYI for this architecture!"); + RhFailFast(); + return 0; +} +#endif + +#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_WASM) +EXTERN_C REDHAWK_API void __fastcall RhpThrowHwEx(); +#else +COOP_PINVOKE_HELPER(void, RhpThrowHwEx, ()) +{ + ASSERT_UNCONDITIONALLY("RhpThrowHwEx NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpThrowEx, ()) +{ + ASSERT_UNCONDITIONALLY("RhpThrowEx NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpCallCatchFunclet, ()) +{ + ASSERT_UNCONDITIONALLY("RhpCallCatchFunclet NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpCallFinallyFunclet, ()) +{ + ASSERT_UNCONDITIONALLY("RhpCallFinallyFunclet NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpCallFilterFunclet, ()) +{ + ASSERT_UNCONDITIONALLY("RhpCallFilterFunclet NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpRethrow, ()) +{ + ASSERT_UNCONDITIONALLY("RhpRethrow NYI for this architecture!"); +} + +EXTERN_C void* RhpCallCatchFunclet2 = NULL; +EXTERN_C void* RhpCallFinallyFunclet2 = NULL; +EXTERN_C void* RhpCallFilterFunclet2 = NULL; +EXTERN_C void* RhpThrowEx2 = NULL; +EXTERN_C void* RhpThrowHwEx2 = NULL; +EXTERN_C void* RhpRethrow2 = NULL; +#endif + +EXTERN_C void * RhpAssignRefAVLocation; +EXTERN_C void * RhpCheckedAssignRefAVLocation; +EXTERN_C void * RhpCheckedLockCmpXchgAVLocation; +EXTERN_C void * RhpCheckedXchgAVLocation; +EXTERN_C void * RhpLockCmpXchg32AVLocation; +EXTERN_C void * RhpLockCmpXchg64AVLocation; +EXTERN_C void * RhpCopyMultibyteDestAVLocation; +EXTERN_C void * RhpCopyMultibyteSrcAVLocation; +EXTERN_C void * RhpCopyMultibyteNoGCRefsDestAVLocation; +EXTERN_C void * RhpCopyMultibyteNoGCRefsSrcAVLocation; +EXTERN_C void * RhpCopyMultibyteWithWriteBarrierDestAVLocation; +EXTERN_C void * RhpCopyMultibyteWithWriteBarrierSrcAVLocation; +EXTERN_C void * RhpCopyAnyWithWriteBarrierDestAVLocation; +EXTERN_C void * RhpCopyAnyWithWriteBarrierSrcAVLocation; + +static bool InWriteBarrierHelper(UIntNative faultingIP) +{ +#ifndef USE_PORTABLE_HELPERS + static UIntNative writeBarrierAVLocations[] = + { + (UIntNative)&RhpAssignRefAVLocation, + (UIntNative)&RhpCheckedAssignRefAVLocation, + (UIntNative)&RhpCheckedLockCmpXchgAVLocation, + (UIntNative)&RhpCheckedXchgAVLocation, + (UIntNative)&RhpLockCmpXchg32AVLocation, + (UIntNative)&RhpLockCmpXchg64AVLocation, + }; + + // compare the IP against the list of known possible AV locations in the write barrier helpers + for (size_t i = 0; i < sizeof(writeBarrierAVLocations)/sizeof(writeBarrierAVLocations[0]); i++) + { +#if defined(HOST_AMD64) || defined(HOST_X86) + // Verify that the runtime is not linked with incremental 
linking enabled. Incremental linking + // wraps every method symbol with a jump stub that breaks the following check. + ASSERT(*(UInt8*)writeBarrierAVLocations[i] != 0xE9); // jmp XXXXXXXX +#endif + + if (writeBarrierAVLocations[i] == faultingIP) + return true; + } +#endif // USE_PORTABLE_HELPERS + + return false; +} + +static UIntNative UnwindWriteBarrierToCaller( +#ifdef TARGET_UNIX + PAL_LIMITED_CONTEXT * pContext +#else + _CONTEXT * pContext +#endif + ) +{ +#if defined(_DEBUG) + UIntNative faultingIP = pContext->GetIp(); + ASSERT(InWriteBarrierHelper(faultingIP)); +#endif +#if defined(HOST_AMD64) || defined(HOST_X86) + // simulate a ret instruction + UIntNative sp = pContext->GetSp(); + UIntNative adjustedFaultingIP = *(UIntNative *)sp; + pContext->SetSp(sp+sizeof(UIntNative)); // pop the stack +#elif defined(HOST_ARM) || defined(HOST_ARM64) + UIntNative adjustedFaultingIP = pContext->GetLr(); +#else + UIntNative adjustedFaultingIP = 0; // initializing to make the compiler happy + PORTABILITY_ASSERT("UnwindWriteBarrierToCaller"); +#endif + return adjustedFaultingIP; +} + +#ifdef TARGET_UNIX + +Int32 __stdcall RhpHardwareExceptionHandler(UIntNative faultCode, UIntNative faultAddress, + PAL_LIMITED_CONTEXT* palContext, UIntNative* arg0Reg, UIntNative* arg1Reg) +{ + UIntNative faultingIP = palContext->GetIp(); + + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress((PTR_VOID)faultingIP); + if ((pCodeManager != NULL) || (faultCode == STATUS_ACCESS_VIOLATION && InWriteBarrierHelper(faultingIP))) + { + // Make sure that the OS does not use our internal fault codes + ASSERT(faultCode != STATUS_REDHAWK_NULL_REFERENCE && faultCode != STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE); + + if (faultCode == STATUS_ACCESS_VIOLATION) + { + if (faultAddress < NULL_AREA_SIZE) + { + faultCode = pCodeManager ? STATUS_REDHAWK_NULL_REFERENCE : STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE; + } + + if (pCodeManager == NULL) + { + // we were AV-ing in a write barrier helper - unwind our way to our caller + faultingIP = UnwindWriteBarrierToCaller(palContext); + } + } + else if (faultCode == STATUS_STACK_OVERFLOW) + { + // Do not use ASSERT_UNCONDITIONALLY here. It will crash because of it consumes too much stack. + + PalPrintFatalError("\nProcess is terminating due to StackOverflowException.\n"); + RhFailFast(); + } + + *arg0Reg = faultCode; + *arg1Reg = faultingIP; + palContext->SetIp((UIntNative)&RhpThrowHwEx); + + return EXCEPTION_CONTINUE_EXECUTION; + } + + return EXCEPTION_CONTINUE_SEARCH; +} + +#else // TARGET_UNIX + +Int32 __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs) +{ + UIntNative faultingIP = pExPtrs->ContextRecord->GetIp(); + + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress((PTR_VOID)faultingIP); + UIntNative faultCode = pExPtrs->ExceptionRecord->ExceptionCode; + if ((pCodeManager != NULL) || (faultCode == STATUS_ACCESS_VIOLATION && InWriteBarrierHelper(faultingIP))) + { + // Make sure that the OS does not use our internal fault codes + ASSERT(faultCode != STATUS_REDHAWK_NULL_REFERENCE && faultCode != STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE); + + if (faultCode == STATUS_ACCESS_VIOLATION) + { + if (pExPtrs->ExceptionRecord->ExceptionInformation[1] < NULL_AREA_SIZE) + { + faultCode = pCodeManager ? 
STATUS_REDHAWK_NULL_REFERENCE : STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE; + } + + if (pCodeManager == NULL) + { + // we were AV-ing in a write barrier helper - unwind our way to our caller + faultingIP = UnwindWriteBarrierToCaller(pExPtrs->ContextRecord); + } + } + else if (faultCode == STATUS_STACK_OVERFLOW) + { + // Do not use ASSERT_UNCONDITIONALLY here. It will crash because of it consumes too much stack. + + PalPrintFatalError("\nProcess is terminating due to StackOverflowException.\n"); + PalRaiseFailFastException(pExPtrs->ExceptionRecord, pExPtrs->ContextRecord, 0); + } + + pExPtrs->ContextRecord->SetIp((UIntNative)&RhpThrowHwEx); + pExPtrs->ContextRecord->SetArg0Reg(faultCode); + pExPtrs->ContextRecord->SetArg1Reg(faultingIP); + + return EXCEPTION_CONTINUE_EXECUTION; + } + + { + static UInt8 *s_pbRuntimeModuleLower = NULL; + static UInt8 *s_pbRuntimeModuleUpper = NULL; + + // If this is the first time through this path then calculate the upper and lower bounds of the + // runtime module. Note we could be racing to calculate this but it doesn't matter since the results + // should always agree. + if ((s_pbRuntimeModuleLower == NULL) || (s_pbRuntimeModuleUpper == NULL)) + { + // Get the module handle for this runtime. Do this by passing an address definitely within the + // module (the address of this function) to GetModuleHandleEx with the "from address" flag. + HANDLE hRuntimeModule = PalGetModuleHandleFromPointer(reinterpret_cast(RhpVectoredExceptionHandler)); + if (!hRuntimeModule) + { + ASSERT_UNCONDITIONALLY("Failed to locate our own module handle"); + RhFailFast(); + } + + PalGetModuleBounds(hRuntimeModule, &s_pbRuntimeModuleLower, &s_pbRuntimeModuleUpper); + } + + if (((UInt8*)faultingIP >= s_pbRuntimeModuleLower) && ((UInt8*)faultingIP < s_pbRuntimeModuleUpper)) + { + // Generally any form of hardware exception within the runtime itself is considered a fatal error. + // Note this includes the managed code within the runtime. + ASSERT_UNCONDITIONALLY("Hardware exception raised inside the runtime."); + PalRaiseFailFastException(pExPtrs->ExceptionRecord, pExPtrs->ContextRecord, 0); + } + } + + return EXCEPTION_CONTINUE_SEARCH; +} + +#endif // TARGET_UNIX + +COOP_PINVOKE_HELPER(void, RhpFallbackFailFast, ()) +{ + RhFailFast(); +} + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/EtwEvents.h b/src/coreclr/src/nativeaot/Runtime/EtwEvents.h new file mode 100644 index 0000000000000..a023af9d464ea --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/EtwEvents.h @@ -0,0 +1,904 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// **** This file is auto-generated. Do not edit by hand. **** +// +// Instead ensure this file and EtwEvents.man are checked-out from source code control, locate the PUCLR ETW +// manifest file (it should be in puclr\ndp\clr\src\VM\ClrEtwAll.man), copy it into the rh\src\rtetw +// directory and run the following command from an rhenv window: +// perl EtwImportClrEvents.pl +// +// This script consults EtwEventFilter.txt to determine which events to extract from the CLR manifest. It then +// merges any additional Redhawk-specific events from EtwRedhawkEvents.xml. 
The result is an updated version +// of this header file plus EtwEvents.man, a new ETW manifest file describing the final Redhawk events which +// can be registered with the system via the following command: +// wevtutil im EtwEvents.man +// + +#ifndef __RH_ETW_DEFS_INCLUDED +#define __RH_ETW_DEFS_INCLUDED + +#if defined(FEATURE_ETW) && !defined(DACCESS_COMPILE) + +#ifndef RH_ETW_INLINE +#define RH_ETW_INLINE __declspec(noinline) __inline +#endif + +struct RH_ETW_CONTEXT +{ + TRACEHANDLE RegistrationHandle; + TRACEHANDLE Logger; + UInt64 MatchAnyKeyword; + UInt64 MatchAllKeyword; + EVENT_FILTER_DESCRIPTOR * FilterData; + UInt32 Flags; + UInt32 IsEnabled; + UInt8 Level; + UInt8 Reserve; +}; + +UInt32 EtwCallback(UInt32 IsEnabled, RH_ETW_CONTEXT * CallbackContext); + +__declspec(noinline) __inline void __stdcall +RhEtwControlCallback(GUID * /*SourceId*/, UInt32 IsEnabled, UInt8 Level, UInt64 MatchAnyKeyword, UInt64 MatchAllKeyword, EVENT_FILTER_DESCRIPTOR * FilterData, void * CallbackContext) +{ + RH_ETW_CONTEXT * Ctx = (RH_ETW_CONTEXT*)CallbackContext; + if (Ctx == NULL) + return; + Ctx->Level = Level; + Ctx->MatchAnyKeyword = MatchAnyKeyword; + Ctx->MatchAllKeyword = MatchAllKeyword; + Ctx->FilterData = FilterData; + Ctx->IsEnabled = IsEnabled; + EtwCallback(IsEnabled, (RH_ETW_CONTEXT*)CallbackContext); +} + +__declspec(noinline) __inline bool __stdcall + RhEventTracingEnabled(RH_ETW_CONTEXT * EnableInfo, + const EVENT_DESCRIPTOR * EventDescriptor) +{ + if (!EnableInfo) + return false; + if ((EventDescriptor->Level <= EnableInfo->Level) || (EnableInfo->Level == 0)) + { + if ((EventDescriptor->Keyword == (ULONGLONG)0) || + ((EventDescriptor->Keyword & EnableInfo->MatchAnyKeyword) && + ((EventDescriptor->Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) + return true; + } + return false; +} + +#define ETW_EVENT_ENABLED(Context, EventDescriptor) (Context.IsEnabled && RhEventTracingEnabled(&Context, &EventDescriptor)) + +extern "C" __declspec(selectany) const GUID MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER = {0x1095638c, 0x8748, 0x4c7a, {0xb3, 0x9e, 0xba, 0xea, 0x27, 0xb9, 0xc5, 0x89}}; + +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC1stConEnd = {0xd, 0x0, 0x10, 0x4, 0x1b, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC1stNonConEnd = {0xc, 0x0, 0x10, 0x4, 0x1a, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndConBegin = {0x10, 0x0, 0x10, 0x4, 0x1e, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndConEnd = {0x11, 0x0, 0x10, 0x4, 0x1f, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndNonConBegin = {0xe, 0x0, 0x10, 0x4, 0x1c, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndNonConEnd = {0xf, 0x0, 0x10, 0x4, 0x1d, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCAllocWaitBegin = {0x17, 0x0, 0x10, 0x4, 0x25, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCAllocWaitEnd = {0x18, 0x0, 0x10, 0x4, 0x26, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCBegin = {0xb, 0x0, 0x10, 0x4, 0x19, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCDrainMark = {0x14, 0x0, 0x10, 0x4, 0x22, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCOverflow = {0x16, 0x0, 0x10, 0x4, 0x24, 0x1, 
0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCPlanEnd = {0x12, 0x0, 0x10, 0x4, 0x20, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCRevisit = {0x15, 0x0, 0x10, 0x4, 0x23, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCSweepEnd = {0x13, 0x0, 0x10, 0x4, 0x21, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCFullNotify_V1 = {0x19, 0x1, 0x10, 0x4, 0x13, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCGlobalHeapHistory_V1 = {0x5, 0x1, 0x10, 0x4, 0x12, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCJoin_V1 = {0x6, 0x1, 0x10, 0x5, 0x14, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCOptimized_V1 = {0x3, 0x1, 0x10, 0x5, 0x10, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCPerHeapHistory = {0x4, 0x2, 0x10, 0x4, 0x11, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCSettings = {0x2, 0x0, 0x10, 0x4, 0xe, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PinPlugAtGCTime = {0xc7, 0x0, 0x10, 0x5, 0x2c, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvDestroyGCHandle = {0xc3, 0x0, 0x10, 0x5, 0x2b, 0x1, 0x8000000000004000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkCards_V1 = {0xa, 0x1, 0x10, 0x4, 0x18, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkFinalizeQueueRoots_V1 = {0x8, 0x1, 0x10, 0x4, 0x16, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkHandles_V1 = {0x9, 0x1, 0x10, 0x4, 0x17, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkStackRoots_V1 = {0x7, 0x1, 0x10, 0x4, 0x15, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvSetGCHandle = {0xc2, 0x0, 0x10, 0x5, 0x2a, 0x1, 0x8000000000004000}; + +extern "C" __declspec(selectany) REGHANDLE Microsoft_Windows_Redhawk_GC_PrivateHandle; +extern "C" __declspec(selectany) RH_ETW_CONTEXT MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context; + +#define RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Private() do { PalEventRegister(&MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER, RhEtwControlCallback, &MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context, &Microsoft_Windows_Redhawk_GC_PrivateHandle); } while (false) +#define RH_ETW_UNREGISTER_Microsoft_Windows_Redhawk_GC_Private() do { PalEventUnregister(Microsoft_Windows_Redhawk_GC_PrivateHandle); } while (false) + +#define FireEtwBGC1stConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stConEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stConEnd, ClrInstanceID) : 0 + +#define FireEtwBGC1stNonConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stNonConEnd)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stNonConEnd, ClrInstanceID) : 0 + +#define FireEtwBGC2ndConBegin(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConBegin, ClrInstanceID) : 0 + +#define FireEtwBGC2ndConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConEnd, ClrInstanceID) : 0 + +#define FireEtwBGC2ndNonConBegin(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConBegin, ClrInstanceID) : 0 + +#define FireEtwBGC2ndNonConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConEnd, ClrInstanceID) : 0 + +#define FireEtwBGCAllocWaitBegin(Reason, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCAllocWait(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitBegin, Reason, ClrInstanceID) : 0 + +#define FireEtwBGCAllocWaitEnd(Reason, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCAllocWait(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitEnd, Reason, ClrInstanceID) : 0 + +#define FireEtwBGCBegin(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCBegin, ClrInstanceID) : 0 + +#define FireEtwBGCDrainMark(Objects, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCDrainMark)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCDrainMark(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCDrainMark, Objects, ClrInstanceID) : 0 + +#define FireEtwBGCOverflow(Min, Max, Objects, IsLarge, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCOverflow)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCOverflow(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCOverflow, Min, Max, Objects, IsLarge, ClrInstanceID) : 0 + +#define FireEtwBGCPlanEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCPlanEnd)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCPlanEnd, ClrInstanceID) : 0 + +#define FireEtwBGCRevisit(Pages, Objects, IsLarge, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCRevisit)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCRevisit(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCRevisit, Pages, Objects, IsLarge, ClrInstanceID) : 0 + +#define FireEtwBGCSweepEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCSweepEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCSweepEnd, ClrInstanceID) : 0 + +#define FireEtwGCFullNotify_V1(GenNumber, IsAlloc, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCFullNotify_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCFullNotify_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCFullNotify_V1, GenNumber, IsAlloc, ClrInstanceID) : 0 + +#define FireEtwGCGlobalHeapHistory_V1(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCGlobalHeapHistory_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCGlobalHeap_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCGlobalHeapHistory_V1, FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID) : 0 + +#define FireEtwGCJoin_V1(Heap, JoinTime, JoinType, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCJoin_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCJoin_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCJoin_V1, Heap, JoinTime, JoinType, ClrInstanceID) : 0 + +#define FireEtwGCOptimized_V1(DesiredAllocation, NewAllocation, GenerationNumber, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCOptimized_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCOptimized_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCOptimized_V1, DesiredAllocation, NewAllocation, GenerationNumber, ClrInstanceID) : 0 + +#define FireEtwGCPerHeapHistory() (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory)) ? TemplateEventDescriptor(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory) : 0 + +#define FireEtwGCSettings(SegmentSize, LargeObjectSegmentSize, ServerGC) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCSettings)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCSettings(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCSettings, SegmentSize, LargeObjectSegmentSize, ServerGC) : 0 + +#define FireEtwPinPlugAtGCTime(PlugStart, PlugEnd, GapBeforeSize, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PinPlugAtGCTime)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PinPlugAtGCTime(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PinPlugAtGCTime, PlugStart, PlugEnd, GapBeforeSize, ClrInstanceID) : 0 + +#define FireEtwPrvDestroyGCHandle(HandleID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvDestroyGCHandle)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvDestroyGCHandle(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvDestroyGCHandle, HandleID, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkCards_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkCards_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkCards_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkFinalizeQueueRoots_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkFinalizeQueueRoots_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkFinalizeQueueRoots_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkHandles_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkHandles_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkHandles_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkStackRoots_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkStackRoots_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkStackRoots_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvSetGCHandle)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvSetGCHandle(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvSetGCHandle, HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) : 0 + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCAllocWait(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Reason, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCDrainMark(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Objects, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Objects, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCOverflow(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Min, UInt64 Max, UInt64 Objects, UInt32 IsLarge, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Min, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &Max, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &Objects, sizeof(UInt64)); + EventDataDescCreate(&EventData[3], &IsLarge, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCRevisit(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Pages, UInt64 Objects, UInt32 IsLarge, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &Pages, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &Objects, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &IsLarge, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCFullNotify_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 GenNumber, UInt32 IsAlloc, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &GenNumber, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &IsAlloc, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCGlobalHeap_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 FinalYoungestDesired, Int32 NumHeaps, UInt32 CondemnedGeneration, UInt32 Gen0ReductionCount, UInt32 Reason, UInt32 GlobalMechanisms, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &FinalYoungestDesired, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &NumHeaps, sizeof(Int32)); + EventDataDescCreate(&EventData[2], &CondemnedGeneration, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Gen0ReductionCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[5], 
&GlobalMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[6], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 7, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCJoin_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Heap, UInt32 JoinTime, UInt32 JoinType, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &Heap, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &JoinTime, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &JoinType, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCOptimized_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 DesiredAllocation, UInt64 NewAllocation, UInt32 GenerationNumber, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &DesiredAllocation, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &NewAllocation, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &GenerationNumber, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCSettings(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 SegmentSize, UInt64 LargeObjectSegmentSize, UInt32_BOOL ServerGC) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &SegmentSize, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &LargeObjectSegmentSize, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &ServerGC, sizeof(UInt32_BOOL)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PinPlugAtGCTime(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* PlugStart, void* PlugEnd, void* GapBeforeSize, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &PlugStart, sizeof(void*)); + EventDataDescCreate(&EventData[1], &PlugEnd, sizeof(void*)); + EventDataDescCreate(&EventData[2], &GapBeforeSize, sizeof(void*)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvDestroyGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 HeapNum, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + 
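// Each EventDataDescCreate call below just records a pointer and size for one payload field;
+    // the fields must appear in the same order and with the same widths as the event's manifest
+    // template (here HeapNum as a UInt32 followed by ClrInstanceID as a UInt16).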
EventDataDescCreate(&EventData[0], &HeapNum, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvSetGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, void* ObjectID, UInt32 Kind, UInt32 Generation, UInt64 AppDomainID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ObjectID, sizeof(void*)); + EventDataDescCreate(&EventData[2], &Kind, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Generation, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &AppDomainID, sizeof(UInt64)); + EventDataDescCreate(&EventData[5], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +extern "C" __declspec(selectany) const GUID MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER = {0x47c3ba0c, 0x77f1, 0x4eb0, {0x8d, 0x4d, 0xae, 0xf4, 0x47, 0xf1, 0x6a, 0x85}}; + +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BulkType = {0xf, 0x0, 0x10, 0x4, 0xa, 0x15, 0x8000000000080000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR DestroyGCHandle = {0x1f, 0x0, 0x10, 0x4, 0x22, 0x1, 0x8000000000000002}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR ExceptionThrown_V1 = {0x50, 0x1, 0x10, 0x2, 0x1, 0x7, 0x8000000200008000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCAllocationTick_V1 = {0xa, 0x1, 0x10, 0x5, 0xb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCAllocationTick_V2 = {0xa, 0x2, 0x10, 0x5, 0xb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCAllocationTick_V3 = {0xa, 0x3, 0x10, 0x5, 0xb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkEdge = {0x13, 0x0, 0x10, 0x4, 0x17, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkMovedObjectRanges = {0x16, 0x0, 0x10, 0x4, 0x1a, 0x1, 0x8000000000400000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkNode = {0x12, 0x0, 0x10, 0x4, 0x16, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRCW = {0x25, 0x0, 0x10, 0x4, 0x27, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRootCCW = {0x24, 0x0, 0x10, 0x4, 0x26, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRootConditionalWeakTableElementEdge = {0x11, 0x0, 0x10, 0x4, 0x15, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRootEdge = {0x10, 0x0, 0x10, 0x4, 0x14, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkSurvivingObjectRanges = {0x15, 0x0, 0x10, 0x4, 0x19, 0x1, 0x8000000000400000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCCreateConcurrentThread_V1 = {0xb, 0x1, 0x10, 0x4, 0xc, 0x1, 0x8000000000010001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCCreateSegment_V1 = {0x5, 0x1, 0x10, 0x4, 0x86, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCEnd_V1 = {0x2, 0x1, 0x10, 0x4, 0x2, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCFreeSegment_V1 = {0x6, 0x1, 0x10, 0x4, 0x87, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const 
EVENT_DESCRIPTOR GCGenerationRange = {0x17, 0x0, 0x10, 0x4, 0x1b, 0x1, 0x8000000000400000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCGlobalHeapHistory_V2 = {0xcd, 0x2, 0x10, 0x4, 0xcd, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCHeapStats_V1 = {0x4, 0x1, 0x10, 0x4, 0x85, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCJoin_V2 = {0xcb, 0x2, 0x10, 0x5, 0xcb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkFinalizeQueueRoots = {0x1a, 0x0, 0x10, 0x4, 0x1d, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkHandles = {0x1b, 0x0, 0x10, 0x4, 0x1e, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkOlderGenerationRoots = {0x1c, 0x0, 0x10, 0x4, 0x1f, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkStackRoots = {0x19, 0x0, 0x10, 0x4, 0x1c, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkWithType = {0xca, 0x0, 0x10, 0x4, 0xca, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCPerHeapHistory_V3 = {0xcc, 0x3, 0x10, 0x4, 0xcc, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCRestartEEBegin_V1 = {0x7, 0x1, 0x10, 0x4, 0x88, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCRestartEEEnd_V1 = {0x3, 0x1, 0x10, 0x4, 0x84, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCStart_V1 = {0x1, 0x1, 0x10, 0x4, 0x1, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCStart_V2 = {0x1, 0x2, 0x10, 0x4, 0x1, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCSuspendEEBegin_V1 = {0x9, 0x1, 0x10, 0x4, 0xa, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCSuspendEEEnd_V1 = {0x8, 0x1, 0x10, 0x4, 0x89, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCTerminateConcurrentThread_V1 = {0xc, 0x1, 0x10, 0x4, 0xd, 0x1, 0x8000000000010001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCTriggered = {0x23, 0x0, 0x10, 0x4, 0x23, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR ModuleLoad_V2 = {0x98, 0x2, 0x10, 0x4, 0x21, 0xa, 0x8000000020000008}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR SetGCHandle = {0x1e, 0x0, 0x10, 0x4, 0x21, 0x1, 0x8000000000000002}; + +extern "C" __declspec(selectany) REGHANDLE Microsoft_Windows_Redhawk_GC_PublicHandle; +extern "C" __declspec(selectany) RH_ETW_CONTEXT MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context; + +#define RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Public() do { PalEventRegister(&MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER, RhEtwControlCallback, &MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context, &Microsoft_Windows_Redhawk_GC_PublicHandle); } while (false) +#define RH_ETW_UNREGISTER_Microsoft_Windows_Redhawk_GC_Public() do { PalEventUnregister(Microsoft_Windows_Redhawk_GC_PublicHandle); } while (false) + +#define FireEtwBulkType(Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &BulkType)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_BulkType(Microsoft_Windows_Redhawk_GC_PublicHandle, &BulkType, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwDestroyGCHandle(HandleID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &DestroyGCHandle)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_DestroyGCHandle(Microsoft_Windows_Redhawk_GC_PublicHandle, &DestroyGCHandle, HandleID, ClrInstanceID) : 0 + +#define FireEtwExceptionThrown_V1(ExceptionType, ExceptionMessage, ExceptionEIP, ExceptionHRESULT, ExceptionFlags, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &ExceptionThrown_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Exception(Microsoft_Windows_Redhawk_GC_PublicHandle, &ExceptionThrown_V1, ExceptionType, ExceptionMessage, ExceptionEIP, ExceptionHRESULT, ExceptionFlags, ClrInstanceID) : 0 + +#define FireEtwGCAllocationTick_V1(AllocationAmount, AllocationKind, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V1, AllocationAmount, AllocationKind, ClrInstanceID) : 0 + +#define FireEtwGCAllocationTick_V2(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V2, AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex) : 0 + +#define FireEtwGCAllocationTick_V3(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex, Address) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V3)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V3(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V3, AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex, Address) : 0 + +#define FireEtwGCBulkEdge(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkEdge)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkEdge(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkEdge, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkMovedObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkMovedObjectRanges)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkMovedObjectRanges(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkMovedObjectRanges, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkNode(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkNode)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkNode(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkNode, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRCW(Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRCW)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRCW(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRCW, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRootCCW(Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootCCW)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootCCW(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootCCW, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRootConditionalWeakTableElementEdge(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootConditionalWeakTableElementEdge)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootConditionalWeakTableElementEdge(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootConditionalWeakTableElementEdge, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRootEdge(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootEdge)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootEdge(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootEdge, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkSurvivingObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkSurvivingObjectRanges)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkSurvivingObjectRanges(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkSurvivingObjectRanges, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCCreateConcurrentThread_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateConcurrentThread_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateConcurrentThread(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateConcurrentThread_V1, ClrInstanceID) : 0 + +#define FireEtwGCCreateSegment_V1(Address, Size, Type, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateSegment_V1)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateSegment_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateSegment_V1, Address, Size, Type, ClrInstanceID) : 0 + +#define FireEtwGCEnd_V1(Count, Depth, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCEnd_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCEnd_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCEnd_V1, Count, Depth, ClrInstanceID) : 0 + +#define FireEtwGCFreeSegment_V1(Address, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCFreeSegment_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCFreeSegment_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCFreeSegment_V1, Address, ClrInstanceID) : 0 + +#define FireEtwGCGenerationRange(Generation, RangeStart, RangeUsedLength, RangeReservedLength, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGenerationRange)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGenerationRange(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGenerationRange, Generation, RangeStart, RangeUsedLength, RangeReservedLength, ClrInstanceID) : 0 + +#define FireEtwGCGlobalHeapHistory_V2(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID, PauseMode, MemoryPressure) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGlobalHeapHistory_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGlobalHeap_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGlobalHeapHistory_V2, FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID, PauseMode, MemoryPressure) : 0 + +#define FireEtwGCHeapStats_V1(GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCHeapStats_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCHeapStats_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCHeapStats_V1, GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount, ClrInstanceID) : 0 + +#define FireEtwGCJoin_V2(Heap, JoinTime, JoinType, ClrInstanceID, JoinID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCJoin_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCJoin_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCJoin_V2, Heap, JoinTime, JoinType, ClrInstanceID, JoinID) : 0 + +#define FireEtwGCMarkFinalizeQueueRoots(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkFinalizeQueueRoots)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkFinalizeQueueRoots, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkHandles(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkHandles)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkHandles, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkOlderGenerationRoots(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkOlderGenerationRoots)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkOlderGenerationRoots, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkStackRoots(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkStackRoots)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkStackRoots, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkWithType(HeapNum, ClrInstanceID, Type, Bytes) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkWithType)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMarkWithType(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkWithType, HeapNum, ClrInstanceID, Type, Bytes) : 0 + +#define FireEtwGCPerHeapHistory_V3(ClrInstanceID, FreeListAllocated, FreeListRejected, EndOfSegAllocated, CondemnedAllocated, PinnedAllocated, PinnedAllocatedAdvance, RunningFreeListEfficiency, CondemnReasons0, CondemnReasons1, CompactMechanisms, ExpandMechanisms, HeapIndex, ExtraGen0Commit, Count, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCPerHeapHistory_V3)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCPerHeapHistory_V3(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCPerHeapHistory_V3, ClrInstanceID, FreeListAllocated, FreeListRejected, EndOfSegAllocated, CondemnedAllocated, PinnedAllocated, PinnedAllocatedAdvance, RunningFreeListEfficiency, CondemnReasons0, CondemnReasons1, CompactMechanisms, ExpandMechanisms, HeapIndex, ExtraGen0Commit, Count, Values_Len_, Values) : 0 + +#define FireEtwGCRestartEEBegin_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEBegin_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEBegin_V1, ClrInstanceID) : 0 + +#define FireEtwGCRestartEEEnd_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEEnd_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEEnd_V1, ClrInstanceID) : 0 + +#define FireEtwGCStart_V1(Count, Depth, Reason, Type, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V1)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V1, Count, Depth, Reason, Type, ClrInstanceID) : 0 + +#define FireEtwGCStart_V2(Count, Depth, Reason, Type, ClrInstanceID, ClientSequenceNumber) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V2, Count, Depth, Reason, Type, ClrInstanceID, ClientSequenceNumber) : 0 + +#define FireEtwGCSuspendEEBegin_V1(Reason, Count, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEBegin_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCSuspendEE_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEBegin_V1, Reason, Count, ClrInstanceID) : 0 + +#define FireEtwGCSuspendEEEnd_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEEnd_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEEnd_V1, ClrInstanceID) : 0 + +#define FireEtwGCTerminateConcurrentThread_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTerminateConcurrentThread_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTerminateConcurrentThread(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTerminateConcurrentThread_V1, ClrInstanceID) : 0 + +#define FireEtwGCTriggered(Reason, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTriggered)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTriggered(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTriggered, Reason, ClrInstanceID) : 0 + +#define FireEtwModuleLoad_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &ModuleLoad_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_ModuleLoadUnload_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &ModuleLoad_V2, ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) : 0 + +#define FireEtwSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &SetGCHandle)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_SetGCHandle(Microsoft_Windows_Redhawk_GC_PublicHandle, &SetGCHandle, HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) : 0 + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_BulkType(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[11]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_DestroyGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Exception(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, LPCWSTR ExceptionType, LPCWSTR ExceptionMessage, void* ExceptionEIP, UInt32 ExceptionHRESULT, UInt16 ExceptionFlags, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], (ExceptionType != NULL) ? ExceptionType : L"", (ExceptionType != NULL) ? (ULONG)((wcslen(ExceptionType) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[1], (ExceptionMessage != NULL) ? ExceptionMessage : L"", (ExceptionMessage != NULL) ? (ULONG)((wcslen(ExceptionMessage) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[2], &ExceptionEIP, sizeof(void*)); + EventDataDescCreate(&EventData[3], &ExceptionHRESULT, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ExceptionFlags, sizeof(UInt16)); + EventDataDescCreate(&EventData[5], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 AllocationAmount, UInt32 AllocationKind, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &AllocationAmount, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &AllocationKind, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 AllocationAmount, UInt32 AllocationKind, UInt16 ClrInstanceID, UInt64 AllocationAmount64, void* TypeID, LPCWSTR TypeName, UInt32 HeapIndex) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &AllocationAmount, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &AllocationKind, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], &AllocationAmount64, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &TypeID, sizeof(void*)); + EventDataDescCreate(&EventData[5], (TypeName != NULL) ? TypeName : L"", (TypeName != NULL) ? 
(ULONG)((wcslen(TypeName) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[6], &HeapIndex, sizeof(UInt32)); + return PalEventWrite(RegHandle, Descriptor, 7, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V3(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 AllocationAmount, UInt32 AllocationKind, UInt16 ClrInstanceID, UInt64 AllocationAmount64, void* TypeID, LPCWSTR TypeName, UInt32 HeapIndex, void* Address) +{ + EVENT_DATA_DESCRIPTOR EventData[8]; + EventDataDescCreate(&EventData[0], &AllocationAmount, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &AllocationKind, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], &AllocationAmount64, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &TypeID, sizeof(void*)); + EventDataDescCreate(&EventData[5], (TypeName != NULL) ? TypeName : L"", (TypeName != NULL) ? (ULONG)((wcslen(TypeName) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[6], &HeapIndex, sizeof(UInt32)); + EventDataDescCreate(&EventData[7], &Address, sizeof(void*)); + return PalEventWrite(RegHandle, Descriptor, 8, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkEdge(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkMovedObjectRanges(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkNode(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[8]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRCW(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[9]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], Values, Count * Values_Len_); 
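+    // The variable-length payload is passed as a single contiguous descriptor of
+    // Count * Values_Len_ bytes (Values_Len_ is presumably the per-element size), so only the
+    // first three EventData entries are handed to PalEventWrite below.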
+ return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootCCW(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[10]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootConditionalWeakTableElementEdge(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootEdge(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[8]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkSurvivingObjectRanges(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateConcurrentThread(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateSegment_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Address, UInt64 Size, UInt32 Type, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &Address, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &Size, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCEnd_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 
Count, UInt32 Depth, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Depth, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCFreeSegment_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Address, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Address, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGenerationRange(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt8 Generation, void* RangeStart, UInt64 RangeUsedLength, UInt64 RangeReservedLength, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Generation, sizeof(UInt8)); + EventDataDescCreate(&EventData[1], &RangeStart, sizeof(void*)); + EventDataDescCreate(&EventData[2], &RangeUsedLength, sizeof(UInt64)); + EventDataDescCreate(&EventData[3], &RangeReservedLength, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGlobalHeap_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 FinalYoungestDesired, Int32 NumHeaps, UInt32 CondemnedGeneration, UInt32 Gen0ReductionCount, UInt32 Reason, UInt32 GlobalMechanisms, UInt16 ClrInstanceID, UInt32 PauseMode, UInt32 MemoryPressure) +{ + EVENT_DATA_DESCRIPTOR EventData[9]; + EventDataDescCreate(&EventData[0], &FinalYoungestDesired, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &NumHeaps, sizeof(Int32)); + EventDataDescCreate(&EventData[2], &CondemnedGeneration, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Gen0ReductionCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[5], &GlobalMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[6], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[7], &PauseMode, sizeof(UInt32)); + EventDataDescCreate(&EventData[8], &MemoryPressure, sizeof(UInt32)); + return PalEventWrite(RegHandle, Descriptor, 9, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCHeapStats_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 GenerationSize0, UInt64 TotalPromotedSize0, UInt64 GenerationSize1, UInt64 TotalPromotedSize1, UInt64 GenerationSize2, UInt64 TotalPromotedSize2, UInt64 GenerationSize3, UInt64 TotalPromotedSize3, UInt64 FinalizationPromotedSize, UInt64 FinalizationPromotedCount, UInt32 PinnedObjectCount, UInt32 SinkBlockCount, UInt32 GCHandleCount, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[14]; + EventDataDescCreate(&EventData[0], &GenerationSize0, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &TotalPromotedSize0, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &GenerationSize1, sizeof(UInt64)); + EventDataDescCreate(&EventData[3], &TotalPromotedSize1, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &GenerationSize2, sizeof(UInt64)); + EventDataDescCreate(&EventData[5], 
&TotalPromotedSize2, sizeof(UInt64)); + EventDataDescCreate(&EventData[6], &GenerationSize3, sizeof(UInt64)); + EventDataDescCreate(&EventData[7], &TotalPromotedSize3, sizeof(UInt64)); + EventDataDescCreate(&EventData[8], &FinalizationPromotedSize, sizeof(UInt64)); + EventDataDescCreate(&EventData[9], &FinalizationPromotedCount, sizeof(UInt64)); + EventDataDescCreate(&EventData[10], &PinnedObjectCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[11], &SinkBlockCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[12], &GCHandleCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[13], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 14, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCJoin_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Heap, UInt32 JoinTime, UInt32 JoinType, UInt16 ClrInstanceID, UInt32 JoinID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Heap, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &JoinTime, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &JoinType, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[4], &JoinID, sizeof(UInt32)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 HeapNum, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &HeapNum, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMarkWithType(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 HeapNum, UInt16 ClrInstanceID, UInt32 Type, UInt64 Bytes) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &HeapNum, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Bytes, sizeof(UInt64)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCPerHeapHistory_V3(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID, void* FreeListAllocated, void* FreeListRejected, void* EndOfSegAllocated, void* CondemnedAllocated, void* PinnedAllocated, void* PinnedAllocatedAdvance, UInt32 RunningFreeListEfficiency, UInt32 CondemnReasons0, UInt32 CondemnReasons1, UInt32 CompactMechanisms, UInt32 ExpandMechanisms, UInt32 HeapIndex, void* ExtraGen0Commit, UInt32 Count, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[26]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[1], &FreeListAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[2], &FreeListRejected, sizeof(void*)); + 
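// The remaining per-heap fields are described the same way (pointer-sized values as void*,
+    // 32-bit counters as UInt32), ending with Count and the variable-length Values blob;
+    // 16 descriptors in total are passed to PalEventWrite below.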
EventDataDescCreate(&EventData[3], &EndOfSegAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[4], &CondemnedAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[5], &PinnedAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[6], &PinnedAllocatedAdvance, sizeof(void*)); + EventDataDescCreate(&EventData[7], &RunningFreeListEfficiency, sizeof(UInt32)); + EventDataDescCreate(&EventData[8], &CondemnReasons0, sizeof(UInt32)); + EventDataDescCreate(&EventData[9], &CondemnReasons1, sizeof(UInt32)); + EventDataDescCreate(&EventData[10], &CompactMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[11], &ExpandMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[12], &HeapIndex, sizeof(UInt32)); + EventDataDescCreate(&EventData[13], &ExtraGen0Commit, sizeof(void*)); + EventDataDescCreate(&EventData[14], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[15], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 16, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt32 Depth, UInt32 Reason, UInt32 Type, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Depth, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt32 Depth, UInt32 Reason, UInt32 Type, UInt16 ClrInstanceID, UInt64 ClientSequenceNumber) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Depth, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[5], &ClientSequenceNumber, sizeof(UInt64)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCSuspendEE_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Reason, UInt32 Count, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTerminateConcurrentThread(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTriggered(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Reason, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Reason, 
sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_ModuleLoadUnload_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 ModuleID, UInt64 AssemblyID, UInt32 ModuleFlags, UInt32 Reserved1, LPCWSTR ModuleILPath, LPCWSTR ModuleNativePath, UInt16 ClrInstanceID, const GUID* ManagedPdbSignature, UInt32 ManagedPdbAge, LPCWSTR ManagedPdbBuildPath, const GUID* NativePdbSignature, UInt32 NativePdbAge, LPCWSTR NativePdbBuildPath) +{ + EVENT_DATA_DESCRIPTOR EventData[13]; + EventDataDescCreate(&EventData[0], &ModuleID, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &AssemblyID, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &ModuleFlags, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Reserved1, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], (ModuleILPath != NULL) ? ModuleILPath : L"", (ModuleILPath != NULL) ? (ULONG)((wcslen(ModuleILPath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[5], (ModuleNativePath != NULL) ? ModuleNativePath : L"", (ModuleNativePath != NULL) ? (ULONG)((wcslen(ModuleNativePath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[6], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[7], ManagedPdbSignature, sizeof(*(ManagedPdbSignature))); + EventDataDescCreate(&EventData[8], &ManagedPdbAge, sizeof(UInt32)); + EventDataDescCreate(&EventData[9], (ManagedPdbBuildPath != NULL) ? ManagedPdbBuildPath : L"", (ManagedPdbBuildPath != NULL) ? (ULONG)((wcslen(ManagedPdbBuildPath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[10], NativePdbSignature, sizeof(*(NativePdbSignature))); + EventDataDescCreate(&EventData[11], &NativePdbAge, sizeof(UInt32)); + EventDataDescCreate(&EventData[12], (NativePdbBuildPath != NULL) ? NativePdbBuildPath : L"", (NativePdbBuildPath != NULL) ? 
(ULONG)((wcslen(NativePdbBuildPath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + return PalEventWrite(RegHandle, Descriptor, 13, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_SetGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, void* ObjectID, UInt32 Kind, UInt32 Generation, UInt64 AppDomainID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ObjectID, sizeof(void*)); + EventDataDescCreate(&EventData[2], &Kind, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Generation, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &AppDomainID, sizeof(UInt64)); + EventDataDescCreate(&EventData[5], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +RH_ETW_INLINE UInt32 +TemplateEventDescriptor(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor) +{ + return PalEventWrite(RegHandle, Descriptor, 0, NULL); +} + +#else // FEATURE_ETW + +#define ETW_EVENT_ENABLED(Context, EventDescriptor) false + +#define FireEtwBGC1stConEnd(ClrInstanceID) +#define FireEtwBGC1stNonConEnd(ClrInstanceID) +#define FireEtwBGC2ndConBegin(ClrInstanceID) +#define FireEtwBGC2ndConEnd(ClrInstanceID) +#define FireEtwBGC2ndNonConBegin(ClrInstanceID) +#define FireEtwBGC2ndNonConEnd(ClrInstanceID) +#define FireEtwBGCAllocWaitBegin(Reason, ClrInstanceID) +#define FireEtwBGCAllocWaitEnd(Reason, ClrInstanceID) +#define FireEtwBGCBegin(ClrInstanceID) +#define FireEtwBGCDrainMark(Objects, ClrInstanceID) +#define FireEtwBGCOverflow(Min, Max, Objects, IsLarge, ClrInstanceID) +#define FireEtwBGCPlanEnd(ClrInstanceID) +#define FireEtwBGCRevisit(Pages, Objects, IsLarge, ClrInstanceID) +#define FireEtwBGCSweepEnd(ClrInstanceID) +#define FireEtwGCFullNotify_V1(GenNumber, IsAlloc, ClrInstanceID) +#define FireEtwGCGlobalHeapHistory_V1(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID) +#define FireEtwGCJoin_V1(Heap, JoinTime, JoinType, ClrInstanceID) +#define FireEtwGCOptimized_V1(DesiredAllocation, NewAllocation, GenerationNumber, ClrInstanceID) +#define FireEtwGCPerHeapHistory() +#define FireEtwGCSettings(SegmentSize, LargeObjectSegmentSize, ServerGC) +#define FireEtwPinPlugAtGCTime(PlugStart, PlugEnd, GapBeforeSize, ClrInstanceID) +#define FireEtwPrvDestroyGCHandle(HandleID, ClrInstanceID) +#define FireEtwPrvGCMarkCards_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvGCMarkFinalizeQueueRoots_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvGCMarkHandles_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvGCMarkStackRoots_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) + +#define FireEtwBulkType(Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwDestroyGCHandle(HandleID, ClrInstanceID) +#define FireEtwExceptionThrown_V1(ExceptionType, ExceptionMessage, ExceptionEIP, ExceptionHRESULT, ExceptionFlags, ClrInstanceID) +#define FireEtwGCAllocationTick_V1(AllocationAmount, AllocationKind, ClrInstanceID) +#define FireEtwGCAllocationTick_V2(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex) +#define FireEtwGCAllocationTick_V3(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex, Address) +#define FireEtwGCBulkEdge(Index, Count, ClrInstanceID, Values_Len_, Values) +#define 
FireEtwGCBulkMovedObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkNode(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRCW(Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRootCCW(Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRootConditionalWeakTableElementEdge(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRootEdge(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkSurvivingObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCCreateConcurrentThread_V1(ClrInstanceID) +#define FireEtwGCCreateSegment_V1(Address, Size, Type, ClrInstanceID) +#define FireEtwGCEnd_V1(Count, Depth, ClrInstanceID) +#define FireEtwGCFreeSegment_V1(Address, ClrInstanceID) +#define FireEtwGCGenerationRange(Generation, RangeStart, RangeUsedLength, RangeReservedLength, ClrInstanceID) +#define FireEtwGCGlobalHeapHistory_V2(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID, PauseMode, MemoryPressure) +#define FireEtwGCHeapStats_V1(GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount, ClrInstanceID) +#define FireEtwGCJoin_V2(Heap, JoinTime, JoinType, ClrInstanceID, JoinID) +#define FireEtwGCMarkFinalizeQueueRoots(HeapNum, ClrInstanceID) +#define FireEtwGCMarkHandles(HeapNum, ClrInstanceID) +#define FireEtwGCMarkOlderGenerationRoots(HeapNum, ClrInstanceID) +#define FireEtwGCMarkStackRoots(HeapNum, ClrInstanceID) +#define FireEtwGCMarkWithType(HeapNum, ClrInstanceID, Type, Bytes) +#define FireEtwGCPerHeapHistory_V3(ClrInstanceID, FreeListAllocated, FreeListRejected, EndOfSegAllocated, CondemnedAllocated, PinnedAllocated, PinnedAllocatedAdvance, RunningFreeListEfficiency, CondemnReasons0, CondemnReasons1, CompactMechanisms, ExpandMechanisms, HeapIndex, ExtraGen0Commit, Count, Values_Len_, Values) +#define FireEtwGCRestartEEBegin_V1(ClrInstanceID) +#define FireEtwGCRestartEEEnd_V1(ClrInstanceID) +#define FireEtwGCStart_V1(Count, Depth, Reason, Type, ClrInstanceID) +#define FireEtwGCStart_V2(Count, Depth, Reason, Type, ClrInstanceID, ClientSequenceNumber) +#define FireEtwGCSuspendEEBegin_V1(Reason, Count, ClrInstanceID) +#define FireEtwGCSuspendEEEnd_V1(ClrInstanceID) +#define FireEtwGCTerminateConcurrentThread_V1(ClrInstanceID) +#define FireEtwGCTriggered(Reason, ClrInstanceID) +#define FireEtwModuleLoad_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) +#define FireEtwSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) + +#endif // FEATURE_ETW + +#endif // !__RH_ETW_DEFS_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/FinalizerHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/FinalizerHelpers.cpp new file mode 100644 index 0000000000000..84104ab005ded --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/FinalizerHelpers.cpp @@ -0,0 +1,262 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged helpers called by the managed finalizer thread. 
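+// FinalizerStart below is the unmanaged thread entry point: it attaches the thread to the runtime, waits for the first finalization request, and then hands control to the managed ProcessFinalizers loop.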
+// +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" + +#include "slist.h" +#include "gcrhinterface.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "shash.h" + +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +#include "yieldprocessornormalized.h" + +GPTR_DECL(Thread, g_pFinalizerThread); + +CLREventStatic g_FinalizerEvent; +CLREventStatic g_FinalizerDoneEvent; + +// Finalizer method implemented by redhawkm. +extern "C" void __cdecl ProcessFinalizers(); + +// Unmanaged front-end to the finalizer thread. We require this because at the point the GC creates the +// finalizer thread we're still executing the DllMain for RedhawkU. At that point we can't run managed code +// successfully (in particular module initialization code has not run for RedhawkM). Instead this method waits +// for the first finalization request (by which time everything must be up and running) and kicks off the +// managed portion of the thread at that point. +UInt32 WINAPI FinalizerStart(void* pContext) +{ + HANDLE hFinalizerEvent = (HANDLE)pContext; + + ThreadStore::AttachCurrentThread(); + Thread * pThread = ThreadStore::GetCurrentThread(); + + // Disallow gcstress on this thread to work around the current implementation's limitation that it will + // get into an infinite loop if performed on the finalizer thread. + pThread->SetSuppressGcStress(); + + g_pFinalizerThread = PTR_Thread(pThread); + + // We have some time until the first finalization request - use the time to calibrate normalized waits. + EnsureYieldProcessorNormalizedInitialized(); + + // Wait for a finalization request. + UInt32 uResult = PalWaitForSingleObjectEx(hFinalizerEvent, INFINITE, FALSE); + ASSERT(uResult == WAIT_OBJECT_0); + + // Since we just consumed the request (and the event is auto-reset) we must set the event again so the + // managed finalizer code will immediately start processing the queue when we run it. + UInt32_BOOL fResult = PalSetEvent(hFinalizerEvent); + ASSERT(fResult); + + // Run the managed portion of the finalizer. Until we implement (non-process) shutdown this call will + // never return. + + ProcessFinalizers(); + + ASSERT(!"Finalizer thread should never return"); + return 0; +} + +bool RhStartFinalizerThread() +{ +#ifdef APP_LOCAL_RUNTIME + + // + // On app-local runtimes, if we're running with the fallback PAL code (meaning we don't have IManagedRuntimeServices) + // then we use the WinRT ThreadPool to create the finalizer thread. This might fail at startup, if the current thread + // hasn't been CoInitialized. So we need to retry this later. We use fFinalizerThreadCreated to track whether we've + // successfully created the finalizer thread yet, and also as a sort of lock to make sure two threads don't try + // to create the finalizer thread at the same time. + // + static volatile Int32 fFinalizerThreadCreated; + + if (Interlocked::Exchange(&fFinalizerThreadCreated, 1) != 1) + { + if (!PalStartFinalizerThread(FinalizerStart, (void*)g_FinalizerEvent.GetOSEvent())) + { + // Need to try again another time... + Interlocked::Exchange(&fFinalizerThreadCreated, 0); + } + } + + // We always return true, so the GC can start even if we failed. + return true; + +#else // APP_LOCAL_RUNTIME + + // + // If this isn't an app-local runtime, then the PAL will just call CreateThread directly, which should succeed + // under normal circumstances. 
+ // + if (PalStartFinalizerThread(FinalizerStart, (void*)g_FinalizerEvent.GetOSEvent())) + return true; + else + return false; + +#endif // APP_LOCAL_RUNTIME +} + +bool RhInitializeFinalization() +{ + // Allocate the events the GC expects the finalizer thread to have. The g_FinalizerEvent event is signalled + // by the GC whenever it completes a collection where it found otherwise unreachable finalizable objects. + // The g_FinalizerDoneEvent is set by the finalizer thread every time it wakes up and drains the + // queue of finalizable objects. It's mainly used by GC.WaitForPendingFinalizers(). + if (!g_FinalizerEvent.CreateAutoEventNoThrow(false)) + return false; + if (!g_FinalizerDoneEvent.CreateManualEventNoThrow(false)) + return false; + + // Create the finalizer thread itself. + if (!RhStartFinalizerThread()) + return false; + + return true; +} + +void RhEnableFinalization() +{ + g_FinalizerEvent.Set(); +} + +EXTERN_C REDHAWK_API void __cdecl RhInitializeFinalizerThread() +{ +#ifdef APP_LOCAL_RUNTIME + // We may have failed to create the finalizer thread at startup. + // Try again now. + RhStartFinalizerThread(); +#endif + + g_FinalizerEvent.Set(); +} + +EXTERN_C REDHAWK_API void __cdecl RhWaitForPendingFinalizers(UInt32_BOOL allowReentrantWait) +{ + // This must be called via p/invoke rather than RuntimeImport since it blocks and could starve the GC if + // called in cooperative mode. + ASSERT(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + // Can't call this from the finalizer thread itself. + if (ThreadStore::GetCurrentThread() != g_pFinalizerThread) + { + // Clear any current indication that a finalization pass is finished and wake the finalizer thread up + // (if there's no work to do it'll set the done event immediately). + g_FinalizerDoneEvent.Reset(); + g_FinalizerEvent.Set(); + +#ifdef APP_LOCAL_RUNTIME + // We may have failed to create the finalizer thread at startup. + // Try again now. + RhStartFinalizerThread(); +#endif + + // Wait for the finalizer thread to get back to us. + g_FinalizerDoneEvent.Wait(INFINITE, false, allowReentrantWait); + } +} + +// Block the current thread until at least one object needs to be finalized (returns true) or memory is low +// (returns false and the finalizer thread should initiate a garbage collection). +EXTERN_C REDHAWK_API UInt32_BOOL __cdecl RhpWaitForFinalizerRequest() +{ + // We can wait for two events; finalization queue has been populated and low memory resource notification. + // But if the latter is signalled we shouldn't wait on it again immediately -- if the garbage collection + // the finalizer thread initiates as a result is not sufficient to remove the low memory condition the + // event will still be signalled and we'll end up looping doing cpu intensive collections, which won't + // help the situation at all and could make it worse. So we remember whether the last event we reported + // was low memory and if so we'll wait at least two seconds (the CLR value) on just a finalization + // request. + static bool fLastEventWasLowMemory = false; + + IGCHeap * pHeap = GCHeapUtilities::GetGCHeap(); + + // Wait in a loop because we may have to retry if we decide to only wait for finalization events but the + // two second timeout expires. 
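+ // Each pass below either reports pending finalization work (WAIT_OBJECT_0), reports low memory so the finalizer can trigger a collection (WAIT_OBJECT_0 + 1), or times out and goes back to waiting on both events.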
+ do + { + HANDLE lowMemEvent = NULL; +#if 0 // TODO: hook up low memory notification + lowMemEvent = pHeap->GetLowMemoryNotificationEvent(); + HANDLE rgWaitHandles[] = { g_FinalizerEvent.GetOSEvent(), lowMemEvent }; + UInt32 cWaitHandles = (fLastEventWasLowMemory || (lowMemEvent == NULL)) ? 1 : 2; + UInt32 uTimeout = fLastEventWasLowMemory ? 2000 : INFINITE; + + UInt32 uResult = PalWaitForMultipleObjectsEx(cWaitHandles, rgWaitHandles, FALSE, uTimeout, FALSE); +#else + UInt32 uResult = PalWaitForSingleObjectEx(g_FinalizerEvent.GetOSEvent(), INFINITE, FALSE); +#endif + + switch (uResult) + { + case WAIT_OBJECT_0: + // At least one object is ready for finalization. + return TRUE; + + case WAIT_OBJECT_0 + 1: + // Memory is low, tell the finalizer thread to garbage collect. + ASSERT(!fLastEventWasLowMemory); + fLastEventWasLowMemory = true; + return FALSE; + + case WAIT_TIMEOUT: + // We were waiting only for finalization events but didn't get one within the timeout period. Go + // back to waiting for any event. + ASSERT(fLastEventWasLowMemory); + fLastEventWasLowMemory = false; + break; + + default: + ASSERT(!"Unexpected PalWaitForMultipleObjectsEx() result"); + return FALSE; + } + } while (true); +} + +// Indicate that the current round of finalizations is complete. +EXTERN_C REDHAWK_API void __cdecl RhpSignalFinalizationComplete() +{ + g_FinalizerDoneEvent.Set(); +} + +// +// The following helpers are special in that they interact with internal GC state or directly manipulate +// managed references so they're called with a special co-operative p/invoke. +// + +// Fetch next object which needs finalization or return null if we've reached the end of the list. +COOP_PINVOKE_HELPER(OBJECTREF, RhpGetNextFinalizableObject, ()) +{ + while (true) + { + // Get the next finalizable object. If we get back NULL we've reached the end of the list. + OBJECTREF refNext = GCHeapUtilities::GetGCHeap()->GetNextFinalizable(); + if (refNext == NULL) + return NULL; + + // The queue may contain objects which have been marked as finalized already (via GC.SuppressFinalize() + // for instance). Skip finalization for these but reset the flag so that the object can be put back on + // the list with RegisterForFinalization(). + if (refNext->GetHeader()->GetBits() & BIT_SBLK_FINALIZER_RUN) + { + refNext->GetHeader()->ClrBit(BIT_SBLK_FINALIZER_RUN); + continue; + } + + // We've found the first finalizable object, return it to the caller. + return refNext; + } +} diff --git a/src/coreclr/src/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/GCHelpers.cpp new file mode 100644 index 0000000000000..66f04a1b0a2c5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCHelpers.cpp @@ -0,0 +1,433 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged helpers exposed by the System.GC managed class. +// + +#include "common.h" +#include "gcenv.h" +#include "gcenv.ee.h" +#include "gcheaputilities.h" +#include "RestrictedCallouts.h" + +#include "gcrhinterface.h" + +#include "PalRedhawkCommon.h" +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +EXTERN_C REDHAWK_API void __cdecl RhpCollect(UInt32 uGeneration, UInt32 uMode) +{ + // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable. 
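+ // SetupHackPInvokeTunnel publishes a pinvoke transition frame for this thread and DisablePreemptiveMode switches it into cooperative mode before calling into the GC; EnablePreemptiveMode below restores preemptive mode once the collection completes.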
+ + Thread * pCurThread = ThreadStore::GetCurrentThread(); + + pCurThread->SetupHackPInvokeTunnel(); + pCurThread->DisablePreemptiveMode(); + + ASSERT(!pCurThread->IsDoNotTriggerGcSet()); + GCHeapUtilities::GetGCHeap()->GarbageCollect(uGeneration, FALSE, uMode); + + pCurThread->EnablePreemptiveMode(); +} + +EXTERN_C REDHAWK_API Int64 __cdecl RhpGetGcTotalMemory() +{ + // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable. + + Thread * pCurThread = ThreadStore::GetCurrentThread(); + + pCurThread->SetupHackPInvokeTunnel(); + pCurThread->DisablePreemptiveMode(); + + Int64 ret = GCHeapUtilities::GetGCHeap()->GetTotalBytesInUse(); + + pCurThread->EnablePreemptiveMode(); + + return ret; +} + +EXTERN_C REDHAWK_API Int32 __cdecl RhpStartNoGCRegion(Int64 totalSize, Boolean hasLohSize, Int64 lohSize, Boolean disallowFullBlockingGC) +{ + Thread *pCurThread = ThreadStore::GetCurrentThread(); + ASSERT(!pCurThread->IsCurrentThreadInCooperativeMode()); + + pCurThread->SetupHackPInvokeTunnel(); + pCurThread->DisablePreemptiveMode(); + + int result = GCHeapUtilities::GetGCHeap()->StartNoGCRegion(totalSize, hasLohSize, lohSize, disallowFullBlockingGC); + + pCurThread->EnablePreemptiveMode(); + + return result; +} + +EXTERN_C REDHAWK_API Int32 __cdecl RhpEndNoGCRegion() +{ + ASSERT(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + return GCHeapUtilities::GetGCHeap()->EndNoGCRegion(); +} + +COOP_PINVOKE_HELPER(void, RhSuppressFinalize, (OBJECTREF refObj)) +{ + if (!refObj->get_EEType()->HasFinalizer()) + return; + GCHeapUtilities::GetGCHeap()->SetFinalizationRun(refObj); +} + +COOP_PINVOKE_HELPER(Boolean, RhReRegisterForFinalize, (OBJECTREF refObj)) +{ + if (!refObj->get_EEType()->HasFinalizer()) + return Boolean_true; + return GCHeapUtilities::GetGCHeap()->RegisterForFinalization(-1, refObj) ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Int32, RhGetMaxGcGeneration, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetMaxGeneration(); +} + +COOP_PINVOKE_HELPER(Int32, RhGetGcCollectionCount, (Int32 generation, Boolean getSpecialGCCount)) +{ + return GCHeapUtilities::GetGCHeap()->CollectionCount(generation, getSpecialGCCount); +} + +COOP_PINVOKE_HELPER(Int32, RhGetGeneration, (OBJECTREF obj)) +{ + return GCHeapUtilities::GetGCHeap()->WhichGeneration(obj); +} + +COOP_PINVOKE_HELPER(Int32, RhGetGcLatencyMode, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetGcLatencyMode(); +} + +COOP_PINVOKE_HELPER(Int32, RhSetGcLatencyMode, (Int32 newLatencyMode)) +{ + return GCHeapUtilities::GetGCHeap()->SetGcLatencyMode(newLatencyMode); +} + +COOP_PINVOKE_HELPER(Boolean, RhIsServerGc, ()) +{ + return GCHeapUtilities::IsServerHeap(); +} + +COOP_PINVOKE_HELPER(Boolean, RhRegisterGcCallout, (GcRestrictedCalloutKind eKind, void * pCallout)) +{ + return RestrictedCallouts::RegisterGcCallout(eKind, pCallout); +} + +COOP_PINVOKE_HELPER(void, RhUnregisterGcCallout, (GcRestrictedCalloutKind eKind, void * pCallout)) +{ + RestrictedCallouts::UnregisterGcCallout(eKind, pCallout); +} + +COOP_PINVOKE_HELPER(Boolean, RhIsPromoted, (OBJECTREF obj)) +{ + return GCHeapUtilities::GetGCHeap()->IsPromoted(obj) ? 
Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Int32, RhGetLohCompactionMode, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetLOHCompactionMode(); +} + +COOP_PINVOKE_HELPER(void, RhSetLohCompactionMode, (Int32 newLohCompactionMode)) +{ + GCHeapUtilities::GetGCHeap()->SetLOHCompactionMode(newLohCompactionMode); +} + +COOP_PINVOKE_HELPER(Int64, RhGetCurrentObjSize, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetCurrentObjSize(); +} + +COOP_PINVOKE_HELPER(Int64, RhGetGCNow, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetNow(); +} + +COOP_PINVOKE_HELPER(Int64, RhGetLastGCStartTime, (Int32 generation)) +{ + return GCHeapUtilities::GetGCHeap()->GetLastGCStartTime(generation); +} + +COOP_PINVOKE_HELPER(Int64, RhGetLastGCDuration, (Int32 generation)) +{ + return GCHeapUtilities::GetGCHeap()->GetLastGCDuration(generation); +} + +COOP_PINVOKE_HELPER(Boolean, RhRegisterForFullGCNotification, (Int32 maxGenerationThreshold, Int32 largeObjectHeapThreshold)) +{ + ASSERT(maxGenerationThreshold >= 1 && maxGenerationThreshold <= 99); + ASSERT(largeObjectHeapThreshold >= 1 && largeObjectHeapThreshold <= 99); + return GCHeapUtilities::GetGCHeap()->RegisterForFullGCNotification(maxGenerationThreshold, largeObjectHeapThreshold) + ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Boolean, RhCancelFullGCNotification, ()) +{ + return GCHeapUtilities::GetGCHeap()->CancelFullGCNotification() ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Int32, RhWaitForFullGCApproach, (Int32 millisecondsTimeout)) +{ + ASSERT(millisecondsTimeout >= -1); + ASSERT(ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + int timeout = millisecondsTimeout == -1 ? INFINITE : millisecondsTimeout; + return GCHeapUtilities::GetGCHeap()->WaitForFullGCApproach(millisecondsTimeout); +} + +COOP_PINVOKE_HELPER(Int32, RhWaitForFullGCComplete, (Int32 millisecondsTimeout)) +{ + ASSERT(millisecondsTimeout >= -1); + ASSERT(ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + int timeout = millisecondsTimeout == -1 ? INFINITE : millisecondsTimeout; + return GCHeapUtilities::GetGCHeap()->WaitForFullGCComplete(millisecondsTimeout); +} + +COOP_PINVOKE_HELPER(Int64, RhGetGCSegmentSize, ()) +{ + size_t first = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(Boolean_true); + size_t second = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(Boolean_false); + + return (first > second) ? 
first : second; +} + +COOP_PINVOKE_HELPER(Int64, RhGetAllocatedBytesForCurrentThread, ()) +{ + Thread *pThread = ThreadStore::GetCurrentThread(); + gc_alloc_context *ac = pThread->GetAllocContext(); + Int64 currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr); + return currentAllocated; +} + +struct RH_GC_GENERATION_INFO +{ + UInt64 sizeBefore; + UInt64 fragmentationBefore; + UInt64 sizeAfter; + UInt64 fragmentationAfter; +}; + +#if defined(TARGET_X86) && !defined(TARGET_UNIX) +#include "pshpack4.h" +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4121) // alignment of a member was sensitive to packing +#endif +#endif +struct RH_GH_MEMORY_INFO +{ +public: + UInt64 highMemLoadThresholdBytes; + UInt64 totalAvailableMemoryBytes; + UInt64 lastRecordedMemLoadBytes; + UInt64 lastRecordedHeapSizeBytes; + UInt64 lastRecordedFragmentationBytes; + UInt64 totalCommittedBytes; + UInt64 promotedBytes; + UInt64 pinnedObjectCount; + UInt64 finalizationPendingCount; + UInt64 index; + UInt32 generation; + UInt32 pauseTimePercent; + UInt8 isCompaction; + UInt8 isConcurrent; + RH_GC_GENERATION_INFO generationInfo0; + RH_GC_GENERATION_INFO generationInfo1; + RH_GC_GENERATION_INFO generationInfo2; + RH_GC_GENERATION_INFO generationInfo3; + RH_GC_GENERATION_INFO generationInfo4; + UInt64 pauseDuration0; + UInt64 pauseDuration1; +}; +#if defined(TARGET_X86) && !defined(TARGET_UNIX) +#ifdef _MSC_VER +#pragma warning(pop) +#endif +#include "poppack.h" +#endif + +COOP_PINVOKE_HELPER(void, RhGetMemoryInfo, (RH_GH_MEMORY_INFO* pData, int kind)) +{ + UInt64* genInfoRaw = (UInt64*)&(pData->generationInfo0); + UInt64* pauseInfoRaw = (UInt64*)&(pData->pauseDuration0); + + return GCHeapUtilities::GetGCHeap()->GetMemoryInfo( + &(pData->highMemLoadThresholdBytes), + &(pData->totalAvailableMemoryBytes), + &(pData->lastRecordedMemLoadBytes), + &(pData->lastRecordedHeapSizeBytes), + &(pData->lastRecordedFragmentationBytes), + &(pData->totalCommittedBytes), + &(pData->promotedBytes), + &(pData->pinnedObjectCount), + &(pData->finalizationPendingCount), + &(pData->index), + &(pData->generation), + &(pData->pauseTimePercent), + (bool*)&(pData->isCompaction), + (bool*)&(pData->isConcurrent), + genInfoRaw, + pauseInfoRaw, + kind); +} + +COOP_PINVOKE_HELPER(Int64, RhGetTotalAllocatedBytes, ()) +{ + uint64_t allocated_bytes = GCHeapUtilities::GetGCHeap()->GetTotalAllocatedBytes() - RedhawkGCInterface::GetDeadThreadsNonAllocBytes(); + + // highest reported allocated_bytes. We do not want to report a value less than that even if unused_bytes has increased. 
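+ // The watermark is advanced with a compare-exchange loop: if another thread publishes a larger value first, the loop retries against that value and ultimately reports whichever is larger.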
+ static uint64_t high_watermark; + + uint64_t current_high = high_watermark; + while (allocated_bytes > current_high) + { + uint64_t orig = PalInterlockedCompareExchange64((Int64*)&high_watermark, allocated_bytes, current_high); + if (orig == current_high) + return allocated_bytes; + + current_high = orig; + } + + return current_high; +} + +EXTERN_C REDHAWK_API Int64 __cdecl RhGetTotalAllocatedBytesPrecise() +{ + Int64 allocated; + + // We need to suspend/restart the EE to get each thread's + // non-allocated memory from their allocation contexts + + GCToEEInterface::SuspendEE(SUSPEND_REASON::SUSPEND_FOR_GC); + + allocated = GCHeapUtilities::GetGCHeap()->GetTotalAllocatedBytes() - RedhawkGCInterface::GetDeadThreadsNonAllocBytes(); + + FOREACH_THREAD(pThread) + { + gc_alloc_context* ac = pThread->GetAllocContext(); + allocated -= ac->alloc_limit - ac->alloc_ptr; + } + END_FOREACH_THREAD + + GCToEEInterface::RestartEE(true); + + return allocated; +} + +static Array* AllocateNewArrayImpl(Thread* pThread, EEType* pArrayEEType, UInt32 numElements, UInt32 flags) +{ + size_t size; +#ifndef HOST_64BIT + // if the element count is <= 0x10000, no overflow is possible because the component size is + // <= 0xffff, and thus the product is <= 0xffff0000, and the base size is only ~12 bytes + if (numElements > 0x10000) + { + // Perform the size computation using 64-bit integeres to detect overflow + uint64_t size64 = (uint64_t)pArrayEEType->get_BaseSize() + ((uint64_t)numElements * (uint64_t)pArrayEEType->get_ComponentSize()); + size64 = (size64 + (sizeof(UIntNative) - 1)) & ~(sizeof(UIntNative) - 1); + + size = (size_t)size64; + if (size != size64) + { + return NULL; + } + } + else +#endif // !HOST_64BIT + { + size = (size_t)pArrayEEType->get_BaseSize() + ((size_t)numElements * (size_t)pArrayEEType->get_ComponentSize()); + size = ALIGN_UP(size, sizeof(UIntNative)); + } + + size_t max_object_size; +#ifdef HOST_64BIT + if (g_pConfig->GetGCAllowVeryLargeObjects()) + { + max_object_size = (INT64_MAX - 7 - min_obj_size); + } + else +#endif // HOST_64BIT + { + max_object_size = (INT32_MAX - 7 - min_obj_size); + } + + if (size >= max_object_size) + { + return NULL; + } + + const int MaxArrayLength = 0x7FEFFFFF; + const int MaxByteArrayLength = 0x7FFFFFC7; + + // Impose limits on maximum array length in each dimension to allow efficient + // implementation of advanced range check elimination in future. We have to allow + // higher limit for array of bytes (or one byte structs) for backward compatibility. + // Keep in sync with Array.MaxArrayLength in BCL. + if (size > MaxByteArrayLength /* note: comparing allocation size with element count */) + { + // Ensure the above if check covers the minimal interesting size + static_assert(MaxByteArrayLength < (uint64_t)MaxArrayLength * 2, ""); + + if (pArrayEEType->get_ComponentSize() != 1) + { + size_t elementCount = (size - pArrayEEType->get_BaseSize()) / pArrayEEType->get_ComponentSize(); + if (elementCount > MaxArrayLength) + return NULL; + } + else + { + size_t elementCount = size - pArrayEEType->get_BaseSize(); + if (elementCount > MaxByteArrayLength) + return NULL; + } + } + + if (size > RH_LARGE_OBJECT_SIZE) + flags |= GC_ALLOC_LARGE_OBJECT_HEAP; + + // Save the EEType for instrumentation purposes. 
+ RedhawkGCInterface::SetLastAllocEEType(pArrayEEType); + + Array* pArray = (Array*)GCHeapUtilities::GetGCHeap()->Alloc(pThread->GetAllocContext(), size, flags); + if (pArray == NULL) + { + return NULL; + } + + pArray->set_EEType(pArrayEEType); + pArray->InitArrayLength(numElements); + + if (size >= RH_LARGE_OBJECT_SIZE) + GCHeapUtilities::GetGCHeap()->PublishObject((uint8_t*)pArray); + + return pArray; +} + +EXTERN_C REDHAWK_API void RhAllocateNewArray(EEType* pArrayEEType, UInt32 numElements, UInt32 flags, Array** pResult) +{ + Thread* pThread = ThreadStore::GetCurrentThread(); + + pThread->SetupHackPInvokeTunnel(); + pThread->DisablePreemptiveMode(); + + ASSERT(!pThread->IsDoNotTriggerGcSet()); + + *pResult = AllocateNewArrayImpl(pThread, pArrayEEType, numElements, flags); + + pThread->EnablePreemptiveMode(); +} diff --git a/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.cpp new file mode 100644 index 0000000000000..66bc74d4e730e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.cpp @@ -0,0 +1,131 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged GC memory helpers +// + +#include "common.h" +#include "gcenv.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.inl" + +#include "GCMemoryHelpers.h" +#include "GCMemoryHelpers.inl" + +// This function clears a piece of memory in a GC safe way. It makes the guarantee that it will clear memory in at +// least pointer sized chunks whenever possible. Unaligned memory at the beginning and remaining bytes at the end are +// written bytewise. We must make this guarantee whenever we clear memory in the GC heap that could contain object +// references. The GC or other user threads can read object references at any time, clearing them bytewise can result +// in a read on another thread getting incorrect data. +// +// USAGE: The caller is responsible for hoisting any null reference exceptions to a place where the hardware exception +// can be properly translated to a managed exception. +COOP_PINVOKE_CDECL_HELPER(void *, RhpInitMultibyte, (void * mem, int c, size_t size)) +{ + // The caller must do the null-check because we cannot take an AV in the runtime and translate it to managed. + ASSERT(mem != nullptr); + + UIntNative bv = (UInt8)c; + UIntNative pv = 0; + + if (bv != 0) + { + pv = +#if (POINTER_SIZE == 8) + bv << 7*8 | bv << 6*8 | bv << 5*8 | bv << 4*8 | +#endif + bv << 3*8 | bv << 2*8 | bv << 1*8 | bv; + } + + InlineGCSafeFillMemory(mem, size, pv); + + // memset returns the destination buffer + return mem; +} + + +// This is a GC-safe variant of memcpy. It guarantees that the object references in the GC heap are updated atomically. +// This is required for type safety and proper operation of the background GC. +// +// USAGE: 1) The caller is responsible for performing the appropriate bulk write barrier. +// 2) The caller is responsible for hoisting any null reference exceptions to a place where the hardware +// exception can be properly translated to a managed exception. This is handled by RhpCopyMultibyte. +// 3) The caller must ensure that all three parameters are pointer-size-aligned. This should be the case for +// value types which contain GC refs anyway, so if you want to copy structs without GC refs which might be +// unaligned, then you must use RhpCopyMultibyteNoGCRefs. 
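+// As an illustration only (a sketch, not taken from these sources): a caller that copies a pointer-aligned
+// struct containing GC refs with this helper is expected to issue the barrier itself, e.g.
+//     memcpyGCRefs(pDest, pSrc, cbStruct);          // GC-safe, pointer-sized chunk copy
+//     RhpBulkWriteBarrier(pDest, (UInt32)cbStruct); // caller-supplied bulk write barrier
+// whereas memcpyGCRefsWithWriteBarrier below performs both steps itself.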
+COOP_PINVOKE_CDECL_HELPER(void *, memcpyGCRefs, (void * dest, const void *src, size_t len)) +{ + // null pointers are not allowed (they are checked by RhpCopyMultibyte) + ASSERT(dest != nullptr); + ASSERT(src != nullptr); + + InlineForwardGCSafeCopy(dest, src, len); + + // memcpy returns the destination buffer + return dest; +} + +// This is a GC-safe variant of memcpy. It guarantees that the object references in the GC heap are updated atomically. +// This is required for type safety and proper operation of the background GC. +// Writebarrier is included. +// +// USAGE: +// 1) The caller is responsible for hoisting any null reference exceptions to a place where the hardware +// exception can be properly translated to a managed exception. This is handled by RhpCopyMultibyte. +// 2) The caller must ensure that all three parameters are pointer-size-aligned. This should be the case for +// value types which contain GC refs anyway, so if you want to copy structs without GC refs which might be +// unaligned, then you must use RhpCopyMultibyteNoGCRefs. +COOP_PINVOKE_CDECL_HELPER(void *, memcpyGCRefsWithWriteBarrier, (void * dest, const void *src, size_t len)) +{ + // null pointers are not allowed (they are checked by RhpCopyMultibyteWithWriteBarrier) + ASSERT(dest != nullptr); + ASSERT(src != nullptr); + + InlineForwardGCSafeCopy(dest, src, len); + InlinedBulkWriteBarrier(dest, len); + + // memcpy returns the destination buffer + return dest; +} + +// Same as memcpyGCRefsWithWriteBarrier, except it checks if memory might contain GC pointers +// and if so dispatches to memcpyGCRefsWithWriteBarrier and if not uses traditional memcpy +COOP_PINVOKE_CDECL_HELPER(void *, memcpyAnyWithWriteBarrier, (void * dest, const void *src, size_t len)) +{ + // null pointers are not allowed (they are checked by RhpCopyMultibyteWithWriteBarrier) + ASSERT(dest != nullptr); + ASSERT(src != nullptr); + + // Use GC safe copy whenever there might be GC pointers + if (IS_ALIGNED(dest, sizeof(size_t)) && IS_ALIGNED(src, sizeof(size_t)) && IS_ALIGNED(len, sizeof(size_t))) + { + return memcpyGCRefsWithWriteBarrier(dest, src, len); + } + + return memcpy(dest, src, len); +} + +// Move memory, in a way that is compatible with a move onto the heap, but +// does not require the destination pointer to be on the heap. + +COOP_PINVOKE_HELPER(void, RhBulkMoveWithWriteBarrier, (uint8_t* pDest, uint8_t* pSrc, size_t cbDest)) +{ + if (pDest <= pSrc || pSrc + cbDest <= pDest) + InlineForwardGCSafeCopy(pDest, pSrc, cbDest); + else + InlineBackwardGCSafeCopy(pDest, pSrc, cbDest); + + InlinedBulkWriteBarrier(pDest, cbDest); +} + +void GCSafeCopyMemoryWithWriteBarrier(void * dest, const void *src, size_t len) +{ + InlineForwardGCSafeCopy(dest, src, len); + InlinedBulkWriteBarrier(dest, len); +} + +void REDHAWK_CALLCONV RhpBulkWriteBarrier(void* pMemStart, UInt32 cbMemSize) +{ + InlinedBulkWriteBarrier(pMemStart, cbMemSize); +} diff --git a/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.h b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.h new file mode 100644 index 0000000000000..3d74bd3fa498a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.h @@ -0,0 +1,10 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// +// Unmanaged GC memory helpers +// + +void GCSafeCopyMemoryWithWriteBarrier(void * dest, const void *src, size_t len); + +EXTERN_C void REDHAWK_CALLCONV RhpBulkWriteBarrier(void* pMemStart, UInt32 cbMemSize); diff --git a/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.inl b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.inl new file mode 100644 index 0000000000000..5d973bc70edbb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.inl @@ -0,0 +1,255 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "volatile.h" + +// +// Unmanaged GC memory helpers +// + +// This function fills a piece of memory in a GC safe way. It makes the guarantee +// that it will fill memory in at least pointer sized chunks whenever possible. +// Unaligned memory at the beginning and remaining bytes at the end are written bytewise. +// We must make this guarantee whenever we clear memory in the GC heap that could contain +// object references. The GC or other user threads can read object references at any time, +// clearing them bytewise can result in a read on another thread getting incorrect data. +FORCEINLINE void InlineGCSafeFillMemory(void * mem, size_t size, size_t pv) +{ + UInt8 * memBytes = (UInt8 *)mem; + UInt8 * endBytes = &memBytes[size]; + + // handle unaligned bytes at the beginning + while (!IS_ALIGNED(memBytes, sizeof(void *)) && (memBytes < endBytes)) + *memBytes++ = (UInt8)pv; + + // now write pointer sized pieces + // volatile ensures that this doesn't get optimized back into a memset call + size_t nPtrs = (endBytes - memBytes) / sizeof(void *); + volatile UIntNative* memPtr = (UIntNative*)memBytes; + for (size_t i = 0; i < nPtrs; i++) + *memPtr++ = pv; + + // handle remaining bytes at the end + memBytes = (UInt8*)memPtr; + while (memBytes < endBytes) + *memBytes++ = (UInt8)pv; +} + +// These functions copy memory in a GC safe way. They make the guarantee +// that the memory is copied in at least pointer sized chunks. 
+ +FORCEINLINE void InlineForwardGCSafeCopy(void * dest, const void *src, size_t len) +{ + // All parameters must be pointer-size-aligned + ASSERT(IS_ALIGNED(dest, sizeof(size_t))); + ASSERT(IS_ALIGNED(src, sizeof(size_t))); + ASSERT(IS_ALIGNED(len, sizeof(size_t))); + + size_t size = len; + UInt8 * dmem = (UInt8 *)dest; + UInt8 * smem = (UInt8 *)src; + + // regions must be non-overlapping + ASSERT(dmem <= smem || smem + size <= dmem); + + // copy 4 pointers at a time + while (size >= 4 * sizeof(size_t)) + { + size -= 4 * sizeof(size_t); + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + ((size_t *)dmem)[2] = ((size_t *)smem)[2]; + ((size_t *)dmem)[3] = ((size_t *)smem)[3]; + smem += 4 * sizeof(size_t); + dmem += 4 * sizeof(size_t); + } + + // copy 2 trailing pointers, if needed + if ((size & (2 * sizeof(size_t))) != 0) + { + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + smem += 2 * sizeof(size_t); + dmem += 2 * sizeof(size_t); + } + + // finish with one pointer, if needed + if ((size & sizeof(size_t)) != 0) + { + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } +} + +FORCEINLINE void InlineBackwardGCSafeCopy(void * dest, const void *src, size_t len) +{ + // All parameters must be pointer-size-aligned + ASSERT(IS_ALIGNED(dest, sizeof(size_t))); + ASSERT(IS_ALIGNED(src, sizeof(size_t))); + ASSERT(IS_ALIGNED(len, sizeof(size_t))); + + size_t size = len; + UInt8 * dmem = (UInt8 *)dest + len; + UInt8 * smem = (UInt8 *)src + len; + + // regions must be non-overlapping + ASSERT(smem <= dmem || dmem + size <= smem); + + // copy 4 pointers at a time + while (size >= 4 * sizeof(size_t)) + { + size -= 4 * sizeof(size_t); + smem -= 4 * sizeof(size_t); + dmem -= 4 * sizeof(size_t); + ((size_t *)dmem)[3] = ((size_t *)smem)[3]; + ((size_t *)dmem)[2] = ((size_t *)smem)[2]; + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } + + // copy 2 trailing pointers, if needed + if ((size & (2 * sizeof(size_t))) != 0) + { + smem -= 2 * sizeof(size_t); + dmem -= 2 * sizeof(size_t); + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } + + // finish with one pointer, if needed + if ((size & sizeof(size_t)) != 0) + { + smem -= sizeof(size_t); + dmem -= sizeof(size_t); + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } +} + + +#ifndef DACCESS_COMPILE +#ifdef WRITE_BARRIER_CHECK +extern uint8_t* g_GCShadow; +extern uint8_t* g_GCShadowEnd; +typedef DPTR(uint8_t) PTR_uint8_t; +extern "C" { + GPTR_DECL(uint8_t, g_lowest_address); + GPTR_DECL(uint8_t, g_highest_address); +} +#endif + +typedef DPTR(uint32_t) PTR_uint32_t; +extern "C" { + GPTR_DECL(uint32_t, g_card_table); +} +static const UInt32 INVALIDGCVALUE = 0xcccccccd; + +FORCEINLINE void InlineWriteBarrier(void * dst, void * ref) +{ + if (((uint8_t*)ref >= g_ephemeral_low) && ((uint8_t*)ref < g_ephemeral_high)) + { + // volatile is used here to prevent fetch of g_card_table from being reordered + // with g_lowest/highest_address check above. See comment in code:gc_heap::grow_brick_card_tables. 
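+ // Setting the card byte that covers 'dst' (one card per 2^LOG2_CLUMP_SIZE bytes of heap) tells the next GC to rescan that clump for references into the ephemeral generations; testing for 0xFF first avoids dirtying a card that is already marked.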
+ uint8_t* pCardByte = (uint8_t *)VolatileLoadWithoutBarrier(&g_card_table) + ((size_t)dst >> LOG2_CLUMP_SIZE); + if (*pCardByte != 0xFF) + *pCardByte = 0xFF; + } +} + +FORCEINLINE void InlineCheckedWriteBarrier(void * dst, void * ref) +{ + // if the dst is outside of the heap (unboxed value classes) then we + // simply exit + if (((uint8_t*)dst < g_lowest_address) || ((uint8_t*)dst >= g_highest_address)) + return; + + InlineWriteBarrier(dst, ref); +} + +FORCEINLINE void InlinedBulkWriteBarrier(void* pMemStart, size_t cbMemSize) +{ + // Check whether the writes were even into the heap. If not there's no card update required. + // Also if the size is smaller than a pointer, no write barrier is required. + // This case can occur with universal shared generic code where the size + // is not known at compile time. + if (pMemStart < g_lowest_address || (pMemStart >= g_highest_address) || (cbMemSize < sizeof(UIntNative))) + { + return; + } + +#ifdef WRITE_BARRIER_CHECK + // Perform shadow heap updates corresponding to the gc heap updates that immediately preceded this helper + // call. + + // If g_GCShadow is 0, don't perform the check. + if (g_GCShadow != NULL) + { + // Compute the shadow heap address corresponding to the beginning of the range of heap addresses modified + // and in the process range check it to make sure we have the shadow version allocated. + UIntNative* shadowSlot = (UIntNative*)(g_GCShadow + ((uint8_t*)pMemStart - g_lowest_address)); + if (shadowSlot <= (UIntNative*)g_GCShadowEnd) + { + // Iterate over every pointer sized slot in the range, copying data from the real heap to the shadow heap. + // As we perform each copy we need to recheck the real heap contents with an ordered read to ensure we're + // not racing with another heap updater. If we discover a race we invalidate the corresponding shadow heap + // slot using a special well-known value so that this location will not be tested during the next shadow + // heap validation. + + UIntNative* realSlot = (UIntNative*)pMemStart; + UIntNative slotCount = cbMemSize / sizeof(UIntNative); + do + { + // Update shadow slot from real slot. + UIntNative realValue = *realSlot; + *shadowSlot = realValue; + // Memory barrier to ensure the next read is ordered wrt to the shadow heap write we just made. + PalMemoryBarrier(); + + // Read the real slot contents again. If they don't agree with what we just wrote then someone just raced + // with us and updated the heap again. In such cases we invalidate the shadow slot. + if (*realSlot != realValue) + { + *shadowSlot = INVALIDGCVALUE; + } + + realSlot++; + shadowSlot++; + slotCount--; + } + while (slotCount > 0); + } + } + +#endif // WRITE_BARRIER_CHECK + + // Compute the starting card address and the number of bytes to write (groups of 8 cards). We could try + // for further optimization here using aligned 32-bit writes but there's some overhead in setup required + // and additional complexity. It's not clear this is warranted given that a single byte of card table + // update already covers 1K of object space (2K on 64-bit platforms). It's also not worth probing that + // 1K/2K range to see if any of the pointers appear to be non-ephemeral GC references. Given the size of + // the area the chances are high that at least one interesting GC refenence is present. 
+ + size_t startAddress = (size_t)pMemStart; + size_t endAddress = startAddress + cbMemSize; + size_t startingClump = startAddress >> LOG2_CLUMP_SIZE; + size_t endingClump = (endAddress + CLUMP_SIZE - 1) >> LOG2_CLUMP_SIZE; + + // calculate the number of clumps to mark (round_up(end) - start) + size_t clumpCount = endingClump - startingClump; + // VolatileLoadWithoutBarrier() is used here to prevent fetch of g_card_table from being reordered + // with g_lowest/highest_address check at the beginning of this function. + uint8_t* card = ((uint8_t*)VolatileLoadWithoutBarrier(&g_card_table)) + startingClump; + + // Fill the cards. To avoid cache line thrashing we check whether the cards have already been set before + // writing. + do + { + if (*card != 0xff) + { + *card = 0xff; + } + + card++; + clumpCount--; + } + while (clumpCount != 0); +} +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/GcStressControl.cpp b/src/coreclr/src/nativeaot/Runtime/GcStressControl.cpp new file mode 100644 index 0000000000000..5296d30971c29 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GcStressControl.cpp @@ -0,0 +1,182 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" + +#if defined(FEATURE_GC_STRESS) & !defined(DACCESS_COMPILE) + + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "holder.h" +#include "Crst.h" +#include "RhConfig.h" +#include "gcrhinterface.h" +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "forward_declarations.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "shash.h" +#include "shash.inl" +#include "GcStressControl.h" + + +class GcStressControl +{ +public: + static bool ShouldHijack(UIntNative CallsiteIP, HijackType ht) + { + if (s_initState != isInited) + Initialize(); + + // don't hijack for GC stress if we're in a "no GC stress" region + Thread * pCurrentThread = ThreadStore::GetCurrentThread(); + if (pCurrentThread->IsSuppressGcStressSet()) + return false; + + if (g_pRhConfig->GetGcStressThrottleMode() == 0) + { + return true; + } + if (g_pRhConfig->GetGcStressThrottleMode() & gcstm_TriggerRandom) + { + if (GcStressTriggerRandom(CallsiteIP, ht, pCurrentThread)) + return true; + } + if (g_pRhConfig->GetGcStressThrottleMode() & gcstm_TriggerOnFirstHit) + { + if (GcStressTriggerFirstHit(CallsiteIP, ht)) + return true; + } + return false; + } + +private: + enum InitState { isNotInited, isIniting, isInited }; + + static void Initialize() + { + volatile InitState is = (InitState) PalInterlockedCompareExchange((volatile Int32*)(&s_initState), isIniting, isNotInited); + if (is == isNotInited) + { + s_lock.InitNoThrow(CrstGcStressControl); + + if (g_pRhConfig->GetGcStressSeed()) + s_lGcStressRNGSeed = g_pRhConfig->GetGcStressSeed(); + else + s_lGcStressRNGSeed = PalGetTickCount(); + + if (g_pRhConfig->GetGcStressFreqDenom()) + s_lGcStressFreqDenom = g_pRhConfig->GetGcStressFreqDenom(); + else + s_lGcStressFreqDenom = 10000; + + s_initState = isInited; + } + else + { + while (s_initState != isInited) + ; + } + } + + // returns true if no entry was found for CallsiteIP, false otherwise + static bool GcStressTrackAtIP(UIntNative CallsiteIP, HijackType ht, bool bForceGC) + { + // do this under a lock, as the 
underlying SHash might be "grown" by + // operations on other threads + + CrstHolder lh(&s_lock); + + const CallsiteCountEntry * pEntry = s_callsites.LookupPtr(CallsiteIP); + size_t hits; + + if (pEntry == NULL) + { + hits = 1; + CallsiteCountEntry e = {CallsiteIP, 1, 1, ht}; + s_callsites.AddOrReplace(e); + } + else + { + hits = ++(const_cast(pEntry)->countHit); + if (bForceGC) + { + ++(const_cast(pEntry)->countForced); + } + } + + return pEntry == NULL; + } + + static bool GcStressTriggerFirstHit(UIntNative CallsiteIP, HijackType ht) + { + return GcStressTrackAtIP(CallsiteIP, ht, false); + } + + static UInt32 GcStressRNG(UInt32 uMaxValue, Thread *pCurrentThread) + { + if (!pCurrentThread->IsRandInited()) + { + pCurrentThread->SetRandomSeed(s_lGcStressRNGSeed); + } + + return pCurrentThread->NextRand() % uMaxValue; + } + + static bool GcStressTriggerRandom(UIntNative CallsiteIP, HijackType ht, Thread *pCurrentThread) + { + bool bRes = false; + if (ht == htLoop) + { + bRes = GcStressRNG(s_lGcStressFreqDenom , pCurrentThread) < g_pRhConfig->GetGcStressFreqLoop(); + } + else if (ht == htCallsite) + { + bRes = GcStressRNG(s_lGcStressFreqDenom , pCurrentThread) < g_pRhConfig->GetGcStressFreqCallsite(); + } + if (bRes) + { + // if we're about to trigger a GC, track this in s_callsites + GcStressTrackAtIP(CallsiteIP, ht, true); + } + return bRes; + } + +private: + static CrstStatic s_lock; + static UInt32 s_lGcStressRNGSeed; + static UInt32 s_lGcStressFreqDenom; + static volatile InitState s_initState; + +public: + static CallsiteCountSHash s_callsites; // exposed to the DAC +}; + +// public interface: + +CallsiteCountSHash GcStressControl::s_callsites; +CrstStatic GcStressControl::s_lock; +UInt32 GcStressControl::s_lGcStressRNGSeed = 0; +UInt32 GcStressControl::s_lGcStressFreqDenom = 0; +volatile GcStressControl::InitState GcStressControl::s_initState = GcStressControl::isNotInited; + +GPTR_IMPL_INIT(CallsiteCountSHash, g_pCallsites, &GcStressControl::s_callsites); + +bool ShouldHijackForGcStress(UIntNative CallsiteIP, HijackType ht) +{ + return GcStressControl::ShouldHijack(CallsiteIP, ht); +} + +#endif // FEATURE_GC_STRESS & !DACCESS_COMPILE + + diff --git a/src/coreclr/src/nativeaot/Runtime/GcStressControl.h b/src/coreclr/src/nativeaot/Runtime/GcStressControl.h new file mode 100644 index 0000000000000..3d564d7a6ef0f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GcStressControl.h @@ -0,0 +1,51 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
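+
+// GC stress hijack throttling: GcStressThrottleMode below is a bit mask. A value of 0 (gcstm_TriggerAlways)
+// triggers a GC stress hijack at every GC safe point, while gcstm_TriggerOnFirstHit and gcstm_TriggerRandom restrict
+// hijacking to the first hit at a callsite or to a random subset of hits (see GcStressControl::ShouldHijack).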
+#ifndef __GcStressControl_h__ +#define __GcStressControl_h__ + + +enum HijackType { htLoop, htCallsite }; +bool ShouldHijackForGcStress(UIntNative CallsiteIP, HijackType ht); + + +enum GcStressThrottleMode { + gcstm_TriggerAlways = 0x0000, // trigger a GC every time we hit a GC safe point + gcstm_TriggerOnFirstHit = 0x0001, // trigger a GC the first time a GC safe point is hit + gcstm_TriggerRandom = 0x0002, // trigger a GC randomly, as defined by GcStressFreqCallsite/GcStressFreqLoop/GcStressSeed +}; + +struct CallsiteCountEntry +{ + UIntNative callsiteIP; + UIntNative countHit; + UIntNative countForced; + HijackType ht; +}; + +typedef DPTR(CallsiteCountEntry) PTR_CallsiteCountEntry; + +class CallsiteCountTraits: public NoRemoveSHashTraits< DefaultSHashTraits < CallsiteCountEntry > > +{ +public: + typedef UIntNative key_t; + + static UIntNative GetKey(const CallsiteCountEntry & e) { return e.callsiteIP; } + + static count_t Hash(UIntNative k) + { return (count_t) k; } + + static bool Equals(UIntNative k1, UIntNative k2) + { return k1 == k2; } + + static CallsiteCountEntry Null() + { CallsiteCountEntry e; e.callsiteIP = 0; return e; } + + static bool IsNull(const CallsiteCountEntry & e) + { return e.callsiteIP == 0; } +}; + +typedef SHash < CallsiteCountTraits > CallsiteCountSHash; +typedef DPTR(CallsiteCountSHash) PTR_CallsiteCountSHash; + + +#endif // __GcStressControl_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/HandleTableHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/HandleTableHelpers.cpp new file mode 100644 index 0000000000000..83cfe0e0c1431 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/HandleTableHelpers.cpp @@ -0,0 +1,83 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Helper functions that are p/invoked from redhawkm in order to expose handle table functionality to managed +// code. These p/invokes are special in that the handle table code requires we remain in co-operative mode +// (since these routines mutate the handle tables which are also accessed during garbage collections). The +// binder has special knowledge of these methods and doesn't generate the normal code to transition out of the +// runtime prior to the call. +// +#include "common.h" +#include "gcenv.h" +#include "objecthandle.h" +#include "RestrictedCallouts.h" +#include "gchandleutilities.h" + + +COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAlloc, (Object *pObject, int type)) +{ + return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateHandleOfType(pObject, (HandleType)type); +} + +COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAllocDependent, (Object *pPrimary, Object *pSecondary)) +{ + return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateDependentHandle(pPrimary, pSecondary); +} + +COOP_PINVOKE_HELPER(void, RhHandleFree, (OBJECTHANDLE handle)) +{ + GCHandleUtilities::GetGCHandleManager()->DestroyHandleOfUnknownType(handle); +} + +COOP_PINVOKE_HELPER(Object *, RhHandleGet, (OBJECTHANDLE handle)) +{ + return ObjectFromHandle(handle); +} + +COOP_PINVOKE_HELPER(Object *, RhHandleGetDependent, (OBJECTHANDLE handle, Object **ppSecondary)) +{ + Object *pPrimary = ObjectFromHandle(handle); + *ppSecondary = (pPrimary != NULL) ? 
GetDependentHandleSecondary(handle) : NULL; + return pPrimary; +} + +COOP_PINVOKE_HELPER(void, RhHandleSetDependentSecondary, (OBJECTHANDLE handle, Object *pSecondary)) +{ + SetDependentHandleSecondary(handle, pSecondary); +} + +COOP_PINVOKE_HELPER(void, RhHandleSet, (OBJECTHANDLE handle, Object *pObject)) +{ + GCHandleUtilities::GetGCHandleManager()->StoreObjectInHandle(handle, pObject); +} + +COOP_PINVOKE_HELPER(Boolean, RhRegisterRefCountedHandleCallback, (void * pCallout, EEType * pTypeFilter)) +{ + return RestrictedCallouts::RegisterRefCountedHandleCallback(pCallout, pTypeFilter); +} + +COOP_PINVOKE_HELPER(void, RhUnregisterRefCountedHandleCallback, (void * pCallout, EEType * pTypeFilter)) +{ + RestrictedCallouts::UnregisterRefCountedHandleCallback(pCallout, pTypeFilter); +} + +COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAllocVariable, (Object * pObject, UInt32 type)) +{ + return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateHandleWithExtraInfo(pObject, HNDTYPE_VARIABLE, (void*)((uintptr_t)type)); +} + +COOP_PINVOKE_HELPER(UInt32, RhHandleGetVariableType, (OBJECTHANDLE handle)) +{ + return GetVariableHandleType(handle); +} + +COOP_PINVOKE_HELPER(void, RhHandleSetVariableType, (OBJECTHANDLE handle, UInt32 type)) +{ + UpdateVariableHandleType(handle, type); +} + +COOP_PINVOKE_HELPER(UInt32, RhHandleCompareExchangeVariableType, (OBJECTHANDLE handle, UInt32 oldType, UInt32 newType)) +{ + return CompareExchangeVariableHandleType(handle, oldType, newType); +} diff --git a/src/coreclr/src/nativeaot/Runtime/ICodeManager.h b/src/coreclr/src/nativeaot/Runtime/ICodeManager.h new file mode 100644 index 0000000000000..2c92eea53cfd2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ICodeManager.h @@ -0,0 +1,165 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
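// Illustrative sketch, not part of the original header: the ARM64 composite GCRefKind
// values declared later in this file pack one two-bit kind per return register. The local
// mirror below (kScalar/kObject/kByref, Pack, UnpackReg0, UnpackReg1) is hypothetical and
// only demonstrates the packing arithmetic; the #if 0 guard keeps it out of any build.
#if 0
#include <cstdint>

constexpr uint8_t kScalar = 0x0, kObject = 0x1, kByref = 0x2;

constexpr uint8_t Pack(uint8_t reg0, uint8_t reg1) { return (uint8_t)((reg1 << 2) | reg0); }
constexpr uint8_t UnpackReg0(uint8_t composite)    { return (uint8_t)(composite & 0x3); }
constexpr uint8_t UnpackReg1(uint8_t composite)    { return (uint8_t)(composite >> 2); }

// GCRK_Scalar_Obj means the first return register holds a scalar and the second holds an
// object reference, matching ExtractReg0ReturnKind/ExtractReg1ReturnKind declared below.
static_assert(Pack(kScalar, kObject) == 0x4, "two bits per register");
static_assert(UnpackReg0(0x4) == kScalar && UnpackReg1(0x4) == kObject, "round trip");
#endif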
+#pragma once + +#define ICODEMANAGER_INCLUDED + +// TODO: Debugger/DAC support (look for TODO: JIT) + +struct REGDISPLAY; + +#define GC_CALL_INTERIOR 0x1 +#define GC_CALL_PINNED 0x2 +#define GC_CALL_CHECK_APP_DOMAIN 0x4 +#define GC_CALL_STATIC 0x8 + +typedef void (*GCEnumCallback)( + void * hCallback, // callback data + PTR_PTR_VOID pObject, // address of object-reference we are reporting + UInt32 flags // is this a pinned and/or interior pointer +); + +struct GCEnumContext +{ + GCEnumCallback pCallback; +}; + +// All values but GCRK_Unknown must correspond to MethodReturnKind enumeration in gcinfo.h +enum GCRefKind : unsigned char +{ + GCRK_Scalar = 0x00, + GCRK_Object = 0x01, + GCRK_Byref = 0x02, +#ifdef TARGET_ARM64 + // Composite return kinds for value types returned in two registers (encoded with two bits per register) + GCRK_Scalar_Obj = (GCRK_Object << 2) | GCRK_Scalar, + GCRK_Obj_Obj = (GCRK_Object << 2) | GCRK_Object, + GCRK_Byref_Obj = (GCRK_Object << 2) | GCRK_Byref, + GCRK_Scalar_Byref = (GCRK_Byref << 2) | GCRK_Scalar, + GCRK_Obj_Byref = (GCRK_Byref << 2) | GCRK_Object, + GCRK_Byref_Byref = (GCRK_Byref << 2) | GCRK_Byref, + + GCRK_LastValid = GCRK_Byref_Byref, +#else // TARGET_ARM64 + GCRK_LastValid = GCRK_Byref, +#endif // TARGET_ARM64 + GCRK_Unknown = 0xFF, +}; + +#ifdef TARGET_ARM64 +// Extract individual GCRefKind components from a composite return kind +inline GCRefKind ExtractReg0ReturnKind(GCRefKind returnKind) +{ + ASSERT(returnKind <= GCRK_LastValid); + return (GCRefKind)(returnKind & (GCRK_Object | GCRK_Byref)); +} + +inline GCRefKind ExtractReg1ReturnKind(GCRefKind returnKind) +{ + ASSERT(returnKind <= GCRK_LastValid); + return (GCRefKind)(returnKind >> 2); +} +#endif // TARGET_ARM64 + +// +// MethodInfo is placeholder type used to allocate space for MethodInfo. Maximum size +// of the actual method should be less or equal to the placeholder size. +// It avoids memory allocation during stackwalk. +// +class MethodInfo +{ + TADDR dummyPtrs[5]; + Int32 dummyInts[8]; +}; + +class EHEnumState +{ + TADDR dummyPtrs[2]; + Int32 dummyInts[2]; +}; + +enum EHClauseKind +{ + EH_CLAUSE_TYPED = 0, + EH_CLAUSE_FAULT = 1, + EH_CLAUSE_FILTER = 2, + EH_CLAUSE_UNUSED = 3, +}; + +struct EHClause +{ + EHClauseKind m_clauseKind; + UInt32 m_tryStartOffset; + UInt32 m_tryEndOffset; + UInt8* m_filterAddress; + UInt8* m_handlerAddress; + void* m_pTargetType; +}; + +// Note: make sure you change the def in System\Runtime\InternalCalls.cs if you change this! 
+enum class ClasslibFunctionId +{ + GetRuntimeException = 0, + FailFast = 1, + UnhandledExceptionHandler = 2, + AppendExceptionStackFrame = 3, + CheckStaticClassConstruction = 4, + GetSystemArrayEEType = 5, + OnFirstChanceException = 6, + DebugFuncEvalHelper = 7, + DebugFuncEvalAbortHelper = 8, +}; + +enum class AssociatedDataFlags : unsigned char +{ + None = 0, + HasUnboxingStubTarget = 1, +}; + +class ICodeManager +{ +public: + virtual bool FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut) = 0; + + virtual bool IsFunclet(MethodInfo * pMethodInfo) = 0; + + virtual PTR_VOID GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) = 0; + + virtual void EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback) = 0; + + virtual bool UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame) = 0; // out + + virtual UIntNative GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) = 0; + + virtual bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind) = 0; // out + + virtual void UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo) = 0; + + virtual PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC) = 0; + + virtual bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState) = 0; + + virtual bool EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClause) = 0; + + virtual PTR_VOID GetMethodStartAddress(MethodInfo * pMethodInfo) = 0; + + virtual PTR_VOID GetOsModuleHandle() = 0; + + virtual void * GetClasslibFunction(ClasslibFunctionId functionId) = 0; + + // Returns any custom data attached to the method. Format: + // AssociatedDataFlags // 1 byte. Flags describing the data stored + // Data (stream of bytes) // Variable size (depending on flags). Custom data associated with method + virtual PTR_VOID GetAssociatedData(PTR_VOID ControlPC) = 0; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/MathHelpers.cpp new file mode 100644 index 0000000000000..09fae4a18c15e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/MathHelpers.cpp @@ -0,0 +1,179 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "rhassert.h" + +// +// Floating point and 64-bit integer math helpers. +// + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpDbl2ULng(double val) +{ + return((UInt64)val); +} + +#undef min +#undef max +#include + +EXTERN_C REDHAWK_API float REDHAWK_CALLCONV RhpFltRem(float dividend, float divisor) +{ + // + // From the ECMA standard: + // + // If [divisor] is zero or [dividend] is infinity + // the result is NaN. + // If [divisor] is infinity, + // the result is [dividend] (negated for -infinity***). + // + // ***"negated for -infinity" has been removed from the spec + // + + if (divisor==0 || !std::isfinite(dividend)) + { + return -nanf(0); + } + else if (!std::isfinite(divisor) && !std::isnan(divisor)) + { + return dividend; + } + // else... 
+ return fmodf(dividend,divisor); +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpDblRem(double dividend, double divisor) +{ + // + // From the ECMA standard: + // + // If [divisor] is zero or [dividend] is infinity + // the result is NaN. + // If [divisor] is infinity, + // the result is [dividend] (negated for -infinity***). + // + // ***"negated for -infinity" has been removed from the spec + // + if (divisor==0 || !std::isfinite(dividend)) + { + return -nan(0); + } + else if (!std::isfinite(divisor) && !std::isnan(divisor)) + { + return dividend; + } + // else... + return(fmod(dividend,divisor)); +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpDblRound(double value) +{ + return round(value); +} + +EXTERN_C REDHAWK_API float REDHAWK_CALLCONV RhpFltRound(float value) +{ + return roundf(value); +} + +#ifdef HOST_ARM +EXTERN_C REDHAWK_API Int32 REDHAWK_CALLCONV RhpIDiv(Int32 i, Int32 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API UInt32 REDHAWK_CALLCONV RhpUDiv(UInt32 i, UInt32 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLDiv(Int64 i, Int64 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpULDiv(UInt64 i, UInt64 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API Int32 REDHAWK_CALLCONV RhpIMod(Int32 i, Int32 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API UInt32 REDHAWK_CALLCONV RhpUMod(UInt32 i, UInt32 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLMod(Int64 i, Int64 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpULMod(UInt64 i, UInt64 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLMul(Int64 i, Int64 j) +{ + return i * j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpULMul(UInt64 i, UInt64 j) +{ + return i * j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpLRsz(UInt64 i, Int32 j) +{ + return i >> j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLRsh(Int64 i, Int32 j) +{ + return i >> j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLLsh(Int64 i, Int32 j) +{ + return i << j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpDbl2Lng(double val) +{ + return (Int64)val; +} + +EXTERN_C REDHAWK_API Int32 REDHAWK_CALLCONV RhpDbl2Int(double val) +{ + return (Int32)val; +} + +EXTERN_C REDHAWK_API UInt32 REDHAWK_CALLCONV RhpDbl2UInt(double val) +{ + return (UInt32)val; +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpLng2Dbl(Int64 val) +{ + return (double)val; +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpULng2Dbl(UInt64 val) +{ + return (double)val; +} + +#endif // HOST_ARM diff --git a/src/coreclr/src/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/MiscHelpers.cpp new file mode 100644 index 0000000000000..3b90f0f6d0f35 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/MiscHelpers.cpp @@ -0,0 +1,541 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Miscellaneous unmanaged helpers called by managed code. 
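// Illustrative note, not part of the original sources: the ECMA remainder rules implemented
// by RhpFltRem/RhpDblRem in MathHelpers.cpp above reduce to three cases. SketchDblRemCases
// and its sample values are hypothetical; the #if 0 guard keeps the sketch out of any build.
#if 0
#include <cmath>
#include <limits>

static void SketchDblRemCases()
{
    const double inf = std::numeric_limits<double>::infinity();
    double r1 = RhpDblRem(1.0, 0.0);  // zero divisor      -> NaN
    double r2 = RhpDblRem(inf, 2.0);  // infinite dividend -> NaN
    double r3 = RhpDblRem(5.0, inf);  // infinite divisor  -> the dividend, 5.0
    double r4 = RhpDblRem(5.5, 2.0);  // ordinary case     -> fmod(5.5, 2.0) == 1.5
    (void)r1; (void)r2; (void)r3; (void)r4;
}
#endif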
+// + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "regdisplay.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "event.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" +#include "gcrhinterface.h" +#include "shash.h" +#include "TypeManager.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "slist.inl" +#include "eetype.inl" +#include "CommonMacros.inl" +#include "volatile.h" +#include "GCMemoryHelpers.h" +#include "GCMemoryHelpers.inl" +#include "yieldprocessornormalized.h" + +COOP_PINVOKE_HELPER(void, RhDebugBreak, ()) +{ + PalDebugBreak(); +} + +// Busy spin for the given number of iterations. +COOP_PINVOKE_HELPER(void, RhSpinWait, (Int32 iterations)) +{ + YieldProcessorNormalizationInfo normalizationInfo; + YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, iterations); +} + +// Yield the cpu to another thread ready to process, if one is available. +EXTERN_C REDHAWK_API UInt32_BOOL __cdecl RhYield() +{ + // This must be called via p/invoke -- it's a wait operation and we don't want to block thread suspension on this. + ASSERT_MSG(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(), + "You must p/invoke to RhYield"); + + return PalSwitchToThread(); +} + +EXTERN_C REDHAWK_API void __cdecl RhFlushProcessWriteBuffers() +{ + // This must be called via p/invoke -- it's a wait operation and we don't want to block thread suspension on this. + ASSERT_MSG(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(), + "You must p/invoke to RhFlushProcessWriteBuffers"); + + PalFlushProcessWriteBuffers(); +} + +// Get the list of currently loaded Redhawk modules (as OS HMODULE handles). The caller provides a reference +// to an array of pointer-sized elements and we return the total number of modules currently loaded (whether +// that is less than, equal to or greater than the number of elements in the array). If there are more modules +// loaded than the array will hold then the array is filled to capacity and the caller can tell further +// modules are available based on the return count. It is also possible to call this method without an array, +// in which case just the module count is returned (note that it's still possible for the module count to +// increase between calls to this method). +COOP_PINVOKE_HELPER(UInt32, RhGetLoadedOSModules, (Array * pResultArray)) +{ + // Note that we depend on the fact that this is a COOP helper to make writing into an unpinned array safe. + + // If a result array is passed then it should be an array type with pointer-sized components that are not + // GC-references. + ASSERT(!pResultArray || pResultArray->get_EEType()->IsArray()); + ASSERT(!pResultArray || !pResultArray->get_EEType()->HasReferenceFields()); + ASSERT(!pResultArray || pResultArray->get_EEType()->get_ComponentSize() == sizeof(void*)); + + UInt32 cResultArrayElements = pResultArray ? pResultArray->GetArrayLength() : 0; + HANDLE * pResultElements = pResultArray ? 
(HANDLE*)(pResultArray + 1) : NULL; + + UInt32 cModules = 0; + + ReaderWriterLock::ReadHolder read(&GetRuntimeInstance()->GetTypeManagerLock()); + + RuntimeInstance::OsModuleList *osModules = GetRuntimeInstance()->GetOsModuleList(); + + for (RuntimeInstance::OsModuleList::Iterator iter = osModules->Begin(); iter != osModules->End(); iter++) + { + if (pResultArray && (cModules < cResultArrayElements)) + pResultElements[cModules] = iter->m_osModule; + cModules++; + } + + return cModules; +} + +COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleFromPointer, (PTR_VOID pPointerVal)) +{ + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress(pPointerVal); + + if (pCodeManager != NULL) + return (HANDLE)pCodeManager->GetOsModuleHandle(); + + return NULL; +} + +COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleFromEEType, (EEType * pEEType)) +{ + return pEEType->GetTypeManagerPtr()->AsTypeManager()->GetOsModuleHandle(); +} + +COOP_PINVOKE_HELPER(TypeManagerHandle, RhGetModuleFromEEType, (EEType * pEEType)) +{ + return *pEEType->GetTypeManagerPtr(); +} + +COOP_PINVOKE_HELPER(Boolean, RhFindBlob, (TypeManagerHandle *pTypeManagerHandle, UInt32 blobId, UInt8 ** ppbBlob, UInt32 * pcbBlob)) +{ + TypeManagerHandle typeManagerHandle = *pTypeManagerHandle; + + ReadyToRunSectionType section = + (ReadyToRunSectionType)((UInt32)ReadyToRunSectionType::ReadonlyBlobRegionStart + blobId); + ASSERT(section <= ReadyToRunSectionType::ReadonlyBlobRegionEnd); + + TypeManager* pModule = typeManagerHandle.AsTypeManager(); + + int length; + void* pBlob; + pBlob = pModule->GetModuleSection(section, &length); + + *ppbBlob = (UInt8*)pBlob; + *pcbBlob = (UInt32)length; + + return pBlob != NULL; +} + +// This helper is not called directly but is used by the implementation of RhpCheckCctor to locate the +// CheckStaticClassConstruction classlib callback. It must not trigger a GC. The return address passed points +// to code in the caller's module and can be used in the lookup. +COOP_PINVOKE_HELPER(void *, GetClasslibCCtorCheck, (void * pReturnAddress)) +{ + // Locate the calling module from the context structure address (which is in writable memory in the + // module image). + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress(pReturnAddress); + ASSERT(pCodeManager); + + // Lookup the callback registered by the classlib. + void * pCallback = pCodeManager->GetClasslibFunction(ClasslibFunctionId::CheckStaticClassConstruction); + + // We have no fallback path if we got here but the classlib doesn't implement the callback. 
+ if (pCallback == NULL) + RhFailFast(); + + return pCallback; +} + +COOP_PINVOKE_HELPER(void *, RhGetTargetOfUnboxingAndInstantiatingStub, (void * pUnboxStub)) +{ + return GetRuntimeInstance()->GetTargetOfUnboxingAndInstantiatingStub(pUnboxStub); +} + +#if TARGET_ARM +//***************************************************************************** +// Extract the 16-bit immediate from ARM Thumb2 Instruction (format T2_N) +//***************************************************************************** +static FORCEINLINE UInt16 GetThumb2Imm16(UInt16 * p) +{ + return ((p[0] << 12) & 0xf000) | + ((p[0] << 1) & 0x0800) | + ((p[1] >> 4) & 0x0700) | + ((p[1] >> 0) & 0x00ff); +} + +//***************************************************************************** +// Extract the 32-bit immediate from movw/movt sequence +//***************************************************************************** +inline UInt32 GetThumb2Mov32(UInt16 * p) +{ + // Make sure we are decoding movw/movt sequence + ASSERT((*(p + 0) & 0xFBF0) == 0xF240); + ASSERT((*(p + 2) & 0xFBF0) == 0xF2C0); + + return (UInt32)GetThumb2Imm16(p) + ((UInt32)GetThumb2Imm16(p + 2) << 16); +} + +//***************************************************************************** +// Extract the 24-bit distance from a B/BL instruction +//***************************************************************************** +inline Int32 GetThumb2BlRel24(UInt16 * p) +{ + UInt16 Opcode0 = p[0]; + UInt16 Opcode1 = p[1]; + + UInt32 S = Opcode0 >> 10; + UInt32 J2 = Opcode1 >> 11; + UInt32 J1 = Opcode1 >> 13; + + Int32 ret = + ((S << 24) & 0x1000000) | + (((J1 ^ S ^ 1) << 23) & 0x0800000) | + (((J2 ^ S ^ 1) << 22) & 0x0400000) | + ((Opcode0 << 12) & 0x03FF000) | + ((Opcode1 << 1) & 0x0000FFE); + + // Sign-extend and return + return (ret << 7) >> 7; +} +#endif // TARGET_ARM + +// Given a pointer to code, find out if this points to an import stub +// or unboxing stub, and if so, return the address that stub jumps to +COOP_PINVOKE_HELPER(UInt8 *, RhGetCodeTarget, (UInt8 * pCodeOrg)) +{ + bool unboxingStub = false; + + // First, check the unboxing stubs regions known by the runtime (if any exist) + if (!GetRuntimeInstance()->IsUnboxingStub(pCodeOrg)) + { + return pCodeOrg; + } + +#ifdef TARGET_AMD64 + UInt8 * pCode = pCodeOrg; + + // is this "add rcx/rdi,8"? + if (pCode[0] == 0x48 && + pCode[1] == 0x83 && +#ifdef UNIX_AMD64_ABI + pCode[2] == 0xc7 && +#else + pCode[2] == 0xc1 && +#endif + pCode[3] == 0x08) + { + // unboxing sequence + unboxingStub = true; + pCode += 4; + } + // is this an indirect jump? + if (pCode[0] == 0xff && pCode[1] == 0x25) + { + // normal import stub - dist to IAT cell is relative to the point *after* the instruction + Int32 distToIatCell = *(Int32 *)&pCode[2]; + UInt8 ** pIatCell = (UInt8 **)(pCode + 6 + distToIatCell); + return *pIatCell; + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && pCode[0] == 0xe9) + { + // relative jump - dist is relative to the point *after* the instruction + Int32 distToTarget = *(Int32 *)&pCode[1]; + UInt8 * target = pCode + 5 + distToTarget; + return target; + } + +#elif TARGET_X86 + UInt8 * pCode = pCodeOrg; + + // is this "add ecx,4"? + if (pCode[0] == 0x83 && pCode[1] == 0xc1 && pCode[2] == 0x04) + { + // unboxing sequence + unboxingStub = true; + pCode += 3; + } + // is this an indirect jump? 
+ if (pCode[0] == 0xff && pCode[1] == 0x25) + { + // normal import stub - address of IAT follows + UInt8 **pIatCell = *(UInt8 ***)&pCode[2]; + return *pIatCell; + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && pCode[0] == 0xe9) + { + // relative jump - dist is relative to the point *after* the instruction + Int32 distToTarget = *(Int32 *)&pCode[1]; + UInt8 * pTarget = pCode + 5 + distToTarget; + return pTarget; + } + +#elif TARGET_ARM + UInt16 * pCode = (UInt16 *)((size_t)pCodeOrg & ~THUMB_CODE); + // is this "adds r0,4"? + if (pCode[0] == 0x3004) + { + // unboxing sequence + unboxingStub = true; + pCode += 1; + } + // is this movw r12,#imm16; movt r12,#imm16; ldr pc,[r12] + // or movw r12,#imm16; movt r12,#imm16; bx r12 + if ((pCode[0] & 0xfbf0) == 0xf240 && (pCode[1] & 0x0f00) == 0x0c00 + && (pCode[2] & 0xfbf0) == 0xf2c0 && (pCode[3] & 0x0f00) == 0x0c00 + && ((pCode[4] == 0xf8dc && pCode[5] == 0xf000) || pCode[4] == 0x4760)) + { + if (pCode[4] == 0xf8dc && pCode[5] == 0xf000) + { + // ldr pc,[r12] + UInt8 **pIatCell = (UInt8 **)GetThumb2Mov32(pCode); + return *pIatCell; + } + else if (pCode[4] == 0x4760) + { + // bx r12 + return (UInt8 *)GetThumb2Mov32(pCode); + } + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && (pCode[0] & 0xf800) == 0xf000 && (pCode[1] & 0xd000) == 0x9000) + { + Int32 distToTarget = GetThumb2BlRel24(pCode); + UInt8 * pTarget = (UInt8 *)(pCode + 2) + distToTarget + THUMB_CODE; + return (UInt8 *)pTarget; + } + +#elif TARGET_ARM64 + UInt32 * pCode = (UInt32 *)pCodeOrg; + // is this "add x0,x0,#8"? + if (pCode[0] == 0x91002000) + { + // unboxing sequence + unboxingStub = true; + pCode++; + } + // is this an indirect jump? + // adrp xip0,#imm21; ldr xip0,[xip0,#imm12]; br xip0 + if ((pCode[0] & 0x9f00001f) == 0x90000010 && + (pCode[1] & 0xffc003ff) == 0xf9400210 && + pCode[2] == 0xd61f0200) + { + // normal import stub - dist to IAT cell is relative to (PC & ~0xfff) + // adrp: imm = SignExtend(immhi:immlo:Zeros(12), 64); + Int64 distToIatCell = (((((Int64)pCode[0] & ~0x1f) << 40) >> 31) | ((pCode[0] >> 17) & 0x3000)); + // ldr: offset = LSL(ZeroExtend(imm12, 64), 3); + distToIatCell += (pCode[1] >> 7) & 0x7ff8; + UInt8 ** pIatCell = (UInt8 **)(((Int64)pCode & ~0xfff) + distToIatCell); + return *pIatCell; + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && (pCode[0] >> 26) == 0x5) + { + // relative jump - dist is relative to the instruction + // offset = SignExtend(imm26:'00', 64); + Int64 distToTarget = ((Int64)pCode[0] << 38) >> 36; + return (UInt8 *)pCode + distToTarget; + } +#else + UNREFERENCED_PARAMETER(unboxingStub); + PORTABILITY_ASSERT("RhGetCodeTarget"); +#endif + + return pCodeOrg; +} + +// +// Return true if the array slice is valid +// +FORCEINLINE bool CheckArraySlice(Array * pArray, Int32 index, Int32 length) +{ + Int32 arrayLength = pArray->GetArrayLength(); + + return (0 <= index) && (index <= arrayLength) && + (0 <= length) && (length <= arrayLength) && + (length <= arrayLength - index); +} + +// +// This function handles all cases of Array.Copy that do not require conversions or casting. It returns false if the copy cannot be performed, leaving +// the handling of the complex cases or throwing appropriate exception to the higher level framework. 
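// Illustrative note, not part of the original comment: CheckArraySlice above compares
// "length <= arrayLength - index" instead of "index + length <= arrayLength" so that the
// arithmetic cannot overflow once index and length are known to be within range. The helper
// names and sample values below are hypothetical; the #if 0 guard keeps them out of any build.
#if 0
#include <cstdint>

static bool NaiveSliceCheck(int32_t arrayLength, int32_t index, int32_t length)
{
    // index + length can overflow a signed 32-bit value, which is undefined behavior.
    return index >= 0 && length >= 0 && index + length <= arrayLength;
}

static bool SubtractionSliceCheck(int32_t arrayLength, int32_t index, int32_t length)
{
    // Mirrors CheckArraySlice: every intermediate value stays within [0, arrayLength].
    return index >= 0 && index <= arrayLength &&
           length >= 0 && length <= arrayLength &&
           length <= arrayLength - index;
}
// With arrayLength == 10, index == 5 and length == INT32_MAX - 2, the naive form overflows,
// while the subtraction form simply rejects the slice because length > arrayLength.
#endif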
+// +COOP_PINVOKE_HELPER(Boolean, RhpArrayCopy, (Array * pSourceArray, Int32 sourceIndex, Array * pDestinationArray, Int32 destinationIndex, Int32 length)) +{ + if (pSourceArray == NULL || pDestinationArray == NULL) + return false; + + EEType* pArrayType = pSourceArray->get_EEType(); + EEType* pDestinationArrayType = pDestinationArray->get_EEType(); + if (pArrayType != pDestinationArrayType) + { + if (!pArrayType->IsEquivalentTo(pDestinationArrayType)) + return false; + } + + size_t componentSize = pArrayType->get_ComponentSize(); + if (componentSize == 0) // Not an array + return false; + + if (!CheckArraySlice(pSourceArray, sourceIndex, length)) + return false; + + if (!CheckArraySlice(pDestinationArray, destinationIndex, length)) + return false; + + if (length == 0) + return true; + + UInt8 * pSourceData = (UInt8 *)pSourceArray->GetArrayData() + sourceIndex * componentSize; + UInt8 * pDestinationData = (UInt8 *)pDestinationArray->GetArrayData() + destinationIndex * componentSize; + size_t size = length * componentSize; + + if (pArrayType->HasReferenceFields()) + { + if (pDestinationData <= pSourceData || pSourceData + size <= pDestinationData) + InlineForwardGCSafeCopy(pDestinationData, pSourceData, size); + else + InlineBackwardGCSafeCopy(pDestinationData, pSourceData, size); + + InlinedBulkWriteBarrier(pDestinationData, size); + } + else + { + memmove(pDestinationData, pSourceData, size); + } + + return true; +} + +// +// This function handles all cases of Array.Clear that do not require conversions. It returns false if the operation cannot be performed, leaving +// the handling of the complex cases or throwing appropriate exception to the higher level framework. It is only allowed to return false for illegal +// calls as the BCL side has fallback for "complex cases" only. +// +COOP_PINVOKE_HELPER(Boolean, RhpArrayClear, (Array * pArray, Int32 index, Int32 length)) +{ + if (pArray == NULL) + return false; + + EEType* pArrayType = pArray->get_EEType(); + + size_t componentSize = pArrayType->get_ComponentSize(); + if (componentSize == 0) // Not an array + return false; + + if (!CheckArraySlice(pArray, index, length)) + return false; + + if (length == 0) + return true; + + InlineGCSafeFillMemory((UInt8 *)pArray->GetArrayData() + index * componentSize, length * componentSize, 0); + + return true; +} + +// Get the universal transition thunk. If the universal transition stub is called through +// the normal PE static linkage model, a jump stub would be used which may interfere with +// the custom calling convention of the universal transition thunk. So instead, a special +// api just for getting the thunk address is needed. +// TODO: On ARM this may still result in a jump stub that trashes R12. Determine if anything +// needs to be done about that when we implement the stub for ARM. 
+extern "C" void RhpUniversalTransition(); +COOP_PINVOKE_HELPER(void*, RhGetUniversalTransitionThunk, ()) +{ + return (void*)RhpUniversalTransition; +} + +extern CrstStatic g_CastCacheLock; + +EXTERN_C REDHAWK_API void __cdecl RhpAcquireCastCacheLock() +{ + g_CastCacheLock.Enter(); +} + +EXTERN_C REDHAWK_API void __cdecl RhpReleaseCastCacheLock() +{ + g_CastCacheLock.Leave(); +} + +extern CrstStatic g_ThunkPoolLock; + +EXTERN_C REDHAWK_API void __cdecl RhpAcquireThunkPoolLock() +{ + g_ThunkPoolLock.Enter(); +} + +EXTERN_C REDHAWK_API void __cdecl RhpReleaseThunkPoolLock() +{ + g_ThunkPoolLock.Leave(); +} + +EXTERN_C Int32 __cdecl RhpCalculateStackTraceWorker(void* pOutputBuffer, UInt32 outputBufferLength); + +EXTERN_C REDHAWK_API Int32 __cdecl RhpGetCurrentThreadStackTrace(void* pOutputBuffer, UInt32 outputBufferLength) +{ + // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable. + + ThreadStore::GetCurrentThread()->SetupHackPInvokeTunnel(); + + return RhpCalculateStackTraceWorker(pOutputBuffer, outputBufferLength); +} + +COOP_PINVOKE_HELPER(void*, RhpRegisterFrozenSegment, (void* pSegmentStart, size_t length)) +{ + return RedhawkGCInterface::RegisterFrozenSegment(pSegmentStart, length); +} + +COOP_PINVOKE_HELPER(void, RhpUnregisterFrozenSegment, (void* pSegmentHandle)) +{ + RedhawkGCInterface::UnregisterFrozenSegment((GcSegmentHandle)pSegmentHandle); +} + +COOP_PINVOKE_HELPER(void*, RhpGetModuleSection, (TypeManagerHandle *pModule, Int32 headerId, Int32* length)) +{ + return pModule->AsTypeManager()->GetModuleSection((ReadyToRunSectionType)headerId, length); +} + +COOP_PINVOKE_HELPER(void, RhGetCurrentThreadStackBounds, (PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh)) +{ + ThreadStore::GetCurrentThread()->GetStackBounds(ppStackLow, ppStackHigh); +} + +#ifdef TARGET_UNIX + +// Function to call when a thread is detached from the runtime +ThreadExitCallback g_threadExitCallback; + +COOP_PINVOKE_HELPER(void, RhSetThreadExitCallback, (void * pCallback)) +{ + g_threadExitCallback = (ThreadExitCallback)pCallback; +} + +#endif // TARGET_UNIX + +COOP_PINVOKE_HELPER(Int32, RhGetProcessCpuCount, ()) +{ + return PalGetProcessCpuCount(); +} diff --git a/src/coreclr/src/nativeaot/Runtime/ObjectLayout.cpp b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.cpp new file mode 100644 index 0000000000000..ce3328c52c9d4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.cpp @@ -0,0 +1,69 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementations of functions dealing with object layout related types. 
+// +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "rhassert.h" +#include "RedhawkWarnings.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "TargetPtrs.h" +#include "eetype.h" +#include "ObjectLayout.h" + +#ifndef DACCESS_COMPILE +void Object::InitEEType(EEType * pEEType) +{ + ASSERT(NULL == m_pEEType); + m_pEEType = pEEType; +} +#endif + +UInt32 Array::GetArrayLength() +{ + return m_Length; +} + +void* Array::GetArrayData() +{ + UInt8* pData = (UInt8*)this; + pData += (get_EEType()->get_BaseSize() - sizeof(ObjHeader)); + return pData; +} + +#ifndef DACCESS_COMPILE +void Array::InitArrayLength(UInt32 length) +{ + m_Length = length; +} + +void ObjHeader::SetBit(UInt32 uBit) +{ + PalInterlockedOr(&m_uSyncBlockValue, uBit); +} + +void ObjHeader::ClrBit(UInt32 uBit) +{ + PalInterlockedAnd(&m_uSyncBlockValue, ~uBit); +} + +size_t Object::GetSize() +{ + EEType * pEEType = get_EEType(); + + // strings have component size2, all other non-arrays should have 0 + ASSERT(( pEEType->get_ComponentSize() <= 2) || pEEType->IsArray()); + + size_t s = pEEType->get_BaseSize(); + UInt16 componentSize = pEEType->get_ComponentSize(); + if (componentSize > 0) + s += ((Array*)this)->GetArrayLength() * componentSize; + return s; +} + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/ObjectLayout.h b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.h new file mode 100644 index 0000000000000..c3924a2f2c8c5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.h @@ -0,0 +1,129 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Low-level types describing GC object layouts. +// + +// Bits stolen from the sync block index that the GC/HandleTable knows about (currently these are at the same +// positions as the mainline runtime but we can change this below when it becomes apparent how Redhawk will +// handle sync blocks). +#define BIT_SBLK_GC_RESERVE 0x20000000 +#define BIT_SBLK_FINALIZER_RUN 0x40000000 + +// The sync block index header (small structure that immediately precedes every object in the GC heap). Only +// the GC uses this so far, and only to store a couple of bits of information. 
+class ObjHeader +{ +private: +#if defined(HOST_64BIT) + UInt32 m_uAlignpad; +#endif // HOST_64BIT + UInt32 m_uSyncBlockValue; + +public: + UInt32 GetBits() { return m_uSyncBlockValue; } + void SetBit(UInt32 uBit); + void ClrBit(UInt32 uBit); + void SetGCBit() { m_uSyncBlockValue |= BIT_SBLK_GC_RESERVE; } + void ClrGCBit() { m_uSyncBlockValue &= ~BIT_SBLK_GC_RESERVE; } +}; + +//------------------------------------------------------------------------------------------------- +static UIntNative const SYNC_BLOCK_SKEW = sizeof(void *); + +class EEType; +typedef DPTR(class EEType) PTR_EEType; +class MethodTable; + +//------------------------------------------------------------------------------------------------- +class Object +{ + friend class AsmOffsets; + + PTR_EEType m_pEEType; +public: + EEType * get_EEType() const + { return m_pEEType; } + EEType * get_SafeEEType() const + { return dac_cast<PTR_EEType>((dac_cast<TADDR>(m_pEEType)) & ~((UIntNative)3)); } + ObjHeader * GetHeader() { return dac_cast<DPTR(ObjHeader)>(dac_cast<TADDR>(this) - SYNC_BLOCK_SKEW); } +#ifndef DACCESS_COMPILE + void set_EEType(EEType * pEEType) + { m_pEEType = pEEType; } + void InitEEType(EEType * pEEType); + + size_t GetSize(); +#endif + + // + // Adapter methods for GC code so that GC and runtime code can use the same type. + // These methods are deprecated -- only use from existing GC code. + // + MethodTable * RawGetMethodTable() const + { + return (MethodTable*)get_EEType(); + } + MethodTable * GetGCSafeMethodTable() const + { + return (MethodTable *)get_SafeEEType(); + } + void RawSetMethodTable(MethodTable * pMT) + { + m_pEEType = PTR_EEType((EEType *)pMT); + } + ////// End adaptor methods +}; +typedef DPTR(Object) PTR_Object; +typedef DPTR(PTR_Object) PTR_PTR_Object; + +//------------------------------------------------------------------------------------------------- +static UIntNative const MIN_OBJECT_SIZE = (2 * sizeof(void*)) + sizeof(ObjHeader); + +//------------------------------------------------------------------------------------------------- +static UIntNative const REFERENCE_SIZE = sizeof(Object *); + +//------------------------------------------------------------------------------------------------- +class Array : public Object +{ + friend class ArrayBase; + friend class AsmOffsets; + + UInt32 m_Length; +#if defined(HOST_64BIT) + UInt32 m_uAlignpad; +#endif // HOST_64BIT +public: + UInt32 GetArrayLength(); + void InitArrayLength(UInt32 length); + void* GetArrayData(); +}; +typedef DPTR(Array) PTR_Array; + +//------------------------------------------------------------------------------------------------- +class String : public Object +{ + friend class AsmOffsets; + friend class StringConstants; + + UInt32 m_Length; + UInt16 m_FirstChar; +}; +typedef DPTR(String) PTR_String; + +//------------------------------------------------------------------------------------------------- +class StringConstants +{ +public: + static UIntNative const ComponentSize = sizeof(((String*)0)->m_FirstChar); + static UIntNative const BaseSize = sizeof(ObjHeader) + offsetof(String, m_FirstChar) + ComponentSize; +}; + +//------------------------------------------------------------------------------------------------- +static UIntNative const STRING_COMPONENT_SIZE = StringConstants::ComponentSize; + +//------------------------------------------------------------------------------------------------- +static UIntNative const STRING_BASE_SIZE = StringConstants::BaseSize; +
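// Worked illustration, not part of the original header: on a typical 64-bit target the
// declarations above give sizeof(ObjHeader) == 8 (4-byte pad plus 4-byte sync block value),
// offsetof(String, m_FirstChar) == 12 (8-byte EEType pointer plus 4-byte length) and a
// component size of sizeof(UInt16) == 2, so STRING_BASE_SIZE works out to 8 + 12 + 2 = 22.
// The Sketch* types below are hypothetical stand-ins; the #if 0 guard keeps them out of any build.
#if 0
#include <cstddef>
#include <cstdint>

struct SketchObjHeader { uint32_t pad; uint32_t syncBlockValue; };
struct SketchString    { void* pEEType; uint32_t length; uint16_t firstChar; };

static_assert(sizeof(SketchObjHeader) == 8, "object header size on a 64-bit target");
static_assert(offsetof(SketchString, firstChar) == 12, "offset of the first character");
static_assert(sizeof(SketchObjHeader) + offsetof(SketchString, firstChar) + sizeof(uint16_t) == 22,
              "string base size");
#endif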
+//------------------------------------------------------------------------------------------------- +static UIntNative const MAX_STRING_LENGTH = 0x3FFFFFDF; diff --git a/src/coreclr/src/nativeaot/Runtime/OptionalFieldsRuntime.cpp b/src/coreclr/src/nativeaot/Runtime/OptionalFieldsRuntime.cpp new file mode 100644 index 0000000000000..d8b10a35fea2a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/OptionalFieldsRuntime.cpp @@ -0,0 +1,71 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementations of methods of OptionalFields which are used only at runtime (i.e. reading field values). +// +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "rhbinder.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "varint.h" + +// Reads the field type from the current byte of the stream and indicates whether this represents the last +// field. +/*static*/ OptionalFieldTag OptionalFields::DecodeFieldTag(PTR_UInt8 * ppFields, bool *pfLastField) +{ + UInt8 tagByte; + tagByte = **ppFields; + + // The last field has the most significant bit of the byte set. + *pfLastField = (tagByte & 0x80) != 0; + + // The remaining 7 bits encode the field type. + OptionalFieldTag eTag = (OptionalFieldTag)(tagByte & 0x7f); + + // Advance the pointer past the header. + (*ppFields)++; + + return eTag; +} + +// Reads a field value (or the basis for an out-of-line record delta) starting from the first byte after the +// field header. Advances the field location to the start of the next field. +UInt32 OptionalFields::DecodeFieldValue(PTR_UInt8 * ppFields) +{ + // VarInt is used to encode the field value (and updates the field pointer in doing so). + return VarInt::ReadUnsigned(*ppFields); +} + +/*static*/ UInt32 OptionalFields::GetInlineField(OptionalFieldTag eTag, UInt32 uiDefaultValue) +{ + // Point at start of encoding stream. + PTR_UInt8 pFields = dac_cast(this); + + for (;;) + { + // Read field tag, an indication of whether this is the last field and the field value (we always read + // the value, even if the tag is not a match because decoding the value advances the field pointer to + // the next field). + bool fLastField; + OptionalFieldTag eCurrentTag = DecodeFieldTag(&pFields, &fLastField); + UInt32 uiCurrentValue = DecodeFieldValue(&pFields); + + // If we found a tag match return the current value. + if (eCurrentTag == eTag) + return uiCurrentValue; + + // If this was the last field we're done as well. + if (fLastField) + break; + } + + // Reached end of stream without getting a match. Field is not present so return default value. + return uiDefaultValue; +} diff --git a/src/coreclr/src/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/src/nativeaot/Runtime/PalRedhawk.h new file mode 100644 index 0000000000000..7789c05346bfc --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/PalRedhawk.h @@ -0,0 +1,857 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Provides declarations for external resources consumed by Redhawk. This comprises functionality +// normally exported from Win32 libraries such as KERNEL32 and MSVCRT. 
When hosted on Win32 calls to these +// functions become simple pass throughs to the native implementation via export forwarding entries in a PAL +// (Platform Abstraction Layer) library. On other platforms the PAL library has actual code to emulate the +// functionality of these same APIs. +// +// In order to make it both obvious and intentional where Redhawk consumes an external API, such functions are +// decorated with an 'Pal' prefix. Ideally the associated supporting types, constants etc. would be +// similarly isolated from their concrete Win32 definitions, making the extent of platform dependence within +// the core explicit. For now that is too big a work item and we'll settle for manually restricting the use of +// external header files to within this header. +// + +#include +#include +#include "gcenv.structs.h" + +#ifndef PAL_REDHAWK_INCLUDED +#define PAL_REDHAWK_INCLUDED + +/* Adapted from intrin.h - For compatibility with , some intrinsics are __cdecl except on x64 */ +#if defined (_M_X64) +#define __PN__MACHINECALL_CDECL_OR_DEFAULT +#else +#define __PN__MACHINECALL_CDECL_OR_DEFAULT __cdecl +#endif + +#ifndef _INC_WINDOWS +//#ifndef DACCESS_COMPILE + +// There are some fairly primitive type definitions below but don't pull them into the rest of Redhawk unless +// we have to (in which case these definitions will move to CommonTypes.h). +typedef WCHAR * LPWSTR; +typedef const WCHAR * LPCWSTR; +typedef char * LPSTR; +typedef const char * LPCSTR; +typedef void * HINSTANCE; + +typedef void * LPSECURITY_ATTRIBUTES; +typedef void * LPOVERLAPPED; + +#ifndef __GCENV_BASE_INCLUDED__ +#define CALLBACK __stdcall +#define WINAPI __stdcall +#define WINBASEAPI __declspec(dllimport) +#endif //!__GCENV_BASE_INCLUDED__ + +#ifdef TARGET_UNIX +#define DIRECTORY_SEPARATOR_CHAR '/' +#else // TARGET_UNIX +#define DIRECTORY_SEPARATOR_CHAR '\\' +#endif // TARGET_UNIX + +typedef union _LARGE_INTEGER { + struct { +#if BIGENDIAN + int32_t HighPart; + uint32_t LowPart; +#else + uint32_t LowPart; + int32_t HighPart; +#endif + } u; + int64_t QuadPart; +} LARGE_INTEGER, *PLARGE_INTEGER; + +typedef struct _GUID { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} GUID; + +#define DECLARE_HANDLE(_name) typedef HANDLE _name + +struct SYSTEM_INFO +{ + union + { + UInt32 dwOemId; + struct { + UInt16 wProcessorArchitecture; + UInt16 wReserved; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + UInt32 dwPageSize; + void * lpMinimumApplicationAddress; + void * lpMaximumApplicationAddress; + UIntNative dwActiveProcessorMask; + UInt32 dwNumberOfProcessors; + UInt32 dwProcessorType; + UInt32 dwAllocationGranularity; + UInt16 wProcessorLevel; + UInt16 wProcessorRevision; +}; + +// defined in gcrhenv.cpp +bool __SwitchToThread(uint32_t dwSleepMSec, uint32_t dwSwitchCount); + +struct FILETIME +{ + UInt32 dwLowDateTime; + UInt32 dwHighDateTime; +}; + +enum MEMORY_RESOURCE_NOTIFICATION_TYPE +{ + LowMemoryResourceNotification, + HighMemoryResourceNotification +}; + +enum LOGICAL_PROCESSOR_RELATIONSHIP +{ + RelationProcessorCore, + RelationNumaNode, + RelationCache, + RelationProcessorPackage +}; + +#define LTP_PC_SMT 0x1 + +enum PROCESSOR_CACHE_TYPE +{ + CacheUnified, + CacheInstruction, + CacheData, + CacheTrace +}; + +struct CACHE_DESCRIPTOR +{ + UInt8 Level; + UInt8 Associativity; + UInt16 LineSize; + UInt32 Size; + PROCESSOR_CACHE_TYPE Type; +}; + +struct SYSTEM_LOGICAL_PROCESSOR_INFORMATION +{ + UIntNative ProcessorMask; + LOGICAL_PROCESSOR_RELATIONSHIP Relationship; + union + { + struct + { + UInt8 
Flags; + } ProcessorCore; + struct + { + UInt32 NodeNumber; + } NumaNode; + CACHE_DESCRIPTOR Cache; + UInt64 Reserved[2]; + }; +}; + +#ifdef HOST_AMD64 + +typedef struct DECLSPEC_ALIGN(16) _XSAVE_FORMAT { + UInt16 ControlWord; + UInt16 StatusWord; + UInt8 TagWord; + UInt8 Reserved1; + UInt16 ErrorOpcode; + UInt32 ErrorOffset; + UInt16 ErrorSelector; + UInt16 Reserved2; + UInt32 DataOffset; + UInt16 DataSelector; + UInt16 Reserved3; + UInt32 MxCsr; + UInt32 MxCsr_Mask; + Fp128 FloatRegisters[8]; +#if defined(HOST_64BIT) + Fp128 XmmRegisters[16]; + UInt8 Reserved4[96]; +#else + Fp128 XmmRegisters[8]; + UInt8 Reserved4[220]; + UInt32 Cr0NpxState; +#endif +} XSAVE_FORMAT, *PXSAVE_FORMAT; + + +typedef XSAVE_FORMAT XMM_SAVE_AREA32, *PXMM_SAVE_AREA32; + +typedef struct DECLSPEC_ALIGN(16) _CONTEXT { + UInt64 P1Home; + UInt64 P2Home; + UInt64 P3Home; + UInt64 P4Home; + UInt64 P5Home; + UInt64 P6Home; + UInt32 ContextFlags; + UInt32 MxCsr; + UInt16 SegCs; + UInt16 SegDs; + UInt16 SegEs; + UInt16 SegFs; + UInt16 SegGs; + UInt16 SegSs; + UInt32 EFlags; + UInt64 Dr0; + UInt64 Dr1; + UInt64 Dr2; + UInt64 Dr3; + UInt64 Dr6; + UInt64 Dr7; + UInt64 Rax; + UInt64 Rcx; + UInt64 Rdx; + UInt64 Rbx; + UInt64 Rsp; + UInt64 Rbp; + UInt64 Rsi; + UInt64 Rdi; + UInt64 R8; + UInt64 R9; + UInt64 R10; + UInt64 R11; + UInt64 R12; + UInt64 R13; + UInt64 R14; + UInt64 R15; + UInt64 Rip; + union { + XMM_SAVE_AREA32 FltSave; + struct { + Fp128 Header[2]; + Fp128 Legacy[8]; + Fp128 Xmm0; + Fp128 Xmm1; + Fp128 Xmm2; + Fp128 Xmm3; + Fp128 Xmm4; + Fp128 Xmm5; + Fp128 Xmm6; + Fp128 Xmm7; + Fp128 Xmm8; + Fp128 Xmm9; + Fp128 Xmm10; + Fp128 Xmm11; + Fp128 Xmm12; + Fp128 Xmm13; + Fp128 Xmm14; + Fp128 Xmm15; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + Fp128 VectorRegister[26]; + UInt64 VectorControl; + UInt64 DebugControl; + UInt64 LastBranchToRip; + UInt64 LastBranchFromRip; + UInt64 LastExceptionToRip; + UInt64 LastExceptionFromRip; + + void SetIp(UIntNative ip) { Rip = ip; } + void SetSp(UIntNative sp) { Rsp = sp; } +#ifdef UNIX_AMD64_ABI + void SetArg0Reg(UIntNative val) { Rdi = val; } + void SetArg1Reg(UIntNative val) { Rsi = val; } +#else // UNIX_AMD64_ABI + void SetArg0Reg(UIntNative val) { Rcx = val; } + void SetArg1Reg(UIntNative val) { Rdx = val; } +#endif // UNIX_AMD64_ABI + UIntNative GetIp() { return Rip; } + UIntNative GetSp() { return Rsp; } +} CONTEXT, *PCONTEXT; +#elif defined(HOST_ARM) + +#define ARM_MAX_BREAKPOINTS 8 +#define ARM_MAX_WATCHPOINTS 1 + +typedef struct DECLSPEC_ALIGN(8) _CONTEXT { + UInt32 ContextFlags; + UInt32 R0; + UInt32 R1; + UInt32 R2; + UInt32 R3; + UInt32 R4; + UInt32 R5; + UInt32 R6; + UInt32 R7; + UInt32 R8; + UInt32 R9; + UInt32 R10; + UInt32 R11; + UInt32 R12; + UInt32 Sp; // R13 + UInt32 Lr; // R14 + UInt32 Pc; // R15 + UInt32 Cpsr; + UInt32 Fpscr; + UInt32 Padding; + union { + Fp128 Q[16]; + UInt64 D[32]; + UInt32 S[32]; + } DUMMYUNIONNAME; + UInt32 Bvr[ARM_MAX_BREAKPOINTS]; + UInt32 Bcr[ARM_MAX_BREAKPOINTS]; + UInt32 Wvr[ARM_MAX_WATCHPOINTS]; + UInt32 Wcr[ARM_MAX_WATCHPOINTS]; + UInt32 Padding2[2]; + + void SetIp(UIntNative ip) { Pc = ip; } + void SetArg0Reg(UIntNative val) { R0 = val; } + void SetArg1Reg(UIntNative val) { R1 = val; } + UIntNative GetIp() { return Pc; } + UIntNative GetLr() { return Lr; } +} CONTEXT, *PCONTEXT; + +#elif defined(HOST_X86) +#define SIZE_OF_80387_REGISTERS 80 +#define MAXIMUM_SUPPORTED_EXTENSION 512 + +typedef struct _FLOATING_SAVE_AREA { + UInt32 ControlWord; + UInt32 StatusWord; + UInt32 TagWord; + UInt32 ErrorOffset; + UInt32 ErrorSelector; + UInt32 
DataOffset; + UInt32 DataSelector; + UInt8 RegisterArea[SIZE_OF_80387_REGISTERS]; + UInt32 Cr0NpxState; +} FLOATING_SAVE_AREA; + +#include "pshpack4.h" +typedef struct _CONTEXT { + UInt32 ContextFlags; + UInt32 Dr0; + UInt32 Dr1; + UInt32 Dr2; + UInt32 Dr3; + UInt32 Dr6; + UInt32 Dr7; + FLOATING_SAVE_AREA FloatSave; + UInt32 SegGs; + UInt32 SegFs; + UInt32 SegEs; + UInt32 SegDs; + UInt32 Edi; + UInt32 Esi; + UInt32 Ebx; + UInt32 Edx; + UInt32 Ecx; + UInt32 Eax; + UInt32 Ebp; + UInt32 Eip; + UInt32 SegCs; + UInt32 EFlags; + UInt32 Esp; + UInt32 SegSs; + UInt8 ExtendedRegisters[MAXIMUM_SUPPORTED_EXTENSION]; + + void SetIp(UIntNative ip) { Eip = ip; } + void SetSp(UIntNative sp) { Esp = sp; } + void SetArg0Reg(UIntNative val) { Ecx = val; } + void SetArg1Reg(UIntNative val) { Edx = val; } + UIntNative GetIp() { return Eip; } + UIntNative GetSp() { return Esp; } +} CONTEXT, *PCONTEXT; +#include "poppack.h" + +#elif defined(HOST_ARM64) + +// Specify the number of breakpoints and watchpoints that the OS +// will track. Architecturally, ARM64 supports up to 16. In practice, +// however, almost no one implements more than 4 of each. + +#define ARM64_MAX_BREAKPOINTS 8 +#define ARM64_MAX_WATCHPOINTS 2 + +typedef struct _NEON128 { + UInt64 Low; + Int64 High; +} NEON128, *PNEON128; + +typedef struct DECLSPEC_ALIGN(16) _CONTEXT { + // + // Control flags. + // + UInt32 ContextFlags; + + // + // Integer registers + // + UInt32 Cpsr; // NZVF + DAIF + CurrentEL + SPSel + union { + struct { + UInt64 X0; + UInt64 X1; + UInt64 X2; + UInt64 X3; + UInt64 X4; + UInt64 X5; + UInt64 X6; + UInt64 X7; + UInt64 X8; + UInt64 X9; + UInt64 X10; + UInt64 X11; + UInt64 X12; + UInt64 X13; + UInt64 X14; + UInt64 X15; + UInt64 X16; + UInt64 X17; + UInt64 X18; + UInt64 X19; + UInt64 X20; + UInt64 X21; + UInt64 X22; + UInt64 X23; + UInt64 X24; + UInt64 X25; + UInt64 X26; + UInt64 X27; + UInt64 X28; +#pragma warning(push) +#pragma warning(disable:4201) // nameless struct + }; + UInt64 X[29]; + }; +#pragma warning(pop) + UInt64 Fp; // X29 + UInt64 Lr; // X30 + UInt64 Sp; + UInt64 Pc; + + // + // Floating Point/NEON Registers + // + NEON128 V[32]; + UInt32 Fpcr; + UInt32 Fpsr; + + // + // Debug registers + // + UInt32 Bcr[ARM64_MAX_BREAKPOINTS]; + UInt64 Bvr[ARM64_MAX_BREAKPOINTS]; + UInt32 Wcr[ARM64_MAX_WATCHPOINTS]; + UInt64 Wvr[ARM64_MAX_WATCHPOINTS]; + + void SetIp(UIntNative ip) { Pc = ip; } + void SetArg0Reg(UIntNative val) { X0 = val; } + void SetArg1Reg(UIntNative val) { X1 = val; } + UIntNative GetIp() { return Pc; } + UIntNative GetLr() { return Lr; } +} CONTEXT, *PCONTEXT; + +#elif defined(HOST_WASM) + +typedef struct DECLSPEC_ALIGN(8) _CONTEXT { + // TODO: Figure out if WebAssembly has a meaningful context available + void SetIp(UIntNative ip) { } + void SetArg0Reg(UIntNative val) { } + void SetArg1Reg(UIntNative val) { } + UIntNative GetIp() { return 0; } +} CONTEXT, *PCONTEXT; +#endif + +#define EXCEPTION_MAXIMUM_PARAMETERS 15 // maximum number of exception parameters + +typedef struct _EXCEPTION_RECORD32 { + UInt32 ExceptionCode; + UInt32 ExceptionFlags; + UIntNative ExceptionRecord; + UIntNative ExceptionAddress; + UInt32 NumberParameters; + UIntNative ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS]; +} EXCEPTION_RECORD, *PEXCEPTION_RECORD; + +typedef struct _EXCEPTION_POINTERS { + PEXCEPTION_RECORD ExceptionRecord; + PCONTEXT ContextRecord; +} EXCEPTION_POINTERS, *PEXCEPTION_POINTERS; + +typedef Int32 (__stdcall *PVECTORED_EXCEPTION_HANDLER)( + PEXCEPTION_POINTERS ExceptionInfo + ); + +#define 
EXCEPTION_CONTINUE_EXECUTION (-1) +#define EXCEPTION_CONTINUE_SEARCH (0) +#define EXCEPTION_EXECUTE_HANDLER (1) + +typedef enum _EXCEPTION_DISPOSITION { + ExceptionContinueExecution, + ExceptionContinueSearch, + ExceptionNestedException, + ExceptionCollidedUnwind +} EXCEPTION_DISPOSITION; + +#define STATUS_ACCESS_VIOLATION ((UInt32 )0xC0000005L) +#define STATUS_STACK_OVERFLOW ((UInt32 )0xC00000FDL) +#define STATUS_REDHAWK_NULL_REFERENCE ((UInt32 )0x00000000L) +#define STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE ((UInt32 )0x00000042L) + +#ifdef TARGET_UNIX +#define NULL_AREA_SIZE (4*1024) +#else +#define NULL_AREA_SIZE (64*1024) +#endif + +//#endif // !DACCESS_COMPILE +#endif // !_INC_WINDOWS + + + +#ifndef DACCESS_COMPILE +#ifndef _INC_WINDOWS + +typedef UInt32 (WINAPI *PTHREAD_START_ROUTINE)(_In_opt_ void* lpThreadParameter); +typedef IntNative (WINAPI *FARPROC)(); + +#ifndef __GCENV_BASE_INCLUDED__ +#define TRUE 1 +#define FALSE 0 +#endif // !__GCENV_BASE_INCLUDED__ + +#define INVALID_HANDLE_VALUE ((HANDLE)(IntNative)-1) + +#define DLL_PROCESS_ATTACH 1 +#define DLL_THREAD_ATTACH 2 +#define DLL_THREAD_DETACH 3 +#define DLL_PROCESS_DETACH 0 +#define DLL_PROCESS_VERIFIER 4 + +#define INFINITE 0xFFFFFFFF + +#define DUPLICATE_CLOSE_SOURCE 0x00000001 +#define DUPLICATE_SAME_ACCESS 0x00000002 + +#define GENERIC_READ 0x80000000 +#define GENERIC_WRITE 0x40000000 +#define GENERIC_EXECUTE 0x20000000 +#define GENERIC_ALL 0x10000000 + +#define FILE_SHARE_READ 0x00000001 +#define FILE_SHARE_WRITE 0x00000002 +#define FILE_SHARE_DELETE 0x00000004 + +#define FILE_ATTRIBUTE_READONLY 0x00000001 +#define FILE_ATTRIBUTE_HIDDEN 0x00000002 +#define FILE_ATTRIBUTE_SYSTEM 0x00000004 +#define FILE_ATTRIBUTE_DIRECTORY 0x00000010 +#define FILE_ATTRIBUTE_ARCHIVE 0x00000020 +#define FILE_ATTRIBUTE_DEVICE 0x00000040 +#define FILE_ATTRIBUTE_NORMAL 0x00000080 +#define FILE_ATTRIBUTE_TEMPORARY 0x00000100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400 +#define FILE_ATTRIBUTE_COMPRESSED 0x00000800 +#define FILE_ATTRIBUTE_OFFLINE 0x00001000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x00004000 + +#define CREATE_NEW 1 +#define CREATE_ALWAYS 2 +#define OPEN_EXISTING 3 +#define OPEN_ALWAYS 4 +#define TRUNCATE_EXISTING 5 + +#define FILE_BEGIN 0 +#define FILE_CURRENT 1 +#define FILE_END 2 + +#define PAGE_NOACCESS 0x01 +#define PAGE_READONLY 0x02 +#define PAGE_READWRITE 0x04 +#define PAGE_WRITECOPY 0x08 +#define PAGE_EXECUTE 0x10 +#define PAGE_EXECUTE_READ 0x20 +#define PAGE_EXECUTE_READWRITE 0x40 +#define PAGE_EXECUTE_WRITECOPY 0x80 +#define PAGE_GUARD 0x100 +#define PAGE_NOCACHE 0x200 +#define PAGE_WRITECOMBINE 0x400 +#define MEM_COMMIT 0x1000 +#define MEM_RESERVE 0x2000 +#define MEM_DECOMMIT 0x4000 +#define MEM_RELEASE 0x8000 +#define MEM_FREE 0x10000 +#define MEM_PRIVATE 0x20000 +#define MEM_MAPPED 0x40000 +#define MEM_RESET 0x80000 +#define MEM_TOP_DOWN 0x100000 +#define MEM_WRITE_WATCH 0x200000 +#define MEM_PHYSICAL 0x400000 +#define MEM_LARGE_PAGES 0x20000000 +#define MEM_4MB_PAGES 0x80000000 + +#define WAIT_OBJECT_0 0 +#define WAIT_TIMEOUT 258 +#define WAIT_FAILED 0xFFFFFFFF + +#define CREATE_SUSPENDED 0x00000004 +#define THREAD_PRIORITY_NORMAL 0 +#define THREAD_PRIORITY_HIGHEST 2 + +#define NOERROR 0x0 + +#define SUSPENDTHREAD_FAILED 0xFFFFFFFF +#define RESUMETHREAD_FAILED 0xFFFFFFFF + +#define ERROR_INSUFFICIENT_BUFFER 122 +#define ERROR_TIMEOUT 1460 +#define ERROR_ALREADY_EXISTS 183 + +#define 
GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT 0x00000002 +#define GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS 0x00000004 + +#endif // !_INC_WINDOWS +#endif // !DACCESS_COMPILE + +typedef UInt64 REGHANDLE; +typedef UInt64 TRACEHANDLE; + +#ifndef _EVNTPROV_H_ +struct EVENT_DATA_DESCRIPTOR +{ + UInt64 Ptr; + UInt32 Size; + UInt32 Reserved; +}; + +struct EVENT_DESCRIPTOR +{ + UInt16 Id; + UInt8 Version; + UInt8 Channel; + UInt8 Level; + UInt8 Opcode; + UInt16 Task; + UInt64 Keyword; + +}; + +struct EVENT_FILTER_DESCRIPTOR +{ + UInt64 Ptr; + UInt32 Size; + UInt32 Type; +}; + +__forceinline +void +EventDataDescCreate(_Out_ EVENT_DATA_DESCRIPTOR * EventDataDescriptor, _In_opt_ const void * DataPtr, UInt32 DataSize) +{ + EventDataDescriptor->Ptr = (UInt64)DataPtr; + EventDataDescriptor->Size = DataSize; + EventDataDescriptor->Reserved = 0; +} +#endif // _EVNTPROV_H_ + +extern GCSystemInfo g_RhSystemInfo; + +#ifdef TARGET_UNIX +#define REDHAWK_PALIMPORT extern "C" +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI +#else +#define REDHAWK_PALIMPORT EXTERN_C +#define REDHAWK_PALAPI __stdcall +#endif // TARGET_UNIX + +bool InitializeSystemInfo(); + +#ifndef DACCESS_COMPILE + +#ifdef _DEBUG +#define CaptureStackBackTrace RtlCaptureStackBackTrace +#endif + +#ifndef _INC_WINDOWS +// Include the list of external functions we wish to access. If we do our job 100% then it will be +// possible to link without any direct reference to any Win32 library. +#include "PalRedhawkFunctions.h" +#endif // !_INC_WINDOWS +#endif // !DACCESS_COMPILE + +// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalInit(); + +// Given a mask of capabilities return true if all of them are supported by the current PAL. +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalHasCapability(PalCapability capability); + +// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalGetModuleBounds(HANDLE hOsHandle, _Out_ UInt8 ** ppLowerBound, _Out_ UInt8 ** ppUpperBound); + +typedef struct _GUID GUID; +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalGetPDBInfo(HANDLE hOsHandle, _Out_ GUID * pGuidSignature, _Out_ UInt32 * pdwAge, _Out_writes_z_(cchPath) WCHAR * wszPath, Int32 cchPath); + +#ifndef APP_LOCAL_RUNTIME +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetThreadContext(HANDLE hThread, _Out_ PAL_LIMITED_CONTEXT * pCtx); +#endif + +REDHAWK_PALIMPORT Int32 REDHAWK_PALAPI PalGetProcessCpuCount(); + +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalReadFileContents(_In_z_ const TCHAR *, _Out_writes_all_(maxBytesToRead) char * buff, _In_ UInt32 maxBytesToRead); + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut); + +// Return value: number of characters in name string +REDHAWK_PALIMPORT Int32 PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase); + +#if _WIN32 + +// Various intrinsic declarations needed for the PalGetCurrentTEB implementation below. 
+#if defined(HOST_X86) +EXTERN_C unsigned long __readfsdword(unsigned long Offset); +#pragma intrinsic(__readfsdword) +#elif defined(HOST_AMD64) +EXTERN_C unsigned __int64 __readgsqword(unsigned long Offset); +#pragma intrinsic(__readgsqword) +#elif defined(HOST_ARM) +EXTERN_C unsigned int _MoveFromCoprocessor(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int); +#pragma intrinsic(_MoveFromCoprocessor) +#elif defined(HOST_ARM64) +EXTERN_C unsigned __int64 __getReg(int); +#pragma intrinsic(__getReg) +#else +#error Unsupported architecture +#endif + +// Retrieves the OS TEB for the current thread. +inline UInt8 * PalNtCurrentTeb() +{ +#if defined(HOST_X86) + return (UInt8*)__readfsdword(0x18); +#elif defined(HOST_AMD64) + return (UInt8*)__readgsqword(0x30); +#elif defined(HOST_ARM) + return (UInt8*)_MoveFromCoprocessor(15, 0, 13, 0, 2); +#elif defined(HOST_ARM64) + return (UInt8*)__getReg(18); +#else +#error Unsupported architecture +#endif +} + +// Offsets of ThreadLocalStoragePointer in the TEB. +#if defined(HOST_64BIT) +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x58 +#else +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x2c +#endif + +#else // _WIN32 + +inline UInt8 * PalNtCurrentTeb() +{ + // UNIXTODO: Implement PalNtCurrentTeb + return NULL; +} + +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0 + +#endif // _WIN32 + +// +// Compiler intrinsic definitions. In the interest of performance the PAL doesn't provide exports of these +// (that would defeat the purpose of having an intrinsic in the first place). Instead we place the necessary +// compiler linkage directly inline in this header. As a result this section may have platform specific +// conditional compilation (upto and including defining an export of functionality that isn't a supported +// intrinsic on that platform). 
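// Illustrative sketch, not part of the original header: how PalNtCurrentTeb() and
// OFFSETOF__TEB__ThreadLocalStoragePointer above could be combined on a Windows host to
// reach the per-thread TLS array. The helper name is hypothetical and the #if 0 guard
// keeps the sketch out of any build.
#if 0
inline void** SketchGetThreadLocalStoragePointer()
{
    UInt8* pTeb = PalNtCurrentTeb();
    // ThreadLocalStoragePointer is itself a pointer-sized field inside the TEB.
    return *(void***)(pTeb + OFFSETOF__TEB__ThreadLocalStoragePointer);
}
#endif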
+// + +EXTERN_C void * __cdecl _alloca(size_t); +#pragma intrinsic(_alloca) + +REDHAWK_PALIMPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(_In_opt_ void* pAddress, UIntNative size, UInt32 allocationType, UInt32 protect); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, UIntNative size, UInt32 freeType); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, UIntNative size, UInt32 protect); +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSleep(UInt32 milliseconds); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread(); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName); +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalGetTickCount(); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateFileW(_In_z_ LPCWSTR pFileName, uint32_t desiredAccess, uint32_t shareMode, _In_opt_ void* pSecurityAttributes, uint32_t creationDisposition, uint32_t flagsAndAttributes, HANDLE hTemplateFile); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateLowMemoryNotification(); +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalTerminateCurrentProcess(UInt32 exitCode); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer); + +#ifndef APP_LOCAL_RUNTIME + +#ifdef TARGET_UNIX +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler); +#else +REDHAWK_PALIMPORT void* REDHAWK_PALAPI PalAddVectoredExceptionHandler(UInt32 firstHandler, _In_ PVECTORED_EXCEPTION_HANDLER vectoredHandler); +#endif + +#endif + + +typedef UInt32 (__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); + +typedef UInt32_BOOL (*PalHijackCallback)(HANDLE hThread, _In_ PAL_LIMITED_CONTEXT* pThreadContext, _In_opt_ void* pCallbackContext); +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ PalHijackCallback callback, _In_opt_ void* pCallbackContext); + +#ifdef FEATURE_ETW +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalEventEnabled(REGHANDLE regHandle, _In_ const EVENT_DESCRIPTOR* eventDescriptor); +#endif + +REDHAWK_PALIMPORT _Ret_maybenull_ void* REDHAWK_PALAPI PalSetWerDataBuffer(_In_ void* pNewBuffer); + +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, UInt32 templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(_In_ void *pBaseAddress); + +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping); + +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, UInt32 timeout, UInt32 count, HANDLE* pHandles, UInt32_BOOL allowReentrantWait); + +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalAttachThread(void* thread); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalDetachThread(void* thread); + +REDHAWK_PALIMPORT UInt64 PalGetCurrentThreadIdForLogging(); + +REDHAWK_PALIMPORT void PalPrintFatalError(const char* message); + +#ifdef TARGET_UNIX 
+REDHAWK_PALIMPORT Int32 __cdecl _stricmp(const char *string1, const char *string2); +#endif // TARGET_UNIX + +#ifdef UNICODE +#define _tcsicmp _wcsicmp +#else +#define _tcsicmp _stricmp +#endif + +#if defined(HOST_X86) || defined(HOST_AMD64) +REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI getcpuid(uint32_t arg1, unsigned char result[16]); +REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI getextcpuid(uint32_t arg1, uint32_t arg2, unsigned char result[16]); +REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport(); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled(); +#endif // defined(HOST_X86) || defined(HOST_AMD64) + +#include "PalRedhawkInline.h" + +#endif // !PAL_REDHAWK_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/src/nativeaot/Runtime/PalRedhawkCommon.h new file mode 100644 index 0000000000000..d1933865af2a8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/PalRedhawkCommon.h @@ -0,0 +1,172 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Provide common definitions between the Redhawk and the Redhawk PAL implementation. This header file is used +// (rather than PalRedhawk.h) since the PAL implementation is built in a different environment than Redhawk +// code. For instance both environments may provide a definition of various common macros such as NULL. +// +// This header contains only environment neutral definitions (i.e. using only base C++ types and compositions +// of those types) and can thus be included from either environment without issue. +// + +#ifndef __PAL_REDHAWK_COMMON_INCLUDED +#define __PAL_REDHAWK_COMMON_INCLUDED + +#include "rhassert.h" + +// We define the notion of capabilities: optional functionality that the PAL may expose. Use +// PalHasCapability() with the constants below to determine what is supported at runtime. +enum PalCapability +{ + WriteWatchCapability = 0x00000001, // GetWriteWatch() and friends + LowMemoryNotificationCapability = 0x00000002, // CreateMemoryResourceNotification() and friends +}; + +#ifndef DECLSPEC_ALIGN +#ifdef _MSC_VER +#define DECLSPEC_ALIGN(x) __declspec(align(x)) +#else +#define DECLSPEC_ALIGN(x) __attribute__((aligned(x))) +#endif +#endif // DECLSPEC_ALIGN + +#ifdef HOST_AMD64 +#define AMD64_ALIGN_16 DECLSPEC_ALIGN(16) +#else // HOST_AMD64 +#define AMD64_ALIGN_16 +#endif // HOST_AMD64 + +struct AMD64_ALIGN_16 Fp128 { + UInt64 Low; + Int64 High; +}; + + +struct PAL_LIMITED_CONTEXT +{ + // Includes special registers, callee saved registers and general purpose registers used to return values from functions (not floating point return registers) +#ifdef TARGET_ARM + UIntNative R0; + UIntNative R4; + UIntNative R5; + UIntNative R6; + UIntNative R7; + UIntNative R8; + UIntNative R9; + UIntNative R10; + UIntNative R11; + + UIntNative IP; + UIntNative SP; + UIntNative LR; + + UInt64 D[16-8]; // D8 .. D15 registers (D16 .. 
D31 are volatile according to the ABI spec) + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return SP; } + UIntNative GetFp() const { return R7; } + UIntNative GetLr() const { return LR; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { SP = sp; } +#elif defined(TARGET_ARM64) + UIntNative FP; + UIntNative LR; + + UIntNative X0; + UIntNative X1; + UIntNative X19; + UIntNative X20; + UIntNative X21; + UIntNative X22; + UIntNative X23; + UIntNative X24; + UIntNative X25; + UIntNative X26; + UIntNative X27; + UIntNative X28; + + UIntNative SP; + UIntNative IP; + + UInt64 D[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved + // (V0-V7 and V16-V31 are not preserved according to the ABI spec). + + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return SP; } + UIntNative GetFp() const { return FP; } + UIntNative GetLr() const { return LR; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { SP = sp; } +#elif defined(UNIX_AMD64_ABI) + // Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15 + UIntNative IP; + UIntNative Rsp; + UIntNative Rbp; + UIntNative Rax; + UIntNative Rbx; + UIntNative Rdx; + UIntNative R12; + UIntNative R13; + UIntNative R14; + UIntNative R15; + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return Rsp; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { Rsp = sp; } + UIntNative GetFp() const { return Rbp; } +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + UIntNative IP; + UIntNative Rsp; + UIntNative Rbp; + UIntNative Rdi; + UIntNative Rsi; + UIntNative Rax; + UIntNative Rbx; +#ifdef TARGET_AMD64 + UIntNative R12; + UIntNative R13; + UIntNative R14; + UIntNative R15; + UIntNative __explicit_padding__; + Fp128 Xmm6; + Fp128 Xmm7; + Fp128 Xmm8; + Fp128 Xmm9; + Fp128 Xmm10; + Fp128 Xmm11; + Fp128 Xmm12; + Fp128 Xmm13; + Fp128 Xmm14; + Fp128 Xmm15; +#endif // TARGET_AMD64 + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return Rsp; } + UIntNative GetFp() const { return Rbp; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { Rsp = sp; } +#else // TARGET_ARM + UIntNative IP; + + UIntNative GetIp() const { PORTABILITY_ASSERT("GetIp"); return 0; } + UIntNative GetSp() const { PORTABILITY_ASSERT("GetSp"); return 0; } + UIntNative GetFp() const { PORTABILITY_ASSERT("GetFp"); return 0; } + void SetIp(UIntNative ip) { PORTABILITY_ASSERT("SetIp"); } + void SetSp(UIntNative sp) { PORTABILITY_ASSERT("GetSp"); } +#endif // TARGET_ARM +}; + +void RuntimeThreadShutdown(void* thread); + +#ifdef TARGET_UNIX +typedef void (__fastcall * ThreadExitCallback)(); + +extern ThreadExitCallback g_threadExitCallback; + +typedef Int32 (*PHARDWARE_EXCEPTION_HANDLER)(UIntNative faultCode, UIntNative faultAddress, PAL_LIMITED_CONTEXT* palContext, UIntNative* arg0Reg, UIntNative* arg1Reg); +#endif + +#endif // __PAL_REDHAWK_COMMON_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/PalRedhawkFunctions.h b/src/coreclr/src/nativeaot/Runtime/PalRedhawkFunctions.h new file mode 100644 index 0000000000000..bf12d74f5e3fc --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/PalRedhawkFunctions.h @@ -0,0 +1,186 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
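The header that follows wraps each Win32 API the runtime calls: the import is declared directly (so no windows.h is required) and then forwarded through a Pal-prefixed inline. A hedged illustration of the shape, using an API chosen only for the example:

    extern "C" UInt32 __stdcall GetCurrentThreadId();
    inline UInt32 PalGetCurrentThreadId()
    {
        return GetCurrentThreadId();
    }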
+ +extern "C" UInt16 __stdcall CaptureStackBackTrace(UInt32, UInt32, void*, UInt32*); +inline UInt16 PalCaptureStackBackTrace(UInt32 arg1, UInt32 arg2, void* arg3, UInt32* arg4) +{ + return CaptureStackBackTrace(arg1, arg2, arg3, arg4); +} + +extern "C" UInt32_BOOL __stdcall CloseHandle(HANDLE); +inline UInt32_BOOL PalCloseHandle(HANDLE arg1) +{ + return CloseHandle(arg1); +} + +extern "C" UInt32_BOOL __stdcall CreateDirectoryW(LPCWSTR, LPSECURITY_ATTRIBUTES); +inline UInt32_BOOL PalCreateDirectoryW(LPCWSTR arg1, LPSECURITY_ATTRIBUTES arg2) +{ + return CreateDirectoryW(arg1, arg2); +} + +extern "C" void __stdcall DeleteCriticalSection(CRITICAL_SECTION *); +inline void PalDeleteCriticalSection(CRITICAL_SECTION * arg1) +{ + DeleteCriticalSection(arg1); +} + +extern "C" UInt32_BOOL __stdcall DuplicateHandle(HANDLE, HANDLE, HANDLE, HANDLE *, UInt32, UInt32_BOOL, UInt32); +inline UInt32_BOOL PalDuplicateHandle(HANDLE arg1, HANDLE arg2, HANDLE arg3, HANDLE * arg4, UInt32 arg5, UInt32_BOOL arg6, UInt32 arg7) +{ + return DuplicateHandle(arg1, arg2, arg3, arg4, arg5, arg6, arg7); +} + +extern "C" void __stdcall EnterCriticalSection(CRITICAL_SECTION *); +inline void PalEnterCriticalSection(CRITICAL_SECTION * arg1) +{ + EnterCriticalSection(arg1); +} + +extern "C" UInt32 __stdcall EventRegister(const GUID *, void *, void *, REGHANDLE *); +inline UInt32 PalEventRegister(const GUID * arg1, void * arg2, void * arg3, REGHANDLE * arg4) +{ + return EventRegister(arg1, arg2, arg3, arg4); +} + +extern "C" UInt32 __stdcall EventUnregister(REGHANDLE); +inline UInt32 PalEventUnregister(REGHANDLE arg1) +{ + return EventUnregister(arg1); +} + +extern "C" UInt32 __stdcall EventWrite(REGHANDLE, const EVENT_DESCRIPTOR *, UInt32, EVENT_DATA_DESCRIPTOR *); +inline UInt32 PalEventWrite(REGHANDLE arg1, const EVENT_DESCRIPTOR * arg2, UInt32 arg3, EVENT_DATA_DESCRIPTOR * arg4) +{ + return EventWrite(arg1, arg2, arg3, arg4); +} + +extern "C" void __stdcall FlushProcessWriteBuffers(); +inline void PalFlushProcessWriteBuffers() +{ + FlushProcessWriteBuffers(); +} + +extern "C" HANDLE __stdcall GetCurrentProcess(); +inline HANDLE PalGetCurrentProcess() +{ + return GetCurrentProcess(); +} + +extern "C" UInt32 __stdcall GetCurrentProcessId(); +inline UInt32 PalGetCurrentProcessId() +{ + return GetCurrentProcessId(); +} + +extern "C" HANDLE __stdcall GetCurrentThread(); +inline HANDLE PalGetCurrentThread() +{ + return GetCurrentThread(); +} + +#ifdef UNICODE +extern "C" UInt32 __stdcall GetEnvironmentVariableW(__in_z_opt LPCWSTR, __out_z_opt LPWSTR, UInt32); +inline UInt32 PalGetEnvironmentVariable(__in_z_opt LPCWSTR arg1, __out_z_opt LPWSTR arg2, UInt32 arg3) +{ + return GetEnvironmentVariableW(arg1, arg2, arg3); +} +#else +extern "C" UInt32 __stdcall GetEnvironmentVariableA(__in_z_opt LPCSTR, __out_z_opt LPSTR, UInt32); +inline UInt32 PalGetEnvironmentVariable(__in_z_opt LPCSTR arg1, __out_z_opt LPSTR arg2, UInt32 arg3) +{ + return GetEnvironmentVariableA(arg1, arg2, arg3); +} +#endif + +extern "C" void * __stdcall GetProcAddress(HANDLE, const char *); +inline void * PalGetProcAddress(HANDLE arg1, const char * arg2) +{ + return GetProcAddress(arg1, arg2); +} + +extern "C" UInt32_BOOL __stdcall InitializeCriticalSectionEx(CRITICAL_SECTION *, UInt32, UInt32); +inline UInt32_BOOL PalInitializeCriticalSectionEx(CRITICAL_SECTION * arg1, UInt32 arg2, UInt32 arg3) +{ + return InitializeCriticalSectionEx(arg1, arg2, arg3); +} + +extern "C" UInt32_BOOL __stdcall IsDebuggerPresent(); +inline UInt32_BOOL PalIsDebuggerPresent() +{ + 
return IsDebuggerPresent(); +} + +extern "C" void __stdcall LeaveCriticalSection(CRITICAL_SECTION *); +inline void PalLeaveCriticalSection(CRITICAL_SECTION * arg1) +{ + LeaveCriticalSection(arg1); +} + +extern "C" HANDLE __stdcall LoadLibraryExW(const WCHAR *, HANDLE, UInt32); +inline HANDLE PalLoadLibraryExW(const WCHAR * arg1, HANDLE arg2, UInt32 arg3) +{ + return LoadLibraryExW(arg1, arg2, arg3); +} + +extern "C" UInt32_BOOL __stdcall QueryPerformanceCounter(LARGE_INTEGER *); +inline UInt32_BOOL PalQueryPerformanceCounter(LARGE_INTEGER * arg1) +{ + return QueryPerformanceCounter(arg1); +} + +extern "C" UInt32_BOOL __stdcall QueryPerformanceFrequency(LARGE_INTEGER *); +inline UInt32_BOOL PalQueryPerformanceFrequency(LARGE_INTEGER * arg1) +{ + return QueryPerformanceFrequency(arg1); +} + +extern "C" void __stdcall RaiseException(UInt32, UInt32, UInt32, const UInt32 *); +inline void PalRaiseException(UInt32 arg1, UInt32 arg2, UInt32 arg3, const UInt32 * arg4) +{ + RaiseException(arg1, arg2, arg3, arg4); +} + +extern "C" UInt32_BOOL __stdcall ReleaseMutex(HANDLE); +inline UInt32_BOOL PalReleaseMutex(HANDLE arg1) +{ + return ReleaseMutex(arg1); +} + +extern "C" UInt32_BOOL __stdcall ResetEvent(HANDLE); +inline UInt32_BOOL PalResetEvent(HANDLE arg1) +{ + return ResetEvent(arg1); +} + +extern "C" UInt32_BOOL __stdcall SetEvent(HANDLE); +inline UInt32_BOOL PalSetEvent(HANDLE arg1) +{ + return SetEvent(arg1); +} + +extern "C" void __stdcall TerminateProcess(HANDLE, UInt32); +inline void PalTerminateProcess(HANDLE arg1, UInt32 arg2) +{ + TerminateProcess(arg1, arg2); +} + +extern "C" UInt32 __stdcall WaitForSingleObjectEx(HANDLE, UInt32, UInt32_BOOL); +inline UInt32 PalWaitForSingleObjectEx(HANDLE arg1, UInt32 arg2, UInt32_BOOL arg3) +{ + return WaitForSingleObjectEx(arg1, arg2, arg3); +} + +#ifdef PAL_REDHAWK_INCLUDED +extern "C" void __stdcall GetSystemTimeAsFileTime(FILETIME *); +inline void PalGetSystemTimeAsFileTime(FILETIME * arg1) +{ + GetSystemTimeAsFileTime(arg1); +} + +extern "C" void __stdcall RaiseFailFastException(PEXCEPTION_RECORD, PCONTEXT, UInt32); +inline void PalRaiseFailFastException(PEXCEPTION_RECORD arg1, PCONTEXT arg2, UInt32 arg3) +{ + RaiseFailFastException(arg1, arg2, arg3); +} +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/RWLock.cpp b/src/coreclr/src/nativeaot/Runtime/RWLock.cpp new file mode 100644 index 0000000000000..a678c5cedad4c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RWLock.cpp @@ -0,0 +1,267 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// +// RWLock.cpp -- adapted from CLR SimpleRWLock.cpp +// +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RuntimeInstance.h" +#include "yieldprocessornormalized.h" + +// Configurable constants used across our spin locks +// Initialization here is necessary so that we have meaningful values before the runtime is started +// These initial values were selected to match the defaults, but anything reasonable is close enough +struct SpinConstants +{ + UInt32 uInitialDuration; + UInt32 uMaximumDuration; + UInt32 uBackoffFactor; + UInt32 uRepetitions; +} g_SpinConstants = { + 50, // dwInitialDuration + 40000, // dwMaximumDuration - ideally (20000 * max(2, numProc)) + 3, // dwBackoffFactor + 10 // dwRepetitions +}; + +ReaderWriterLock::ReadHolder::ReadHolder(ReaderWriterLock * pLock, bool fAcquireLock) : + m_pLock(pLock) +{ +#ifndef DACCESS_COMPILE + m_fLockAcquired = fAcquireLock; + if (fAcquireLock) + m_pLock->AcquireReadLock(); +#else + UNREFERENCED_PARAMETER(fAcquireLock); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::ReadHolder::~ReadHolder() +{ +#ifndef DACCESS_COMPILE + if (m_fLockAcquired) + m_pLock->ReleaseReadLock(); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::WriteHolder::WriteHolder(ReaderWriterLock * pLock, bool fAcquireLock) : + m_pLock(pLock) +{ +#ifndef DACCESS_COMPILE + m_fLockAcquired = fAcquireLock; + if (fAcquireLock) + m_pLock->AcquireWriteLock(); +#else + UNREFERENCED_PARAMETER(fAcquireLock); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::WriteHolder::~WriteHolder() +{ +#ifndef DACCESS_COMPILE + if (m_fLockAcquired) + m_pLock->ReleaseWriteLock(); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::ReaderWriterLock(bool fBlockOnGc) : + m_RWLock(0) +#if 0 + , m_WriterWaiting(false) +#endif +{ + m_spinCount = ( +#ifndef DACCESS_COMPILE + (PalGetProcessCpuCount() == 1) ? 0 : +#endif + 4000); + m_fBlockOnGc = fBlockOnGc; +} + + +#ifndef DACCESS_COMPILE + +// Attempt to take the read lock, but do not wait if a writer has the lock. +// Release the lock if successfully acquired. Returns true if the lock was +// taken and released. Returns false if a writer had the lock. +// +// BEWARE: Because this method returns after releasing the lock, you can't +// infer the state of the lock based on the return value. This is currently +// only used to detect if a suspended thread owns the write lock to prevent +// deadlock with the Hijack logic during GC suspension. 
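A minimal sketch of how the RAII holders implemented above are intended to be used; the caller is hypothetical.

    void ExampleReadSharedState(ReaderWriterLock * pLock)
    {
        // The constructor takes the read lock (unless fAcquireLock is passed as false)
        // and the destructor releases it, so early returns cannot leak the lock.
        ReaderWriterLock::ReadHolder read(pLock);

        // ... inspect state protected by pLock here ...
    }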
+// +bool ReaderWriterLock::DangerousTryPulseReadLock() +{ + if (TryAcquireReadLock()) + { + ReleaseReadLock(); + return true; + } + return false; +} + +bool ReaderWriterLock::TryAcquireReadLock() +{ + Int32 RWLock; + + do + { + RWLock = m_RWLock; + if (RWLock == -1) + return false; + ASSERT(RWLock >= 0); + } + while (RWLock != PalInterlockedCompareExchange(&m_RWLock, RWLock+1, RWLock)); + + return true; +} + +void ReaderWriterLock::AcquireReadLock() +{ + if (TryAcquireReadLock()) + return; + + AcquireReadLockWorker(); +} + +void ReaderWriterLock::AcquireReadLockWorker() +{ + UInt32 uSwitchCount = 0; + + for (;;) + { +#if 0 + // @TODO: Validate that we never re-enter the reader lock from a thread that + // already holds it. This scenario will deadlock if there are outstanding + // writers. + + // prevent writers from being starved. This assumes that writers are rare and + // dont hold the lock for a long time. + while (m_WriterWaiting) + { + Int32 spinCount = m_spinCount; + while (spinCount > 0) { + spinCount--; + PalYieldProcessor(); + } + __SwitchToThread(0, ++uSwitchCount); + } +#endif + + if (TryAcquireReadLock()) + return; + + UInt32 uDelay = g_SpinConstants.uInitialDuration; + do + { + if (TryAcquireReadLock()) + return; + + if (g_RhSystemInfo.dwNumberOfProcessors <= 1) + break; + + // Delay by approximately 2*i clock cycles (Pentium III). + YieldProcessorNormalizedForPreSkylakeCount(uDelay); + + // exponential backoff: wait a factor longer in the next iteration + uDelay *= g_SpinConstants.uBackoffFactor; + } + while (uDelay < g_SpinConstants.uMaximumDuration); + + __SwitchToThread(0, ++uSwitchCount); + } +} + +void ReaderWriterLock::ReleaseReadLock() +{ + Int32 RWLock; + RWLock = PalInterlockedDecrement(&m_RWLock); + ASSERT(RWLock >= 0); +} + + +bool ReaderWriterLock::TryAcquireWriteLock() +{ + Int32 RWLock = PalInterlockedCompareExchange(&m_RWLock, -1, 0); + + ASSERT(RWLock >= 0 || RWLock == -1); + + if (RWLock) + return false; + +#if 0 + m_WriterWaiting = false; +#endif + + return true; +} + +void ReaderWriterLock::AcquireWriteLock() +{ + UInt32 uSwitchCount = 0; + + for (;;) + { + if (TryAcquireWriteLock()) + return; + +#if 0 + // Set the writer waiting word, if not already set, to notify potential readers to wait. + m_WriterWaiting = true; +#endif + + UInt32 uDelay = g_SpinConstants.uInitialDuration; + do + { + if (TryAcquireWriteLock()) + return; + + // Do not spin if GC is in progress because the lock will not + // be released until GC is finished. + if (m_fBlockOnGc && ThreadStore::IsTrapThreadsRequested()) + { + RedhawkGCInterface::WaitForGCCompletion(); + } + + if (g_RhSystemInfo.dwNumberOfProcessors <= 1) + { + break; + } + + // Delay by approximately 2*i clock cycles (Pentium III). + YieldProcessorNormalizedForPreSkylakeCount(uDelay); + + // exponential backoff: wait a factor longer in the next iteration + uDelay *= g_SpinConstants.uBackoffFactor; + } + while (uDelay < g_SpinConstants.uMaximumDuration); + + __SwitchToThread(0, ++uSwitchCount); + } +} + +void ReaderWriterLock::ReleaseWriteLock() +{ + Int32 RWLock; + RWLock = PalInterlockedExchange(&m_RWLock, 0); + ASSERT(RWLock == -1); +} +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/RWLock.h b/src/coreclr/src/nativeaot/Runtime/RWLock.h new file mode 100644 index 0000000000000..79459a86cf525 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RWLock.h @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __RWLock_h__ +#define __RWLock_h__ + +class ReaderWriterLock +{ + volatile Int32 m_RWLock; // lock used for R/W synchronization + Int32 m_spinCount; // spin count for a reader waiting for a writer to release the lock + bool m_fBlockOnGc; // True if the spinning writers should block when GC is in progress + + +#if 0 + // used to prevent writers from being starved by readers + // we currently do not prevent writers from starving readers since writers + // are supposed to be rare. + bool m_WriterWaiting; +#endif + + bool TryAcquireReadLock(); + bool TryAcquireWriteLock(); + +public: + class ReadHolder + { + ReaderWriterLock * m_pLock; + bool m_fLockAcquired; + public: + ReadHolder(ReaderWriterLock * pLock, bool fAcquireLock = true); + ~ReadHolder(); + }; + + class WriteHolder + { + ReaderWriterLock * m_pLock; + bool m_fLockAcquired; + public: + WriteHolder(ReaderWriterLock * pLock, bool fAcquireLock = true); + ~WriteHolder(); + }; + + ReaderWriterLock(bool fBlockOnGc = false); + + void AcquireReadLock(); + void ReleaseReadLock(); + + bool DangerousTryPulseReadLock(); + +protected: + void AcquireWriteLock(); + void ReleaseWriteLock(); + + void AcquireReadLockWorker(); + +}; + +#endif // __RWLock_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/Range.h b/src/coreclr/src/nativeaot/Runtime/Range.h new file mode 100644 index 0000000000000..a728c587c1641 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Range.h @@ -0,0 +1,137 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#pragma once + +namespace rh { namespace util +{ + //--------------------------------------------------------------------------------------------- + // Represents value range [a,b), and provides various convenience methods. 
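A quick illustration of the half-open [a,b) convention used by the Range class that follows: the end point is excluded, so [0,4) and [4,8) are adjacent but do not intersect. The free function below is a hypothetical mirror of Range::IntersectsWith.

    inline bool ExampleHalfOpenIntersects(UIntNative aStart, UIntNative aEnd,
                                          UIntNative bStart, UIntNative bEnd)
    {
        return (bStart < aEnd) && (bEnd > aStart);
    }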
+ + template + class Range + { + typedef Range THIS_T; + + public: + //----------------------------------------------------------------------------------------- + // Construction + + Range() + : m_start(0), + m_end(0) + {} + + Range(Range const & range) + : m_start(range.m_start), + m_end(range.m_end) + {} + + static Range CreateWithEndpoint(VALUE_TYPE start, + VALUE_TYPE end) + { return Range(start, end); } + + static Range CreateWithLength(VALUE_TYPE start, LENGTH_TYPE len) + { return Range(start, start + len); } + + //----------------------------------------------------------------------------------------- + // Operations + + THIS_T& operator=(THIS_T const & range) + { m_start = range.m_start; m_end = range.m_end; return *this; } + + bool Equals(THIS_T const & range) const + { return GetStart() == range.GetStart() && GetEnd() == range.GetEnd(); } + + bool operator==(THIS_T const & range) const + { return Equals(range); } + + bool operator!=(THIS_T const & range) const + { return !Equals(range); } + + VALUE_TYPE GetStart() const + { return m_start; } + + VALUE_TYPE GetEnd() const + { return m_end; } + + LENGTH_TYPE GetLength() const + { return m_end - m_start; } + + bool IntersectsWith(THIS_T const &range) const + { return range.GetStart() < GetEnd() && range.GetEnd() > GetStart(); } + + bool IntersectsWith(VALUE_TYPE start, + VALUE_TYPE end) const + { return IntersectsWith(THIS_T(start, end)); } + + bool Contains(THIS_T const &range) const + { return GetStart() <= range.GetStart() && range.GetEnd() <= GetEnd(); } + + bool IsAdjacentTo(THIS_T const &range) const + { return GetEnd() == range.GetStart() || range.GetEnd() == GetStart(); } + + protected: + Range(VALUE_TYPE start, VALUE_TYPE end) + : m_start(start), + m_end(end) + { ASSERT(start <= end); } + + VALUE_TYPE m_start; + VALUE_TYPE m_end; + }; + + //--------------------------------------------------------------------------------------------- + // Represents address range [a,b), and provides various convenience methods. + + class MemRange : public Range + { + typedef Range BASE_T; + + public: + //----------------------------------------------------------------------------------------- + // Construction + + MemRange() + : BASE_T() + {} + + MemRange(void* pvMemStart, + UIntNative cbMemLen) + : BASE_T(reinterpret_cast(pvMemStart), reinterpret_cast(pvMemStart) + cbMemLen) + {} + + MemRange(void* pvMemStart, + void* pvMemEnd) + : BASE_T(reinterpret_cast(pvMemStart), reinterpret_cast(pvMemEnd)) + {} + + MemRange(MemRange const & range) + : BASE_T(range) + { } + + //----------------------------------------------------------------------------------------- + // Operations + + MemRange& operator=(MemRange const & range) + { BASE_T::operator=(range); return *this; } + + UIntNative GetPageCount() const + { + UInt8 *pCurPage = ALIGN_DOWN(GetStart(), OS_PAGE_SIZE); + UInt8 *pEndPage = ALIGN_UP(GetEnd(), OS_PAGE_SIZE); + return (pEndPage - pCurPage) / OS_PAGE_SIZE; + } + + UInt8* GetStartPage() const + { return ALIGN_DOWN(GetStart(), OS_PAGE_SIZE); } + + // The page immediately following the last page contained by this range. 
+ UInt8* GetEndPage() const + { return ALIGN_UP(GetEnd(), OS_PAGE_SIZE); } + + MemRange GetPageRange() const + { return MemRange(GetStartPage(), GetEndPage()); } + }; +}// namespace util +}// namespace rh + diff --git a/src/coreclr/src/nativeaot/Runtime/RedhawkWarnings.h b/src/coreclr/src/nativeaot/Runtime/RedhawkWarnings.h new file mode 100644 index 0000000000000..e3cc1118b5d8d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RedhawkWarnings.h @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Disable some commonly ignored warnings +// + +MSVC_DISABLE_WARNING(4200) // nonstandard extension used : zero-sized array in struct/union diff --git a/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.cpp b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.cpp new file mode 100644 index 0000000000000..b7a8ce134b679 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.cpp @@ -0,0 +1,248 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Callouts from the unmanaged portion of the runtime to C# helpers made during garbage collections. See +// RestrictedCallouts.h for more detail. +// + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "rhbinder.h" +#include "Crst.h" +#include "RuntimeInstance.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "event.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RestrictedCallouts.h" + +// The head of the chains of GC callouts, one per callout type. +RestrictedCallouts::GcRestrictedCallout * RestrictedCallouts::s_rgGcRestrictedCallouts[GCRC_Count] = { 0 }; + +// The head of the chain of HandleTable callouts. +RestrictedCallouts::HandleTableRestrictedCallout * RestrictedCallouts::s_pHandleTableRestrictedCallouts = NULL; + +// Lock protecting access to s_rgGcRestrictedCallouts and s_pHandleTableRestrictedCallouts during registration +// and unregistration (not used during actual callbacks since everything is single threaded then). +CrstStatic RestrictedCallouts::s_sLock; + +// One time startup initialization. +bool RestrictedCallouts::Initialize() +{ + s_sLock.Init(CrstRestrictedCallouts, CRST_DEFAULT); + + return true; +} + +// Register callback of the given type to the method with the given address. The most recently registered +// callbacks are called first. Returns true on success, false if insufficient memory was available for the +// registration. +bool RestrictedCallouts::RegisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod) +{ + // Validate callout kind. + if (eKind >= GCRC_Count) + { + ASSERT_UNCONDITIONALLY("Invalid GC restricted callout kind."); + RhFailFast(); + } + + GcRestrictedCallout * pCallout = new (nothrow) GcRestrictedCallout(); + if (pCallout == NULL) + return false; + + pCallout->m_pCalloutMethod = pCalloutMethod; + + CrstHolder lh(&s_sLock); + + // Link new callout to head of the chain according to its type. 
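+    // Head insertion is what gives the most-recently-registered-first invocation
+    // order promised in the comment on RegisterGcCallout above.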
+ pCallout->m_pNext = s_rgGcRestrictedCallouts[eKind]; + s_rgGcRestrictedCallouts[eKind] = pCallout; + + return true; +} + +// Unregister a previously registered callout. Removes the first registration that matches on both callout +// kind and address. Causes a fail fast if the registration doesn't exist. +void RestrictedCallouts::UnregisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod) +{ + // Validate callout kind. + if (eKind >= GCRC_Count) + { + ASSERT_UNCONDITIONALLY("Invalid GC restricted callout kind."); + RhFailFast(); + } + + CrstHolder lh(&s_sLock); + + GcRestrictedCallout * pCurrCallout = s_rgGcRestrictedCallouts[eKind]; + GcRestrictedCallout * pPrevCallout = NULL; + + while (pCurrCallout) + { + if (pCurrCallout->m_pCalloutMethod == pCalloutMethod) + { + // Found a matching entry, remove it from the chain. + if (pPrevCallout) + pPrevCallout->m_pNext = pCurrCallout->m_pNext; + else + s_rgGcRestrictedCallouts[eKind] = pCurrCallout->m_pNext; + + delete pCurrCallout; + + return; + } + + pPrevCallout = pCurrCallout; + pCurrCallout = pCurrCallout->m_pNext; + } + + // If we get here we didn't find a matching registration, indicating a bug on the part of the caller. + ASSERT_UNCONDITIONALLY("Attempted to unregister restricted callout that wasn't registered."); + RhFailFast(); +} + +// Register callback for the "is alive" property of ref counted handles with objects of the given type (the +// type match must be exact). The most recently registered callbacks are called first. Returns true on +// success, false if insufficient memory was available for the registration. +bool RestrictedCallouts::RegisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter) +{ + HandleTableRestrictedCallout * pCallout = new (nothrow) HandleTableRestrictedCallout(); + if (pCallout == NULL) + return false; + + pCallout->m_pCalloutMethod = pCalloutMethod; + pCallout->m_pTypeFilter = pTypeFilter; + + CrstHolder lh(&s_sLock); + + // Link new callout to head of the chain. + pCallout->m_pNext = s_pHandleTableRestrictedCallouts; + s_pHandleTableRestrictedCallouts = pCallout; + + return true; +} + +// Unregister a previously registered callout. Removes the first registration that matches on both callout +// address and filter type. Causes a fail fast if the registration doesn't exist. +void RestrictedCallouts::UnregisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter) +{ + CrstHolder lh(&s_sLock); + + HandleTableRestrictedCallout * pCurrCallout = s_pHandleTableRestrictedCallouts; + HandleTableRestrictedCallout * pPrevCallout = NULL; + + while (pCurrCallout) + { + if ((pCurrCallout->m_pCalloutMethod == pCalloutMethod) && + (pCurrCallout->m_pTypeFilter == pTypeFilter)) + { + // Found a matching entry, remove it from the chain. + if (pPrevCallout) + pPrevCallout->m_pNext = pCurrCallout->m_pNext; + else + s_pHandleTableRestrictedCallouts = pCurrCallout->m_pNext; + + delete pCurrCallout; + + return; + } + + pPrevCallout = pCurrCallout; + pCurrCallout = pCurrCallout->m_pNext; + } + + // If we get here we didn't find a matching registration, indicating a bug on the part of the caller. + ASSERT_UNCONDITIONALLY("Attempted to unregister restricted callout that wasn't registered."); + RhFailFast(); +} + +// Invoke all the registered GC callouts of the given kind. The condemned generation of the current collection +// is passed along to the callouts. 
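A hedged sketch of how a callout is registered against the invocation path below. The callback and the wiring function are hypothetical; in the runtime the classlib supplies the callout address.

    // The callback must obey the restrictions listed in RestrictedCallouts.h:
    // no allocation, no exceptions, nothing that can block on the GC.
    void __fastcall ExampleOnGcStart(UInt32 uiCondemnedGeneration)
    {
        UNREFERENCED_PARAMETER(uiCondemnedGeneration);
        // lightweight bookkeeping only
    }

    bool ExampleWireUpStartCollectionCallout()
    {
        // Returns false if the bookkeeping node could not be allocated.
        return RestrictedCallouts::RegisterGcCallout(GCRC_StartCollection, (void *)&ExampleOnGcStart);
    }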
+void RestrictedCallouts::InvokeGcCallouts(GcRestrictedCalloutKind eKind, UInt32 uiCondemnedGeneration) +{ + ASSERT(eKind < GCRC_Count); + + // It is illegal for any of the callouts to trigger a GC. + Thread * pThread = ThreadStore::GetCurrentThread(); + pThread->SetDoNotTriggerGc(); + + // Due to the above we have better suppress GC stress. + bool fGcStressWasSuppressed = pThread->IsSuppressGcStressSet(); + if (!fGcStressWasSuppressed) + pThread->SetSuppressGcStress(); + + GcRestrictedCallout * pCurrCallout = s_rgGcRestrictedCallouts[eKind]; + while (pCurrCallout) + { + // Make the callout. + ((GcRestrictedCallbackFunction)pCurrCallout->m_pCalloutMethod)(uiCondemnedGeneration); + + pCurrCallout = pCurrCallout->m_pNext; + } + + // Revert GC stress mode if we changed it. + if (!fGcStressWasSuppressed) + pThread->ClearSuppressGcStress(); + + pThread->ClearDoNotTriggerGc(); +} + +// Invoke all the registered ref counted handle callouts for the given object extracted from the handle. The +// result is the union of the results for all the handlers that matched the object type (i.e. if one of them +// returned true the overall result is true otherwise false is returned (which includes the case where no +// handlers matched)). Since there should be no other side-effects of the callout, the invocations cease as +// soon as a handler returns true. +bool RestrictedCallouts::InvokeRefCountedHandleCallbacks(Object * pObject) +{ + bool fResult = false; + + // It is illegal for any of the callouts to trigger a GC. + Thread * pThread = ThreadStore::GetCurrentThread(); + pThread->SetDoNotTriggerGc(); + + // Due to the above we have better suppress GC stress. + bool fGcStressWasSuppressed = pThread->IsSuppressGcStressSet(); + if (!fGcStressWasSuppressed) + pThread->SetSuppressGcStress(); + + HandleTableRestrictedCallout * pCurrCallout = s_pHandleTableRestrictedCallouts; + while (pCurrCallout) + { + if (pObject->get_SafeEEType() == pCurrCallout->m_pTypeFilter) + { + // Make the callout. Return true to our caller as soon as we see a true result here. + if (((HandleTableRestrictedCallbackFunction)pCurrCallout->m_pCalloutMethod)(pObject)) + { + fResult = true; + goto Done; + } + } + + pCurrCallout = pCurrCallout->m_pNext; + } + + Done: + // Revert GC stress mode if we changed it. + if (!fGcStressWasSuppressed) + pThread->ClearSuppressGcStress(); + + pThread->ClearDoNotTriggerGc(); + + return fResult; +} diff --git a/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.h b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.h new file mode 100644 index 0000000000000..384e2fcd9c8a5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.h @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Restricted callouts refer to calls to classlib defined code written in C# made from the runtime during a +// garbage collection. As such these C# methods are constrained in what they can do and must be written very +// carefully. The most obvious restriction is that they cannot trigger a GC (by attempting to allocate memory +// for example) since that would lead to an immediate deadlock. +// +// Known constraints: +// * No triggering of GCs (new, boxing value types, foreach over a type that allocates for its IEnumerator, +// calling GC.Collect etc.). +// * No exceptions can leak out of the callout. 
+// * No blocking (or expensive) operations that could starve the GC or potentially lead to deadlocks. +// * No use of runtime facilities that check whether a GC is in progress, these will deadlock. The big +// example we know about so far is making a p/invoke call. +// * For the AfterMarkPhase callout special attention must be paid to avoid any action that reads the EEType* +// from an object header (e.g. casting). At this point the GC may have mark bits set in the the pointer. +// + +class EEType; + +// Enum for the various GC callouts available. The values and their meanings are a contract with the classlib +// so be careful altering these. +enum GcRestrictedCalloutKind +{ + GCRC_StartCollection = 0, // Collection is about to begin + GCRC_EndCollection = 1, // Collection has completed + GCRC_AfterMarkPhase = 2, // All live objects are marked (not including ready for finalization + // objects), no handles have been cleared + GCRC_Count // Maximum number of callout types +}; + +class RestrictedCallouts +{ +public: + // One time startup initialization. + static bool Initialize(); + + // Register callback of the given type to the method with the given address. The most recently registered + // callbacks are called first. Returns true on success, false if insufficient memory was available for the + // registration. + static bool RegisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod); + + // Unregister a previously registered callout. Removes the first registration that matches on both callout + // kind and address. Causes a fail fast if the registration doesn't exist. + static void UnregisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod); + + // Register callback for the "is alive" property of ref counted handles with objects of the given type + // (the type match must be exact). The most recently registered callbacks are called first. Returns true + // on success, false if insufficient memory was available for the registration. + static bool RegisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter); + + // Unregister a previously registered callout. Removes the first registration that matches on both callout + // address and filter type. Causes a fail fast if the registration doesn't exist. + static void UnregisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter); + + // Invoke all the registered GC callouts of the given kind. The condemned generation of the current + // collection is passed along to the callouts. + static void InvokeGcCallouts(GcRestrictedCalloutKind eKind, UInt32 uiCondemnedGeneration); + + // Invoke all the registered ref counted handle callouts for the given object extracted from the handle. + // The result is the union of the results for all the handlers that matched the object type (i.e. if one + // of them returned true the overall result is true otherwise false is returned (which includes the case + // where no handlers matched)). Since there should be no other side-effects of the callout, the + // invocations cease as soon as a handler returns true. + static bool InvokeRefCountedHandleCallbacks(Object * pObject); + +private: + // Context struct used to record which GC callbacks are registered to be made (we allow multiple + // registrations). + struct GcRestrictedCallout + { + GcRestrictedCallout * m_pNext; // Next callout to make or NULL + void * m_pCalloutMethod; // Address of code to call + }; + + // The head of the chains of GC callouts, one per callout type. 
+ static GcRestrictedCallout * s_rgGcRestrictedCallouts[GCRC_Count]; + + // The handle table only has one callout type, for ref-counted handles. But it allows the client to + // specify a type filter: i.e. only handles with an object of the exact type specified will have the + // callout invoked. + struct HandleTableRestrictedCallout + { + HandleTableRestrictedCallout * m_pNext; // Next callout to make or NULL + void * m_pCalloutMethod; // Address of code to call + EEType * m_pTypeFilter; // Type of object for which callout will be made + }; + + // The head of the chain of HandleTable callouts. + static HandleTableRestrictedCallout * s_pHandleTableRestrictedCallouts; + + // Lock protecting access to s_rgGcRestrictedCallouts and s_pHandleTableRestrictedCallouts during + // registration and unregistration (not used during actual callbacks since everything is single threaded + // then). + static CrstStatic s_sLock; + + // Prototypes for the callouts. + typedef void (__fastcall * GcRestrictedCallbackFunction)(UInt32 uiCondemnedGeneration); + typedef Boolean (__fastcall * HandleTableRestrictedCallbackFunction)(Object * pObject); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/RhConfig.cpp b/src/coreclr/src/nativeaot/Runtime/RhConfig.cpp new file mode 100644 index 0000000000000..47398b7b6d926 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RhConfig.cpp @@ -0,0 +1,421 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#ifndef DACCESS_COMPILE +#include "CommonTypes.h" +#include "daccess.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "RuntimeInstance.h" +#include "shash.h" +#include "RhConfig.h" + +#include + +UInt32 RhConfig::ReadConfigValue(_In_z_ const TCHAR *wszName, UInt32 uiDefaultValue) +{ + TCHAR wszBuffer[CONFIG_VAL_MAXLEN + 1]; // 8 hex digits plus a nul terminator. 
+ const UInt32 cchBuffer = sizeof(wszBuffer) / sizeof(wszBuffer[0]); + + UInt32 cchResult = 0; + +#ifdef FEATURE_ENVIRONMENT_VARIABLE_CONFIG + cchResult = PalGetEnvironmentVariable(wszName, wszBuffer, cchBuffer); +#endif // FEATURE_ENVIRONMENT_VARIABLE_CONFIG + + //if the config key wasn't found in the environment + if ((cchResult == 0) || (cchResult >= cchBuffer)) + cchResult = GetIniVariable(wszName, wszBuffer, cchBuffer); + +#ifdef FEATURE_EMBEDDED_CONFIG + // if the config key wasn't found in the ini file + if ((cchResult == 0) || (cchResult >= cchBuffer)) + cchResult = GetEmbeddedVariable(wszName, wszBuffer, cchBuffer); +#endif // FEATURE_EMBEDDED_CONFIG + + if ((cchResult == 0) || (cchResult >= cchBuffer)) + return uiDefaultValue; // not found, return default + + UInt32 uiResult = 0; + + for (UInt32 i = 0; i < cchResult; i++) + { + uiResult <<= 4; + + TCHAR ch = wszBuffer[i]; + if ((ch >= _T('0')) && (ch <= _T('9'))) + uiResult += ch - _T('0'); + else if ((ch >= _T('a')) && (ch <= _T('f'))) + uiResult += (ch - _T('a')) + 10; + else if ((ch >= _T('A')) && (ch <= _T('F'))) + uiResult += (ch - _T('A')) + 10; + else + return uiDefaultValue; // parse error, return default + } + + return uiResult; +} + +//reads a config value from rhconfig.ini into outputBuffer buffer returning the length of the value. +//lazily reads the file so if the file is not yet read, it will read it on first called +//if the file is not avaliable, or unreadable zero will always be returned +//cchOutputBuffer is the maximum number of characters to write to outputBuffer +//cchOutputBuffer must be a size >= CONFIG_VAL_MAXLEN + 1 +UInt32 RhConfig::GetIniVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer) +{ + //the buffer needs to be big enough to read the value buffer + null terminator + if (cchOutputBuffer < CONFIG_VAL_MAXLEN + 1) + { + return 0; + } + + //if we haven't read the config yet try to read + if (g_iniSettings == NULL) + { + ReadConfigIni(); + } + + //if the config wasn't read or reading failed return 0 immediately + if (g_iniSettings == CONFIG_INI_NOT_AVAIL) + { + return 0; + } + + return GetConfigVariable(configName, (ConfigPair*)g_iniSettings, outputBuffer, cchOutputBuffer); +} + +#ifdef FEATURE_EMBEDDED_CONFIG +UInt32 RhConfig::GetEmbeddedVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer) +{ + //the buffer needs to be big enough to read the value buffer + null terminator + if (cchOutputBuffer < CONFIG_VAL_MAXLEN + 1) + { + return 0; + } + + //if we haven't read the config yet try to read + if (g_embeddedSettings == NULL) + { + ReadEmbeddedSettings(); + } + + //if the config wasn't read or reading failed return 0 immediately + if (g_embeddedSettings == CONFIG_INI_NOT_AVAIL) + { + return 0; + } + + return GetConfigVariable(configName, (ConfigPair*)g_embeddedSettings, outputBuffer, cchOutputBuffer); +} +#endif // FEATURE_EMBEDDED_CONFIG + +UInt32 RhConfig::GetConfigVariable(_In_z_ const TCHAR* configName, const ConfigPair* configPairs, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer) +{ + //find the first name which matches (case insensitive to be compat with environment variable counterpart) + for (int iSettings = 0; iSettings < RCV_Count; iSettings++) + { + if (_tcsicmp(configName, configPairs[iSettings].Key) == 0) + { + bool nullTerm = FALSE; + + UInt32 iValue; + + for (iValue = 0; (iValue < CONFIG_VAL_MAXLEN + 1) && 
(iValue < (Int32)cchOutputBuffer); iValue++) + { + outputBuffer[iValue] = configPairs[iSettings].Value[iValue]; + + if (outputBuffer[iValue] == '\0') + { + nullTerm = true; + break; + } + } + + //return the length of the config value if null terminated else return zero + return nullTerm ? iValue : 0; + } + } + + //if the config key was not found return 0 + return 0; +} + +//reads the configuration values from rhconfig.ini and updates g_iniSettings +//if the file is read succesfully and g_iniSettings will be set to a valid ConfigPair[] of length RCV_Count. +//if the file does not exist or reading the file fails, g_iniSettings is set to CONFIG_INI_NOT_AVAIL +//NOTE: all return paths must set g_iniSettings +void RhConfig::ReadConfigIni() +{ + if (g_iniSettings == NULL) + { + TCHAR* configPath = GetConfigPath(); + + //if we couldn't determine the path to the config set g_iniSettings to CONGIF_NOT_AVAIL + if (configPath == NULL) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_iniSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + //buffer is max file size + 1 for null terminator if needed + char buff[CONFIG_FILE_MAXLEN + 1]; + + //if the file read failed or the file is bigger than the specified buffer this will return zero + UInt32 fSize = PalReadFileContents(configPath, buff, CONFIG_FILE_MAXLEN); + + //ensure the buffer is null terminated + buff[fSize] = '\0'; + + //delete the configPath + delete[] configPath; + + //if reading the file contents failed set g_iniSettings to CONFIG_INI_NOT_AVAIL + if (fSize == 0) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_iniSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + ConfigPair* iniBuff = new (nothrow) ConfigPair[RCV_Count]; + if (iniBuff == NULL) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_iniSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + UInt32 iBuff = 0; + UInt32 iIniBuff = 0; + char* currLine; + + //while we haven't reached the max number of config pairs, or the end of the file, read the next line + while (iIniBuff < RCV_Count && iBuff < fSize) + { + //'trim' the leading whitespace + while (priv_isspace(buff[iBuff]) && (iBuff < fSize)) + iBuff++; + + currLine = &buff[iBuff]; + + //find the end of the line + while ((buff[iBuff] != '\n') && (buff[iBuff] != '\r') && (iBuff < fSize)) + iBuff++; + + //null terminate the line + buff[iBuff] = '\0'; + + //parse the line + //only increment iIniBuff if the parsing succeeded otherwise reuse the config struct + if (ParseConfigLine(&iniBuff[iIniBuff], currLine)) + { + iIniBuff++; + } + + //advance to the next line; + iBuff++; + } + + //initialize the remaining config pairs to "\0" + while (iIniBuff < RCV_Count) + { + iniBuff[iIniBuff].Key[0] = '\0'; + iniBuff[iIniBuff].Value[0] = '\0'; + iIniBuff++; + } + + //if another thread initialized first let the first setter win + //delete the iniBuff to avoid leaking memory + if (PalInterlockedCompareExchangePointer(&g_iniSettings, iniBuff, NULL) != NULL) + { + delete[] iniBuff; + } + } + + return; +} + +#ifdef FEATURE_EMBEDDED_CONFIG +struct CompilerEmbeddedSettingsBlob +{ + UInt32 Size; + char Data[1]; +}; + +extern "C" CompilerEmbeddedSettingsBlob g_compilerEmbeddedSettingsBlob; + +void 
RhConfig::ReadEmbeddedSettings() +{ + if (g_embeddedSettings == NULL) + { + //if reading the file contents failed set g_embeddedSettings to CONFIG_INI_NOT_AVAIL + if (g_compilerEmbeddedSettingsBlob.Size == 0) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_embeddedSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + ConfigPair* iniBuff = new (nothrow) ConfigPair[RCV_Count]; + if (iniBuff == NULL) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_embeddedSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + UInt32 iBuff = 0; + UInt32 iIniBuff = 0; + char* currLine; + + //while we haven't reached the max number of config pairs, or the end of the file, read the next line + while (iIniBuff < RCV_Count && iBuff < g_compilerEmbeddedSettingsBlob.Size) + { + currLine = &g_compilerEmbeddedSettingsBlob.Data[iBuff]; + + //find the end of the line + while ((g_compilerEmbeddedSettingsBlob.Data[iBuff] != '\0') && (iBuff < g_compilerEmbeddedSettingsBlob.Size)) + iBuff++; + + //parse the line + //only increment iIniBuff if the parsing succeeded otherwise reuse the config struct + if (ParseConfigLine(&iniBuff[iIniBuff], currLine)) + { + iIniBuff++; + } + + //advance to the next line; + iBuff++; + } + + //initialize the remaining config pairs to "\0" + while (iIniBuff < RCV_Count) + { + iniBuff[iIniBuff].Key[0] = '\0'; + iniBuff[iIniBuff].Value[0] = '\0'; + iIniBuff++; + } + + //if another thread initialized first let the first setter win + //delete the iniBuff to avoid leaking memory + if (PalInterlockedCompareExchangePointer(&g_embeddedSettings, iniBuff, NULL) != NULL) + { + delete[] iniBuff; + } + } + + return; +} +#endif // FEATURE_EMBEDDED_CONFIG + +//returns the path to the runtime configuration ini +_Ret_maybenull_z_ TCHAR* RhConfig::GetConfigPath() +{ + const TCHAR* exePathBuff; + + //get the path to rhconfig.ini, this file is expected to live along side the app + //to build the path get the process executable module full path strip off the file name and + //append rhconfig.ini + Int32 pathLen = PalGetModuleFileName(&exePathBuff, NULL); + + if (pathLen <= 0) + { + return NULL; + } + UInt32 iLastDirSeparator = 0; + + for (UInt32 iPath = pathLen - 1; iPath > 0; iPath--) + { + if (exePathBuff[iPath] == DIRECTORY_SEPARATOR_CHAR) + { + iLastDirSeparator = iPath; + break; + } + } + + if (iLastDirSeparator == 0) + { + return NULL; + } + + TCHAR* configPath = new (nothrow) TCHAR[iLastDirSeparator + 1 + wcslen(CONFIG_INI_FILENAME) + 1]; + if (configPath != NULL) + { + //copy the path base and file name + for (UInt32 i = 0; i <= iLastDirSeparator; i++) + { + configPath[i] = exePathBuff[i]; + } + + for (UInt32 i = 0; i <= wcslen(CONFIG_INI_FILENAME); i++) + { + configPath[i + iLastDirSeparator + 1] = CONFIG_INI_FILENAME[i]; + } + } + + return configPath; +} + +//Parses one line of rhconfig.ini and populates values in the passed in configPair +//returns: true if the parsing was successful, false if the parsing failed. 
+//NOTE: if the method fails configPair is left in an unitialized state +bool RhConfig::ParseConfigLine(_Out_ ConfigPair* configPair, _In_z_ const char * line) +{ + UInt32 iLine = 0; + UInt32 iKey = 0; + UInt32 iVal = 0; + + //while we haven't reached the end of the key signalled by '=', or the end of the line, or the key maxlen + while (line[iLine] != '=' && line[iLine] != '\0' && iKey < CONFIG_KEY_MAXLEN) + { + configPair->Key[iKey++] = line[iLine++]; + } + + //if the current char is not '=' we reached the key maxlen, or the line ended return false + if (line[iLine] != '=') + { + return FALSE; + } + + configPair->Key[iKey] = '\0'; + + //increment to start of the value + iLine++; + + //while we haven't reached the end of the line, or val maxlen + while (line[iLine] != '\0' && iVal < CONFIG_VAL_MAXLEN) + { + configPair->Value[iVal++] = line[iLine++]; + } + + //if the current char is not '\0' we didn't reach the end of the line return false + if (line[iLine] != '\0') + { + return FALSE; + } + + configPair->Value[iVal] = '\0'; + + return TRUE; +} + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/RhConfig.h b/src/coreclr/src/nativeaot/Runtime/RhConfig.h new file mode 100644 index 0000000000000..b6a50eb9b56cd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RhConfig.h @@ -0,0 +1,151 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Provides simple configuration support through environment variables. Each variable is lazily inspected on +// first query and the resulting value cached for future use. To keep things simple we support reading only +// 32-bit hex quantities and a zero value is considered equivalent to the environment variable not being +// defined. We can get more sophisticated if needs be, but the hope is that very few configuration values are +// exposed in this manner. +// +// Values can also be configured through an rhconfig.ini file. The file must be and ASCII text file, must be +// placed next to the executing assembly, and be named rhconfig.ini. The file consists of one config entry per line +// in the format: = +// example: +// RH_HeapVerify=1 +// RH_BreakOnAssert=1 +// + + +#ifndef DACCESS_COMPILE + +#if defined(_DEBUG) || !defined(APP_LOCAL_RUNTIME) +#define FEATURE_ENVIRONMENT_VARIABLE_CONFIG +#endif + +class RhConfig +{ + +#define CONFIG_INI_FILENAME L"rhconfig.ini" +#define CONFIG_INI_NOT_AVAIL (void*)0x1 //signal for ini file failed to load +#define CONFIG_KEY_MAXLEN 50 //arbitrary max length of config keys increase if needed +#define CONFIG_VAL_MAXLEN 8 //32 bit uint in hex + +private: + struct ConfigPair + { + public: + TCHAR Key[CONFIG_KEY_MAXLEN + 1]; //maxlen + null terminator + TCHAR Value[CONFIG_VAL_MAXLEN + 1]; //maxlen + null terminator + }; + + //g_iniSettings is a buffer of ConfigPair structs which when initialized is of length RCV_Count + //the first N settings which are set in rhconfig.ini will be initialized and the remainder with have + //empty string "\0" as a Key and Value + // + //if the buffer has not been initialized (ie the ini file has not been read) the value will be NULL + //if we already attempted to initialize the file and could not find or read the contents the + //value will be CONFIG_INI_NOT_AVAIL to distinguish from the unitialized buffer. 
+ // + //NOTE: g_iniSettings is only set in ReadConfigIni and must be set atomically only once + // using PalInterlockedCompareExchangePointer to avoid races when initializing +private: + void* volatile g_iniSettings = NULL; + +#ifdef FEATURE_EMBEDDED_CONFIG + // g_embeddedSettings works similarly to g_iniSettings, except the source of the data + // is a data blob generated by the compiler and embedded into the executable. + void* volatile g_embeddedSettings = NULL; +#endif // FEATURE_EMBEDDED_CONFIG + +public: + +#define DEFINE_VALUE_ACCESSOR(_name, defaultVal) \ + UInt32 Get##_name() \ + { \ + if (m_uiConfigValuesRead & (1 << RCV_##_name)) \ + return m_uiConfigValues[RCV_##_name]; \ + UInt32 uiValue = ReadConfigValue(_T("RH_") _T(#_name), defaultVal); \ + m_uiConfigValues[RCV_##_name] = uiValue; \ + m_uiConfigValuesRead |= 1 << RCV_##_name; \ + return uiValue; \ + } + + +#ifdef _DEBUG +#define DEBUG_CONFIG_VALUE(_name) DEFINE_VALUE_ACCESSOR(_name, 0) +#define DEBUG_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) DEFINE_VALUE_ACCESSOR(_name, defaultVal) +#else +#define DEBUG_CONFIG_VALUE(_name) +#define DEBUG_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) +#endif +#define RETAIL_CONFIG_VALUE(_name) DEFINE_VALUE_ACCESSOR(_name, 0) +#define RETAIL_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) DEFINE_VALUE_ACCESSOR(_name, defaultVal) +#include "RhConfigValues.h" +#undef DEBUG_CONFIG_VALUE +#undef RETAIL_CONFIG_VALUE +#undef DEBUG_CONFIG_VALUE_WITH_DEFAULT +#undef RETAIL_CONFIG_VALUE_WITH_DEFAULT + +private: + + UInt32 ReadConfigValue(_In_z_ const TCHAR *wszName, UInt32 uiDefault); + + enum RhConfigValue + { +#define DEBUG_CONFIG_VALUE(_name) RCV_##_name, +#define RETAIL_CONFIG_VALUE(_name) RCV_##_name, +#define DEBUG_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) RCV_##_name, +#define RETAIL_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) RCV_##_name, +#include "RhConfigValues.h" +#undef DEBUG_CONFIG_VALUE +#undef RETAIL_CONFIG_VALUE +#undef DEBUG_CONFIG_VALUE_WITH_DEFAULT +#undef RETAIL_CONFIG_VALUE_WITH_DEFAULT + RCV_Count + }; + +//accomidate for the maximum number of config values plus sizable buffer for whitespace 2K +#define CONFIG_FILE_MAXLEN RCV_Count * sizeof(ConfigPair) + 2000 + +private: + _Ret_maybenull_z_ TCHAR* GetConfigPath(); + + //Parses one line of rhconfig.ini and populates values in the passed in configPair + //returns: true if the parsing was successful, false if the parsing failed. + //NOTE: if the method fails configPair is left in an unitialized state + bool ParseConfigLine(_Out_ ConfigPair* configPair, _In_z_ const char * line); + + //reads the configuration values from rhconfig.ini and updates g_iniSettings + //if the file is read succesfully and g_iniSettings will be set to a valid ConfigPair[] of length RCV_Count. + //if the file does not exist or reading the file fails, g_iniSettings is set to CONFIG_INI_NOT_AVAIL + //NOTE: all return paths must set g_iniSettings + void ReadConfigIni(); + + //reads a config value from rhconfig.ini into outputBuffer buffer returning the length of the value. 
+ //lazily reads the file so if the file is not yet read, it will read it on first called + //if the file is not avaliable, or unreadable zero will always be returned + //cchOutputBuffer is the maximum number of characters to write to outputBuffer + UInt32 GetIniVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer); + +#ifdef FEATURE_EMBEDDED_CONFIG + void ReadEmbeddedSettings(); + + UInt32 GetEmbeddedVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer); +#endif // FEATURE_EMBEDDED_CONFIG + + UInt32 GetConfigVariable(_In_z_ const TCHAR* configName, const ConfigPair* configPairs, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer); + + static bool priv_isspace(char c) + { + return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); + } + + + UInt32 m_uiConfigValuesRead; + UInt32 m_uiConfigValues[RCV_Count]; +}; + +extern RhConfig * g_pRhConfig; + +#endif //!DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/RhConfigValues.h b/src/coreclr/src/nativeaot/Runtime/RhConfigValues.h new file mode 100644 index 0000000000000..1c68fe5963def --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RhConfigValues.h @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Definitions of each configuration value used by the RhConfig class. +// +// Each variable is lazily inspected on first query and the resulting value cached for future use. To keep +// things simple we support reading only 32-bit hex quantities and a zero value is considered equivalent to +// the environment variable not being defined. We can get more sophisticated if needs be, but the hope is that +// very few configuration values are exposed in this manner. +// + +// By default, print assert to console and break in the debugger, if attached. Set to 0 for a pop-up dialog on assert. +DEBUG_CONFIG_VALUE_WITH_DEFAULT(BreakOnAssert, 1) + +RETAIL_CONFIG_VALUE(HeapVerify) +RETAIL_CONFIG_VALUE(StressLogLevel) +RETAIL_CONFIG_VALUE(TotalStressLogSize) +RETAIL_CONFIG_VALUE(DisableBGC) +RETAIL_CONFIG_VALUE(UseServerGC) +DEBUG_CONFIG_VALUE(DisallowRuntimeServicesFallback) +DEBUG_CONFIG_VALUE(GcStressThrottleMode) // gcstm_TriggerAlways / gcstm_TriggerOnFirstHit / gcstm_TriggerRandom +DEBUG_CONFIG_VALUE(GcStressFreqCallsite) // Number of times to force GC out of GcStressFreqDenom (for GCSTM_RANDOM) +DEBUG_CONFIG_VALUE(GcStressFreqLoop) // Number of times to force GC out of GcStressFreqDenom (for GCSTM_RANDOM) +DEBUG_CONFIG_VALUE(GcStressFreqDenom) // Denominator defining frequencies above, 10,000 used when left unspecified (for GCSTM_RANDOM) +DEBUG_CONFIG_VALUE(GcStressSeed) // Specify Seed for random generator (for GCSTM_RANDOM) diff --git a/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.cpp b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.cpp new file mode 100644 index 0000000000000..487f332fddac7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.cpp @@ -0,0 +1,543 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
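
To make the X-macro machinery above concrete before moving into RuntimeInstance.cpp: expanding RETAIL_CONFIG_VALUE(HeapVerify) from RhConfigValues.h through DEFINE_VALUE_ACCESSOR in RhConfig.h produces approximately the member shown below (expansion written out for illustration only; the exact tokens come from the preprocessor).

    // Approximate expansion inside class RhConfig:
    UInt32 GetHeapVerify()
    {
        if (m_uiConfigValuesRead & (1 << RCV_HeapVerify))
            return m_uiConfigValues[RCV_HeapVerify];                      // cached from an earlier query
        UInt32 uiValue = ReadConfigValue(_T("RH_") _T("HeapVerify"), 0);  // env var / rhconfig.ini, hex
        m_uiConfigValues[RCV_HeapVerify] = uiValue;
        m_uiConfigValuesRead |= 1 << RCV_HeapVerify;
        return uiValue;
    }

A caller would then query the value through the global declared in RhConfig.h, for example g_pRhConfig->GetHeapVerify(); the value is read once and cached in m_uiConfigValues for the lifetime of the RhConfig instance.
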
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "event.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "gcrhinterface.h" +#include "shash.h" +#include "TypeManager.h" +#include "eetype.h" +#include "varint.h" +#include "DebugEventSource.h" + +#include "CommonMacros.inl" +#include "slist.inl" +#include "eetype.inl" + +#ifdef FEATURE_GC_STRESS +enum HijackType { htLoop, htCallsite }; +bool ShouldHijackForGcStress(UIntNative CallsiteIP, HijackType ht); +#endif // FEATURE_GC_STRESS + +#include "shash.inl" + +#ifndef DACCESS_COMPILE +COOP_PINVOKE_HELPER(UInt8 *, RhSetErrorInfoBuffer, (UInt8 * pNewBuffer)) +{ + return (UInt8 *) PalSetWerDataBuffer(pNewBuffer); +} +#endif // DACCESS_COMPILE + + +ThreadStore * RuntimeInstance::GetThreadStore() +{ + return m_pThreadStore; +} + +COOP_PINVOKE_HELPER(UInt8 *, RhFindMethodStartAddress, (void * codeAddr)) +{ + return dac_cast(GetRuntimeInstance()->FindMethodStartAddress(dac_cast(codeAddr))); +} + +PTR_UInt8 RuntimeInstance::FindMethodStartAddress(PTR_VOID ControlPC) +{ + ICodeManager * pCodeManager = FindCodeManagerByAddress(ControlPC); + MethodInfo methodInfo; + if (pCodeManager != NULL && pCodeManager->FindMethodInfo(ControlPC, &methodInfo)) + { + return (PTR_UInt8)pCodeManager->GetMethodStartAddress(&methodInfo); + } + + return NULL; +} + +ICodeManager * RuntimeInstance::FindCodeManagerByAddress(PTR_VOID pvAddress) +{ + ReaderWriterLock::ReadHolder read(&m_ModuleListLock); + + // TODO: ICodeManager support in DAC +#ifndef DACCESS_COMPILE + for (CodeManagerEntry * pEntry = m_CodeManagerList.GetHead(); pEntry != NULL; pEntry = pEntry->m_pNext) + { + if (dac_cast(pvAddress) - dac_cast(pEntry->m_pvStartRange) < pEntry->m_cbRange) + return pEntry->m_pCodeManager; + } +#endif + + return NULL; +} + +#ifndef DACCESS_COMPILE + +// Find the code manager containing the given address, which might be a return address from a managed function. The +// address may be to another managed function, or it may be to an unmanaged function. The address may also refer to +// an EEType. +ICodeManager * RuntimeInstance::FindCodeManagerForClasslibFunction(PTR_VOID address) +{ + // Try looking up the code manager assuming the address is for code first. This is expected to be most common. + ICodeManager * pCodeManager = FindCodeManagerByAddress(address); + if (pCodeManager != NULL) + return pCodeManager; + + ASSERT_MSG(!Thread::IsHijackTarget(address), "not expected to be called with hijacked return address"); + + return NULL; +} + +void * RuntimeInstance::GetClasslibFunctionFromCodeAddress(PTR_VOID address, ClasslibFunctionId functionId) +{ + // Find the code manager for the given address, which is an address into some managed module. It could + // be code, or it could be an EEType. No matter what, it's an address into a managed module in some non-Rtm + // type system. + ICodeManager * pCodeManager = FindCodeManagerForClasslibFunction(address); + + // If the address isn't in a managed module then we have no classlib function. 
+ if (pCodeManager == NULL) + { + return NULL; + } + + return pCodeManager->GetClasslibFunction(functionId); +} + +#endif // DACCESS_COMPILE + +PTR_UInt8 RuntimeInstance::GetTargetOfUnboxingAndInstantiatingStub(PTR_VOID ControlPC) +{ + ICodeManager * pCodeManager = FindCodeManagerByAddress(ControlPC); + if (pCodeManager != NULL) + { + PTR_UInt8 pData = (PTR_UInt8)pCodeManager->GetAssociatedData(ControlPC); + if (pData != NULL) + { + UInt8 flags = *pData++; + + if ((flags & (UInt8)AssociatedDataFlags::HasUnboxingStubTarget) != 0) + return pData + *dac_cast(pData); + } + } + + return NULL; +} + +GPTR_IMPL_INIT(RuntimeInstance, g_pTheRuntimeInstance, NULL); + +PTR_RuntimeInstance GetRuntimeInstance() +{ + return g_pTheRuntimeInstance; +} + +void RuntimeInstance::EnumAllStaticGCRefs(void * pfnCallback, void * pvCallbackData) +{ + for (TypeManagerList::Iterator iter = m_TypeManagerList.Begin(); iter != m_TypeManagerList.End(); iter++) + { + iter->m_pTypeManager->EnumStaticGCRefs(pfnCallback, pvCallbackData); + } +} + +void RuntimeInstance::SetLoopHijackFlags(UInt32 flag) +{ + for (TypeManagerList::Iterator iter = m_TypeManagerList.Begin(); iter != m_TypeManagerList.End(); iter++) + { + iter->m_pTypeManager->SetLoopHijackFlag(flag); + } +} + +RuntimeInstance::OsModuleList* RuntimeInstance::GetOsModuleList() +{ + return dac_cast(dac_cast(this) + offsetof(RuntimeInstance, m_OsModuleList)); +} + +ReaderWriterLock& RuntimeInstance::GetTypeManagerLock() +{ + return m_ModuleListLock; +} + +#ifndef DACCESS_COMPILE + +RuntimeInstance::RuntimeInstance() : + m_pThreadStore(NULL), + m_conservativeStackReportingEnabled(false), + m_pUnboxingStubsRegion(NULL) +{ +} + +RuntimeInstance::~RuntimeInstance() +{ + if (NULL != m_pThreadStore) + { + delete m_pThreadStore; + m_pThreadStore = NULL; + } +} + +HANDLE RuntimeInstance::GetPalInstance() +{ + return m_hPalInstance; +} + +void RuntimeInstance::EnableConservativeStackReporting() +{ + m_conservativeStackReportingEnabled = true; +} + +bool RuntimeInstance::RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange) +{ + CodeManagerEntry * pEntry = new (nothrow) CodeManagerEntry(); + if (NULL == pEntry) + return false; + + pEntry->m_pvStartRange = pvStartRange; + pEntry->m_cbRange = cbRange; + pEntry->m_pCodeManager = pCodeManager; + + { + ReaderWriterLock::WriteHolder write(&m_ModuleListLock); + + m_CodeManagerList.PushHead(pEntry); + } + + return true; +} + +void RuntimeInstance::UnregisterCodeManager(ICodeManager * pCodeManager) +{ + CodeManagerEntry * pEntry = NULL; + + { + ReaderWriterLock::WriteHolder write(&m_ModuleListLock); + + for (CodeManagerList::Iterator i = m_CodeManagerList.Begin(), end = m_CodeManagerList.End(); i != end; i++) + { + if (i->m_pCodeManager == pCodeManager) + { + pEntry = *i; + + m_CodeManagerList.Remove(i); + break; + } + } + } + + ASSERT(pEntry != NULL); + delete pEntry; +} + +extern "C" bool __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange) +{ + return GetRuntimeInstance()->RegisterCodeManager(pCodeManager, pvStartRange, cbRange); +} + +extern "C" void __stdcall UnregisterCodeManager(ICodeManager * pCodeManager) +{ + return GetRuntimeInstance()->UnregisterCodeManager(pCodeManager); +} + +bool RuntimeInstance::RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange) +{ + ASSERT(pvStartRange != NULL && cbRange > 0); + + UnboxingStubsRegion * pEntry = new (nothrow) UnboxingStubsRegion(); + if (NULL == pEntry) + return false; + + pEntry->m_pRegionStart = 
pvStartRange; + pEntry->m_cbRegion = cbRange; + + do + { + pEntry->m_pNextRegion = m_pUnboxingStubsRegion; + } + while (PalInterlockedCompareExchangePointer((void *volatile *)&m_pUnboxingStubsRegion, pEntry, pEntry->m_pNextRegion) != pEntry->m_pNextRegion); + + return true; +} + +bool RuntimeInstance::IsUnboxingStub(UInt8* pCode) +{ + UnboxingStubsRegion * pCurrent = m_pUnboxingStubsRegion; + while (pCurrent != NULL) + { + UInt8* pUnboxingStubsRegion = dac_cast(pCurrent->m_pRegionStart); + if (pCode >= pUnboxingStubsRegion && pCode < (pUnboxingStubsRegion + pCurrent->m_cbRegion)) + return true; + + pCurrent = pCurrent->m_pNextRegion; + } + + return false; +} + +extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange) +{ + return GetRuntimeInstance()->RegisterUnboxingStubs(pvStartRange, cbRange); +} + +bool RuntimeInstance::RegisterTypeManager(TypeManager * pTypeManager) +{ + TypeManagerEntry * pEntry = new (nothrow) TypeManagerEntry(); + if (NULL == pEntry) + return false; + + pEntry->m_pTypeManager = pTypeManager; + + { + ReaderWriterLock::WriteHolder write(&m_ModuleListLock); + + m_TypeManagerList.PushHead(pEntry); + } + + return true; +} + +COOP_PINVOKE_HELPER(TypeManagerHandle, RhpCreateTypeManager, (HANDLE osModule, void* pModuleHeader, PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions)) +{ + TypeManager * typeManager = TypeManager::Create(osModule, pModuleHeader, pClasslibFunctions, nClasslibFunctions); + GetRuntimeInstance()->RegisterTypeManager(typeManager); + + // This event must occur after the module is added to the enumeration + if (osModule != nullptr) + DebugEventSource::SendModuleLoadEvent(osModule); + + return TypeManagerHandle::Create(typeManager); +} + +COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleForMrt, ()) +{ + return GetRuntimeInstance()->GetPalInstance(); +} + +COOP_PINVOKE_HELPER(void*, RhpRegisterOsModule, (HANDLE hOsModule)) +{ + RuntimeInstance::OsModuleEntry * pEntry = new (nothrow) RuntimeInstance::OsModuleEntry(); + if (NULL == pEntry) + return nullptr; // Return null on failure. 
+ + pEntry->m_osModule = hOsModule; + + { + RuntimeInstance *pRuntimeInstance = GetRuntimeInstance(); + ReaderWriterLock::WriteHolder write(&pRuntimeInstance->GetTypeManagerLock()); + + pRuntimeInstance->GetOsModuleList()->PushHead(pEntry); + } + + return hOsModule; // Return non-null on success +} + +RuntimeInstance::TypeManagerList& RuntimeInstance::GetTypeManagerList() +{ + return m_TypeManagerList; +} + +// static +bool RuntimeInstance::Initialize(HANDLE hPalInstance) +{ + NewHolder pRuntimeInstance = new (nothrow) RuntimeInstance(); + if (NULL == pRuntimeInstance) + return false; + + CreateHolder pThreadStore = ThreadStore::Create(pRuntimeInstance); + if (NULL == pThreadStore) + return false; + + pThreadStore.SuppressRelease(); + pRuntimeInstance.SuppressRelease(); + + pRuntimeInstance->m_pThreadStore = pThreadStore; + pRuntimeInstance->m_hPalInstance = hPalInstance; + + ASSERT_MSG(g_pTheRuntimeInstance == NULL, "multi-instances are not supported"); + g_pTheRuntimeInstance = pRuntimeInstance; + + return true; +} + +void RuntimeInstance::Destroy() +{ + delete this; +} + +bool RuntimeInstance::ShouldHijackLoopForGcStress(UIntNative CallsiteIP) +{ +#ifdef FEATURE_GC_STRESS + return ShouldHijackForGcStress(CallsiteIP, htLoop); +#else // FEATURE_GC_STRESS + UNREFERENCED_PARAMETER(CallsiteIP); + return false; +#endif // FEATURE_GC_STRESS +} + +bool RuntimeInstance::ShouldHijackCallsiteForGcStress(UIntNative CallsiteIP) +{ +#ifdef FEATURE_GC_STRESS + return ShouldHijackForGcStress(CallsiteIP, htCallsite); +#else // FEATURE_GC_STRESS + UNREFERENCED_PARAMETER(CallsiteIP); + return false; +#endif // FEATURE_GC_STRESS +} + +COOP_PINVOKE_HELPER(UInt32, RhGetGCDescSize, (EEType* pEEType)) +{ + return RedhawkGCInterface::GetGCDescSize(pEEType); +} + + +// Keep in sync with ndp\fxcore\src\System.Private.CoreLib\system\runtime\runtimeimports.cs +enum RuntimeHelperKind +{ + AllocateObject, + IsInst, + CastClass, + AllocateArray, + CheckArrayElementType, +}; + +// The dictionary codegen expects a pointer that points at a memory location that points to the method pointer +// Create indirections for all helpers used below + +#define DECLARE_INDIRECTION(RET_TYPE, HELPER_NAME, ARGS) \ + EXTERN_C RET_TYPE HELPER_NAME ARGS; \ + const PTR_VOID indirection_##HELPER_NAME = (PTR_VOID)&HELPER_NAME + +#define INDIRECTION(HELPER_NAME) ((PTR_VOID)&indirection_##HELPER_NAME) + +DECLARE_INDIRECTION(Object *, RhpNewFast, (EEType *)); +DECLARE_INDIRECTION(Object *, RhpNewFinalizable, (EEType *)); + +DECLARE_INDIRECTION(Array *, RhpNewArray, (EEType *, int)); + +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOf, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCast, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOfClass, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCastClass, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOfArray, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCastArray, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOfInterface, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCastInterface, (EEType *, Object *)); + +DECLARE_INDIRECTION(void, RhTypeCast_CheckVectorElemAddr, (EEType *, Object *)); + +#ifdef HOST_ARM +DECLARE_INDIRECTION(Object *, RhpNewFinalizableAlign8, (EEType *)); +DECLARE_INDIRECTION(Object *, RhpNewFastMisalign, (EEType *)); +DECLARE_INDIRECTION(Object *, RhpNewFastAlign8, (EEType *)); + 
+DECLARE_INDIRECTION(Array *, RhpNewArrayAlign8, (EEType *, int)); +#endif + +COOP_PINVOKE_HELPER(PTR_VOID, RhGetRuntimeHelperForType, (EEType * pEEType, int helperKind)) +{ + // This implementation matches what the binder does (MetaDataEngine::*() in rh\src\tools\rhbind\MetaDataEngine.cpp) + // If you change the binder's behavior, change this implementation too + + switch (helperKind) + { + case RuntimeHelperKind::AllocateObject: +#ifdef HOST_ARM + if ((pEEType->get_RareFlags() & EEType::RareFlags::RequiresAlign8Flag) == EEType::RareFlags::RequiresAlign8Flag) + { + if (pEEType->HasFinalizer()) + return INDIRECTION(RhpNewFinalizableAlign8); + else if (pEEType->get_IsValueType()) // returns true for enum types as well + return INDIRECTION(RhpNewFastMisalign); + else + return INDIRECTION(RhpNewFastAlign8); + } +#endif + if (pEEType->HasFinalizer()) + return INDIRECTION(RhpNewFinalizable); + else + return INDIRECTION(RhpNewFast); + + case RuntimeHelperKind::IsInst: + if (pEEType->IsArray()) + return INDIRECTION(RhTypeCast_IsInstanceOfArray); + else if (pEEType->IsInterface()) + return INDIRECTION(RhTypeCast_IsInstanceOfInterface); + else if (pEEType->IsParameterizedType()) + return INDIRECTION(RhTypeCast_IsInstanceOf); // Array handled above; pointers and byrefs handled here + else + return INDIRECTION(RhTypeCast_IsInstanceOfClass); + + case RuntimeHelperKind::CastClass: + if (pEEType->IsArray()) + return INDIRECTION(RhTypeCast_CheckCastArray); + else if (pEEType->IsInterface()) + return INDIRECTION(RhTypeCast_CheckCastInterface); + else if (pEEType->IsParameterizedType()) + return INDIRECTION(RhTypeCast_CheckCast); // Array handled above; pointers and byrefs handled here + else + return INDIRECTION(RhTypeCast_CheckCastClass); + + case RuntimeHelperKind::AllocateArray: +#ifdef HOST_ARM + if (pEEType->RequiresAlign8()) + return INDIRECTION(RhpNewArrayAlign8); +#endif + return INDIRECTION(RhpNewArray); + + case RuntimeHelperKind::CheckArrayElementType: + return INDIRECTION(RhTypeCast_CheckVectorElemAddr); + + default: + UNREACHABLE(); + } +} + +#undef DECLARE_INDIRECTION +#undef INDIRECTION + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +EXTERN_C void RhpInitialDynamicInterfaceDispatch(); + +COOP_PINVOKE_HELPER(void *, RhNewInterfaceDispatchCell, (EEType * pInterface, Int32 slotNumber)) +{ + InterfaceDispatchCell * pCell = new (nothrow) InterfaceDispatchCell[2]; + if (pCell == NULL) + return NULL; + + // Due to the synchronization mechanism used to update this indirection cell we must ensure the cell's alignment is twice that of a pointer. + // Fortunately, Windows heap guarantees this alignment. 
+ ASSERT(IS_ALIGNED(pCell, 2 * POINTER_SIZE)); + ASSERT(IS_ALIGNED(pInterface, (InterfaceDispatchCell::IDC_CachePointerMask + 1))); + + pCell[0].m_pStub = (UIntNative)&RhpInitialDynamicInterfaceDispatch; + pCell[0].m_pCache = ((UIntNative)pInterface) | InterfaceDispatchCell::IDC_CachePointerIsInterfacePointerOrMetadataToken; + pCell[1].m_pStub = 0; + pCell[1].m_pCache = (UIntNative)slotNumber; + + return pCell; +} +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +COOP_PINVOKE_HELPER(PTR_UInt8, RhGetThreadLocalStorageForDynamicType, (UInt32 uOffset, UInt32 tlsStorageSize, UInt32 numTlsCells)) +{ + Thread * pCurrentThread = ThreadStore::GetCurrentThread(); + + PTR_UInt8 pResult = pCurrentThread->GetThreadLocalStorageForDynamicType(uOffset); + if (pResult != NULL || tlsStorageSize == 0 || numTlsCells == 0) + return pResult; + + ASSERT(tlsStorageSize > 0 && numTlsCells > 0); + return pCurrentThread->AllocateThreadLocalStorageForDynamicType(uOffset, tlsStorageSize, numTlsCells); +} + +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.h b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.h new file mode 100644 index 0000000000000..8a89b04858c2c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.h @@ -0,0 +1,127 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __RuntimeInstance_h__ +#define __RuntimeInstance_h__ + +class ThreadStore; +typedef DPTR(ThreadStore) PTR_ThreadStore; +class ICodeManager; +struct StaticGcDesc; +typedef SPTR(StaticGcDesc) PTR_StaticGcDesc; +class TypeManager; +enum GenericVarianceType : UInt8; + +#include "ICodeManager.h" + +class RuntimeInstance +{ + friend class AsmOffsets; + friend struct DefaultSListTraits; + friend class Thread; + + PTR_ThreadStore m_pThreadStore; + HANDLE m_hPalInstance; // this is the HANDLE passed into DllMain + ReaderWriterLock m_ModuleListLock; + +public: + struct OsModuleEntry; + typedef DPTR(OsModuleEntry) PTR_OsModuleEntry; + struct OsModuleEntry + { + PTR_OsModuleEntry m_pNext; + HANDLE m_osModule; + }; + + typedef SList OsModuleList; +private: + OsModuleList m_OsModuleList; + + struct CodeManagerEntry; + typedef DPTR(CodeManagerEntry) PTR_CodeManagerEntry; + + struct CodeManagerEntry + { + PTR_CodeManagerEntry m_pNext; + PTR_VOID m_pvStartRange; + UInt32 m_cbRange; + ICodeManager * m_pCodeManager; + }; + + typedef SList CodeManagerList; + CodeManagerList m_CodeManagerList; + +public: + struct TypeManagerEntry + { + TypeManagerEntry* m_pNext; + TypeManager* m_pTypeManager; + }; + + typedef SList TypeManagerList; + +private: + TypeManagerList m_TypeManagerList; + + bool m_conservativeStackReportingEnabled; + + struct UnboxingStubsRegion + { + PTR_VOID m_pRegionStart; + UInt32 m_cbRegion; + UnboxingStubsRegion* m_pNextRegion; + + UnboxingStubsRegion() : m_pRegionStart(0), m_cbRegion(0), m_pNextRegion(NULL) { } + }; + + UnboxingStubsRegion* m_pUnboxingStubsRegion; + + RuntimeInstance(); + + SList* GetModuleList(); + + SList* GetModuleManagerList(); + + bool BuildGenericTypeHashTable(); + + ICodeManager * FindCodeManagerForClasslibFunction(PTR_VOID address); + +public: + ~RuntimeInstance(); + ThreadStore * GetThreadStore(); + HANDLE GetPalInstance(); + + PTR_UInt8 FindMethodStartAddress(PTR_VOID ControlPC); + PTR_UInt8 GetTargetOfUnboxingAndInstantiatingStub(PTR_VOID ControlPC); + void EnableConservativeStackReporting(); + bool IsConservativeStackReportingEnabled() { return 
m_conservativeStackReportingEnabled; } + + bool RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange); + void UnregisterCodeManager(ICodeManager * pCodeManager); + + ICodeManager * FindCodeManagerByAddress(PTR_VOID ControlPC); + PTR_VOID GetClasslibFunctionFromCodeAddress(PTR_VOID address, ClasslibFunctionId functionId); + + bool RegisterTypeManager(TypeManager * pTypeManager); + TypeManagerList& GetTypeManagerList(); + OsModuleList* GetOsModuleList(); + ReaderWriterLock& GetTypeManagerLock(); + + bool RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange); + bool IsUnboxingStub(UInt8* pCode); + + static bool Initialize(HANDLE hPalInstance); + void Destroy(); + + void EnumAllStaticGCRefs(void * pfnCallback, void * pvCallbackData); + + bool ShouldHijackCallsiteForGcStress(UIntNative CallsiteIP); + bool ShouldHijackLoopForGcStress(UIntNative CallsiteIP); + void SetLoopHijackFlags(UInt32 flag); +}; +typedef DPTR(RuntimeInstance) PTR_RuntimeInstance; + + +PTR_RuntimeInstance GetRuntimeInstance(); + +#endif // __RuntimeInstance_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/SpinLock.h b/src/coreclr/src/nativeaot/Runtime/SpinLock.h new file mode 100644 index 0000000000000..61c648c935a7f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/SpinLock.h @@ -0,0 +1,71 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#ifndef __SPINLOCK_H__ +#define __SPINLOCK_H__ + +// #SwitchToThreadSpinning +// +// If you call __SwitchToThread in a loop waiting for a condition to be met, +// it is critical that you insert periodic sleeps. This is because the thread +// you are waiting for to set that condition may need your CPU, and simply +// calling __SwitchToThread(0) will NOT guarantee that it gets a chance to run. +// If there are other runnable threads of higher priority, or even if there +// aren't and it is in another processor's queue, you will be spinning a very +// long time. +// +// To force all callers to consider this issue and to avoid each having to +// duplicate the same backoff code, __SwitchToThread takes a required second +// parameter. If you want it to handle backoff for you, this parameter should +// be the number of successive calls you have made to __SwitchToThread (a loop +// count). If you want to take care of backing off yourself, you can pass +// CALLER_LIMITS_SPINNING. There are three valid cases for doing this: +// +// - You count iterations and induce a sleep periodically +// - The number of consecutive __SwitchToThreads is limited +// - Your call to __SwitchToThread includes a non-zero sleep duration +// +// Lastly, to simplify this requirement for the following common coding pattern: +// +// while (!condition) +// SwitchToThread +// +// you can use the YIELD_WHILE macro. 
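
To make the contract above concrete: the YIELD_WHILE macro defined just below expands, for the SpinLock::Lock condition that appears later in this header, to roughly the following (expansion written out for illustration only).

    {
        UInt32 __dwSwitchCount = 0;
        while (PalInterlockedExchange(&lock.m_lock, LOCKED) == LOCKED)
        {
            // Each retry passes the running iteration count so that __SwitchToThread
            // can insert the periodic sleeps described above rather than spinning
            // forever on __SwitchToThread(0).
            __SwitchToThread(0, ++__dwSwitchCount);
        }
    }

Callers that prefer to manage their own backoff pass CALLER_LIMITS_SPINNING instead, as outlined in the comment above.
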
+ +#define YIELD_WHILE(condition) \ + { \ + UInt32 __dwSwitchCount = 0; \ + while (condition) \ + { \ + __SwitchToThread(0, ++__dwSwitchCount); \ + } \ + } + +class SpinLock +{ +private: + enum LOCK_STATE + { + UNLOCKED = 0, + LOCKED = 1 + }; + + volatile Int32 m_lock; + + static void Lock(SpinLock& lock) + { YIELD_WHILE (PalInterlockedExchange(&lock.m_lock, LOCKED) == LOCKED); } + + static void Unlock(SpinLock& lock) + { PalInterlockedExchange(&lock.m_lock, UNLOCKED); } + +public: + SpinLock() + : m_lock(UNLOCKED) { } + + typedef HolderNoDefaultValue + Holder; +}; + +#endif + diff --git a/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.cpp new file mode 100644 index 0000000000000..9b6a9119a9f5c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.cpp @@ -0,0 +1,1914 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "RedhawkWarnings.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "RWLock.h" +#include "event.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" +#include "stressLog.h" + +#include "shash.h" +#include "RuntimeInstance.h" +#include "rhbinder.h" + +#include "DebugFuncEval.h" + +// warning C4061: enumerator '{blah}' in switch of enum '{blarg}' is not explicitly handled by a case label +#pragma warning(disable:4061) + +#if !defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: these are (currently) only implemented in assembly helpers + +#if defined(FEATURE_DYNAMIC_CODE) +EXTERN_C void * RhpUniversalTransition(); +GPTR_IMPL_INIT(PTR_VOID, g_RhpUniversalTransitionAddr, (void**)&RhpUniversalTransition); + +EXTERN_C PTR_VOID PointerToReturnFromUniversalTransition; +GVAL_IMPL_INIT(PTR_VOID, g_ReturnFromUniversalTransitionAddr, PointerToReturnFromUniversalTransition); + +EXTERN_C PTR_VOID PointerToReturnFromUniversalTransition_DebugStepTailCall; +GVAL_IMPL_INIT(PTR_VOID, g_ReturnFromUniversalTransition_DebugStepTailCallAddr, PointerToReturnFromUniversalTransition_DebugStepTailCall); + +EXTERN_C PTR_VOID PointerToReturnFromCallDescrThunk; +GVAL_IMPL_INIT(PTR_VOID, g_ReturnFromCallDescrThunkAddr, PointerToReturnFromCallDescrThunk); +#endif + +#ifdef TARGET_X86 +EXTERN_C void * PointerToRhpCallFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallFunclet2Addr, PointerToRhpCallFunclet2); +#endif +EXTERN_C void * PointerToRhpCallCatchFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallCatchFunclet2Addr, PointerToRhpCallCatchFunclet2); +EXTERN_C void * PointerToRhpCallFinallyFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallFinallyFunclet2Addr, PointerToRhpCallFinallyFunclet2); +EXTERN_C void * PointerToRhpCallFilterFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallFilterFunclet2Addr, PointerToRhpCallFilterFunclet2); +EXTERN_C void * PointerToRhpThrowEx2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpThrowEx2Addr, PointerToRhpThrowEx2); +EXTERN_C void * PointerToRhpThrowHwEx2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpThrowHwEx2Addr, PointerToRhpThrowHwEx2); +EXTERN_C void * PointerToRhpRethrow2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2); +#endif // !defined(USE_PORTABLE_HELPERS) + +// 
Addresses of functions in the DAC won't match their runtime counterparts so we +// assign them to globals. However it is more performant in the runtime to compare +// against immediates than to fetch the global. This macro hides the difference. +// +// We use a special code path for the return address from thunks as +// having the return address public confuses today DIA stackwalker. Before we can +// ingest the updated DIA, we're instead exposing a global void * variable +// holding the return address. +#ifdef DACCESS_COMPILE +#define EQUALS_RETURN_ADDRESS(x, func_name) ((x) == g_ ## func_name ## Addr) +#else +#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PointerTo ## func_name)) +#endif + +#ifdef DACCESS_COMPILE +#define FAILFAST_OR_DAC_FAIL(x) if(!(x)) { DacError(E_FAIL); } +#define FAILFAST_OR_DAC_FAIL_MSG(x, msg) if(!(x)) { DacError(E_FAIL); } +#define FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY(msg) DacError(E_FAIL) +#else +#define FAILFAST_OR_DAC_FAIL(x) if(!(x)) { ASSERT_UNCONDITIONALLY(#x); RhFailFast(); } +#define FAILFAST_OR_DAC_FAIL_MSG(x, msg) if(!(x)) { ASSERT_MSG((x), msg); ASSERT_UNCONDITIONALLY(#x); RhFailFast(); } +#define FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY(msg) { ASSERT_UNCONDITIONALLY(msg); RhFailFast(); } +#endif + +PTR_PInvokeTransitionFrame GetPInvokeTransitionFrame(PTR_VOID pTransitionFrame) +{ + return static_cast(pTransitionFrame); +} + +StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PTR_VOID pInitialTransitionFrame) +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n"); + ASSERT(!pThreadToWalk->DangerousCrossThreadIsHijacked()); + InternalInit(pThreadToWalk, GetPInvokeTransitionFrame(pInitialTransitionFrame), GcStackWalkFlags); + PrepareToYieldFrame(); +} + +StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx) +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ hijack ]\n"); + InternalInit(pThreadToWalk, pCtx, 0); + PrepareToYieldFrame(); +} + +void StackFrameIterator::ResetNextExInfoForSP(UIntNative SP) +{ + while (m_pNextExInfo && (SP > (UIntNative)dac_cast(m_pNextExInfo))) + m_pNextExInfo = m_pNextExInfo->m_pPrevExInfo; +} + +void StackFrameIterator::EnterInitialInvalidState(Thread * pThreadToWalk) +{ + m_pThread = pThreadToWalk; + m_pInstance = GetRuntimeInstance(); + m_pCodeManager = NULL; + m_pHijackedReturnValue = NULL; + m_HijackedReturnValueKind = GCRK_Unknown; + m_pConservativeStackRangeLowerBound = NULL; + m_pConservativeStackRangeUpperBound = NULL; + m_ShouldSkipRegularGcReporting = false; + m_pendingFuncletFramePointer = NULL; + m_pNextExInfo = pThreadToWalk->GetCurExInfo(); + SetControlPC(0); +} + +// Prepare to start a stack walk from the context listed in the supplied PInvokeTransitionFrame. +// The supplied frame can be TOP_OF_STACK_MARKER to indicate that there are no more managed +// frames on the stack. Otherwise, the context in the frame always describes a callsite +// where control transitioned from managed to unmanaged code. +// NOTE: When a return address hijack is executed, the PC in the generated PInvokeTransitionFrame +// matches the hijacked return address. This PC is not guaranteed to be in managed code +// since the hijacked return address may refer to a location where an assembly thunk called +// into managed code. +// NOTE: When the PC is in an assembly thunk, this function will unwind to the next managed +// frame and may publish a conservative stack range (if and only if any of the unwound +// thunks report a conservative range). 
+void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, UInt32 dwFlags) +{ + // EH stackwalks are always required to unwind non-volatile floating point state. This + // state is never carried by PInvokeTransitionFrames, implying that they can never be used + // as the initial state for an EH stackwalk. + ASSERT_MSG(!(dwFlags & ApplyReturnAddressAdjustment), + "PInvokeTransitionFrame content is not sufficient to seed an EH stackwalk"); + + EnterInitialInvalidState(pThreadToWalk); + + if (pFrame == TOP_OF_STACK_MARKER) + { + // There are no managed frames on the stack. Leave the iterator in its initial invalid state. + return; + } + + m_dwFlags = dwFlags; + + // We need to walk the ExInfo chain in parallel with the stackwalk so that we know when we cross over + // exception throw points. So we must find our initial point in the ExInfo chain here so that we can + // properly walk it in parallel. + ResetNextExInfoForSP((UIntNative)dac_cast(pFrame)); + +#if !defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: no portable version of regdisplay + memset(&m_RegDisplay, 0, sizeof(m_RegDisplay)); + m_RegDisplay.SetIP((PCODE)pFrame->m_RIP); + m_RegDisplay.SetAddrOfIP((PTR_PCODE)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_RIP)); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + PTR_UIntNative pPreservedRegsCursor = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_PreservedRegs); + +#ifdef TARGET_ARM + m_RegDisplay.pLR = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_RIP); + m_RegDisplay.pR11 = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_ChainPointer); + + if (pFrame->m_Flags & PTFF_SAVE_R4) { m_RegDisplay.pR4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R5) { m_RegDisplay.pR5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R6) { m_RegDisplay.pR6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R7) { m_RegDisplay.pR7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R8) { m_RegDisplay.pR8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R9) { m_RegDisplay.pR9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R10) { m_RegDisplay.pR10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + m_RegDisplay.pR11 = (PTR_UIntNative) PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_FramePointer); + if (pFrame->m_Flags & PTFF_SAVE_R0) { m_RegDisplay.pR0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R1) { m_RegDisplay.pR1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R2) { m_RegDisplay.pR2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R3) { m_RegDisplay.pR3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_LR) { m_RegDisplay.pLR = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_R0_IS_GCREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pR0; + m_HijackedReturnValueKind = GCRK_Object; + } + if (pFrame->m_Flags & PTFF_R0_IS_BYREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pR0; + m_HijackedReturnValueKind = GCRK_Byref; + } + +#elif defined(TARGET_ARM64) + m_RegDisplay.pFP = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_FramePointer); + m_RegDisplay.pLR = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_RIP); + + ASSERT(!(pFrame->m_Flags & PTFF_SAVE_FP)); // FP should never contain a GC ref because we require + // a frame pointer for 
methods with pinvokes + + if (pFrame->m_Flags & PTFF_SAVE_X19) { m_RegDisplay.pX19 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X20) { m_RegDisplay.pX20 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X21) { m_RegDisplay.pX21 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X22) { m_RegDisplay.pX22 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X23) { m_RegDisplay.pX23 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X24) { m_RegDisplay.pX24 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X25) { m_RegDisplay.pX25 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X26) { m_RegDisplay.pX26 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X27) { m_RegDisplay.pX27 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X28) { m_RegDisplay.pX28 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_X0) { m_RegDisplay.pX0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X1) { m_RegDisplay.pX1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X2) { m_RegDisplay.pX2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X3) { m_RegDisplay.pX3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X4) { m_RegDisplay.pX4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X5) { m_RegDisplay.pX5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X6) { m_RegDisplay.pX6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X7) { m_RegDisplay.pX7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X8) { m_RegDisplay.pX8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X9) { m_RegDisplay.pX9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X10) { m_RegDisplay.pX10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X11) { m_RegDisplay.pX11 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X12) { m_RegDisplay.pX12 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X13) { m_RegDisplay.pX13 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X14) { m_RegDisplay.pX14 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X15) { m_RegDisplay.pX15 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X16) { m_RegDisplay.pX16 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X17) { m_RegDisplay.pX17 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X18) { m_RegDisplay.pX18 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_LR) { m_RegDisplay.pLR = pPreservedRegsCursor++; } + + GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); + if (retValueKind != GCRK_Scalar) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef)m_RegDisplay.pX0; + m_HijackedReturnValueKind = retValueKind; + } + +#else // TARGET_ARM + if (pFrame->m_Flags & PTFF_SAVE_RBX) { m_RegDisplay.pRbx = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RSI) { m_RegDisplay.pRsi = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RDI) { m_RegDisplay.pRdi = pPreservedRegsCursor++; } + ASSERT(!(pFrame->m_Flags & PTFF_SAVE_RBP)); // RBP should never contain a GC ref because we require + // a frame pointer for methods with pinvokes +#ifdef TARGET_AMD64 + if (pFrame->m_Flags & PTFF_SAVE_R12) { m_RegDisplay.pR12 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R13) { m_RegDisplay.pR13 = 
pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R14) { m_RegDisplay.pR14 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R15) { m_RegDisplay.pR15 = pPreservedRegsCursor++; } +#endif // TARGET_AMD64 + + m_RegDisplay.pRbp = (PTR_UIntNative) PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_FramePointer); + + if (pFrame->m_Flags & PTFF_SAVE_RSP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_RAX) { m_RegDisplay.pRax = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RCX) { m_RegDisplay.pRcx = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RDX) { m_RegDisplay.pRdx = pPreservedRegsCursor++; } +#ifdef TARGET_AMD64 + if (pFrame->m_Flags & PTFF_SAVE_R8 ) { m_RegDisplay.pR8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R9 ) { m_RegDisplay.pR9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R10) { m_RegDisplay.pR10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R11) { m_RegDisplay.pR11 = pPreservedRegsCursor++; } +#endif // TARGET_AMD64 + + if (pFrame->m_Flags & PTFF_RAX_IS_GCREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pRax; + m_HijackedReturnValueKind = GCRK_Object; + } + if (pFrame->m_Flags & PTFF_RAX_IS_BYREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pRax; + m_HijackedReturnValueKind = GCRK_Byref; + } + +#endif // TARGET_ARM + +#endif // defined(USE_PORTABLE_HELPERS) + + // @TODO: currently, we always save all registers -- how do we handle the onese we don't save once we + // start only saving those that weren't already saved? + + // This function guarantees that the final initialized context will refer to a managed + // frame. In the rare case where the PC does not refer to managed code (and refers to an + // assembly thunk instead), unwind through the thunk sequence to find the nearest managed + // frame. + // NOTE: When thunks are present, the thunk sequence may report a conservative GC reporting + // lower bound that must be applied when processing the managed frame. + + ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC); + + if (category == InManagedCode) + { + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC)); + } + else if (IsNonEHThunk(category)) + { + UnwindNonEHThunkSequence(); + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC)); + } + else + { + FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("PInvokeTransitionFrame PC points to an unexpected assembly thunk kind."); + } + + STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC); +} + +#ifndef DACCESS_COMPILE + +void StackFrameIterator::InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault) +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ EH ]\n"); + InternalInit(pThreadToWalk, pCtx, EHStackWalkFlags); + + if (instructionFault) + { + // We treat the IP as a return-address and adjust backward when doing EH-related things. The faulting + // instruction IP here will be the start of the faulting instruction and so we have the right IP for + // EH-related things already. 
+ m_dwFlags &= ~ApplyReturnAddressAdjustment; + PrepareToYieldFrame(); + m_dwFlags |= ApplyReturnAddressAdjustment; + } + else + { + PrepareToYieldFrame(); + } + + STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC); +} + +void StackFrameIterator::InternalInitForStackTrace() +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ StackTrace ]\n"); + Thread * pThreadToWalk = ThreadStore::GetCurrentThread(); + PTR_VOID pFrame = pThreadToWalk->GetTransitionFrameForStackTrace(); + InternalInit(pThreadToWalk, GetPInvokeTransitionFrame(pFrame), StackTraceStackWalkFlags); + PrepareToYieldFrame(); +} + +#endif //!DACCESS_COMPILE + +// Prepare to start a stack walk from the context listed in the supplied PAL_LIMITED_CONTEXT. +// The supplied context can describe a location in either managed or unmanaged code. In the +// latter case the iterator is left in an invalid state when this function returns. +void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, UInt32 dwFlags) +{ + ASSERT((dwFlags & MethodStateCalculated) == 0); + + EnterInitialInvalidState(pThreadToWalk); + + m_dwFlags = dwFlags; + + // We need to walk the ExInfo chain in parallel with the stackwalk so that we know when we cross over + // exception throw points. So we must find our initial point in the ExInfo chain here so that we can + // properly walk it in parallel. + ResetNextExInfoForSP(pCtx->GetSp()); + + // This codepath is used by the hijack stackwalk and we can get arbitrary ControlPCs from there. If this + // context has a non-managed control PC, then we're done. + if (!m_pInstance->FindCodeManagerByAddress(dac_cast(pCtx->GetIp()))) + return; + + // + // control state + // + SetControlPC(dac_cast(pCtx->GetIp())); + m_RegDisplay.SP = pCtx->GetSp(); + m_RegDisplay.IP = pCtx->GetIp(); + m_RegDisplay.pIP = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, IP); + +#ifdef TARGET_ARM + // + // preserved regs + // + m_RegDisplay.pR4 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R4); + m_RegDisplay.pR5 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R5); + m_RegDisplay.pR6 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R6); + m_RegDisplay.pR7 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R7); + m_RegDisplay.pR8 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R8); + m_RegDisplay.pR9 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R9); + m_RegDisplay.pR10 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R10); + m_RegDisplay.pR11 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R11); + m_RegDisplay.pLR = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, LR); + + // + // preserved vfp regs + // + for (Int32 i = 0; i < 16 - 8; i++) + { + m_RegDisplay.D[i] = pCtx->D[i]; + } + // + // scratch regs + // + m_RegDisplay.pR0 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R0); + +#elif defined(TARGET_ARM64) + // + // preserved regs + // + m_RegDisplay.pX19 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X19); + m_RegDisplay.pX20 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X20); + m_RegDisplay.pX21 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X21); + m_RegDisplay.pX22 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X22); + m_RegDisplay.pX23 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X23); + m_RegDisplay.pX24 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X24); + m_RegDisplay.pX25 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X25); + m_RegDisplay.pX26 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X26); + m_RegDisplay.pX27 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X27); + m_RegDisplay.pX28 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X28); + m_RegDisplay.pFP = 
PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, FP); + m_RegDisplay.pLR = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, LR); + + // + // preserved vfp regs + // + for (Int32 i = 0; i < 16 - 8; i++) + { + m_RegDisplay.D[i] = pCtx->D[i]; + } + // + // scratch regs + // + m_RegDisplay.pX0 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X0); + m_RegDisplay.pX1 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X1); + // TODO: Copy X2-X7 when we start supporting HVA's + +#elif defined(UNIX_AMD64_ABI) + // + // preserved regs + // + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbp); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbx); + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R15); + + // + // scratch regs + // + m_RegDisplay.pRax = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rax); + m_RegDisplay.pRcx = NULL; + m_RegDisplay.pRdx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rdx); + m_RegDisplay.pRsi = NULL; + m_RegDisplay.pRdi = NULL; + m_RegDisplay.pR8 = NULL; + m_RegDisplay.pR9 = NULL; + m_RegDisplay.pR10 = NULL; + m_RegDisplay.pR11 = NULL; + +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + // + // preserved regs + // + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbp); + m_RegDisplay.pRsi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rsi); + m_RegDisplay.pRdi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rdi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbx); +#ifdef TARGET_AMD64 + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R15); + // + // preserved xmm regs + // + memcpy(m_RegDisplay.Xmm, &pCtx->Xmm6, sizeof(m_RegDisplay.Xmm)); +#endif // TARGET_AMD64 + + // + // scratch regs + // + m_RegDisplay.pRax = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rax); + m_RegDisplay.pRcx = NULL; + m_RegDisplay.pRdx = NULL; +#ifdef TARGET_AMD64 + m_RegDisplay.pR8 = NULL; + m_RegDisplay.pR9 = NULL; + m_RegDisplay.pR10 = NULL; + m_RegDisplay.pR11 = NULL; +#endif // TARGET_AMD64 +#else + PORTABILITY_ASSERT("StackFrameIterator::InternalInit"); +#endif // TARGET_ARM +} + +PTR_VOID StackFrameIterator::HandleExCollide(PTR_ExInfo pExInfo) +{ + STRESS_LOG3(LF_STACKWALK, LL_INFO10000, " [ ex collide ] kind = %d, pass = %d, idxCurClause = %d\n", + pExInfo->m_kind, pExInfo->m_passNumber, pExInfo->m_idxCurClause); + + PTR_VOID collapsingTargetFrame = NULL; + UInt32 curFlags = m_dwFlags; + + // Capture and clear the pending funclet frame pointer (if any). This field is only set + // when stack walks collide with active exception dispatch, and only exists to save the + // funclet frame pointer until the next ExInfo collision (which has now occurred). + PTR_VOID activeFuncletFramePointer = m_pendingFuncletFramePointer; + m_pendingFuncletFramePointer = NULL; + + // If we aren't invoking a funclet (i.e. idxCurClause == -1), and we're doing a GC stackwalk, we don't + // want the 2nd-pass collided behavior because that behavior assumes that the previous frame was a + // funclet, which isn't the case when taking a GC at some points in the EH dispatch code. So we treat it + // as if the 2nd pass hasn't actually started yet. 
+ if ((pExInfo->m_passNumber == 1) || + (pExInfo->m_idxCurClause == 0xFFFFFFFF)) + { + FAILFAST_OR_DAC_FAIL_MSG(!(curFlags & ApplyReturnAddressAdjustment), + "did not expect to collide with a 1st-pass ExInfo during a EH stackwalk"); + InternalInit(m_pThread, pExInfo->m_pExContext, curFlags); + m_pNextExInfo = pExInfo->m_pPrevExInfo; + CalculateCurrentMethodState(); + ASSERT(IsValid()); + + if ((pExInfo->m_kind & EK_HardwareFault) && (curFlags & RemapHardwareFaultsToSafePoint)) + m_effectiveSafePointAddress = GetCodeManager()->RemapHardwareFaultToGCSafePoint(&m_methodInfo, m_ControlPC); + } + else + { + ASSERT_MSG(activeFuncletFramePointer != NULL, + "collided with an active funclet invoke but the funclet frame pointer is unknown"); + + // + // Copy our state from the previous StackFrameIterator + // + this->UpdateFromExceptionDispatch((PTR_StackFrameIterator)&pExInfo->m_frameIter); + + // Sync our 'current' ExInfo with the updated state (we may have skipped other dispatches) + ResetNextExInfoForSP(m_RegDisplay.GetSP()); + + // In case m_ControlPC is pre-adjusted, counteract here, since the caller of this routine + // will apply the adjustment again once we return. If the m_ControlPC is not pre-adjusted, + // this is simply an no-op. + m_ControlPC = m_OriginalControlPC; + + m_dwFlags = curFlags; + + // The iterator has been moved to the "owner frame" (either a parent funclet or the main + // code body) of the funclet being invoked by this ExInfo. As a result, both the active + // funclet and the current frame must be "part of the same function" and therefore must + // have identical frame pointer values. + + CalculateCurrentMethodState(); + ASSERT(IsValid()); + ASSERT(m_FramePointer == activeFuncletFramePointer); + + if ((m_ControlPC != 0) && // the dispatch in ExInfo could have gone unhandled + (m_dwFlags & CollapseFunclets)) + { + // GC stack walks must skip the owner frame since GC information for the entire function + // has already been reported by the leafmost active funclet. In general, the GC stack walk + // must skip all parent frames that are "part of the same function" (i.e., have the same + // frame pointer). + collapsingTargetFrame = activeFuncletFramePointer; + } + } + return collapsingTargetFrame; +} + +void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSourceIterator) +{ + ASSERT(m_pendingFuncletFramePointer == NULL); + PreservedRegPtrs thisFuncletPtrs = this->m_funcletPtrs; + + // Blast over 'this' with everything from the 'source'. + *this = *pSourceIterator; + + // Clear the funclet frame pointer (if any) that was loaded from the previous iterator. + // This field does not relate to the transferrable state of the previous iterator (it + // instead tracks the frame-by-frame progression of a particular iterator instance) and + // therefore has no meaning in the context of the current stack walk. + m_pendingFuncletFramePointer = NULL; + + // Then, put back the pointers to the funclet's preserved registers (since those are the correct values + // until the funclet completes, at which point the values will be copied back to the ExInfo's REGDISPLAY). 
+ +#ifdef TARGET_ARM + m_RegDisplay.pR4 = thisFuncletPtrs.pR4 ; + m_RegDisplay.pR5 = thisFuncletPtrs.pR5 ; + m_RegDisplay.pR6 = thisFuncletPtrs.pR6 ; + m_RegDisplay.pR7 = thisFuncletPtrs.pR7 ; + m_RegDisplay.pR8 = thisFuncletPtrs.pR8 ; + m_RegDisplay.pR9 = thisFuncletPtrs.pR9 ; + m_RegDisplay.pR10 = thisFuncletPtrs.pR10; + m_RegDisplay.pR11 = thisFuncletPtrs.pR11; + +#elif defined(TARGET_ARM64) + m_RegDisplay.pX19 = thisFuncletPtrs.pX19; + m_RegDisplay.pX20 = thisFuncletPtrs.pX20; + m_RegDisplay.pX21 = thisFuncletPtrs.pX21; + m_RegDisplay.pX22 = thisFuncletPtrs.pX22; + m_RegDisplay.pX23 = thisFuncletPtrs.pX23; + m_RegDisplay.pX24 = thisFuncletPtrs.pX24; + m_RegDisplay.pX25 = thisFuncletPtrs.pX25; + m_RegDisplay.pX26 = thisFuncletPtrs.pX26; + m_RegDisplay.pX27 = thisFuncletPtrs.pX27; + m_RegDisplay.pX28 = thisFuncletPtrs.pX28; + m_RegDisplay.pFP = thisFuncletPtrs.pFP; + +#elif defined(UNIX_AMD64_ABI) + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_RegDisplay.pRbp = thisFuncletPtrs.pRbp; + m_RegDisplay.pRbx = thisFuncletPtrs.pRbx; + m_RegDisplay.pR12 = thisFuncletPtrs.pR12; + m_RegDisplay.pR13 = thisFuncletPtrs.pR13; + m_RegDisplay.pR14 = thisFuncletPtrs.pR14; + m_RegDisplay.pR15 = thisFuncletPtrs.pR15; + +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_RegDisplay.pRbp = thisFuncletPtrs.pRbp; + m_RegDisplay.pRdi = thisFuncletPtrs.pRdi; + m_RegDisplay.pRsi = thisFuncletPtrs.pRsi; + m_RegDisplay.pRbx = thisFuncletPtrs.pRbx; +#ifdef TARGET_AMD64 + m_RegDisplay.pR12 = thisFuncletPtrs.pR12; + m_RegDisplay.pR13 = thisFuncletPtrs.pR13; + m_RegDisplay.pR14 = thisFuncletPtrs.pR14; + m_RegDisplay.pR15 = thisFuncletPtrs.pR15; +#endif // TARGET_AMD64 +#else + PORTABILITY_ASSERT("StackFrameIterator::UpdateFromExceptionDispatch"); +#endif +} + +#ifdef TARGET_AMD64 +typedef DPTR(Fp128) PTR_Fp128; +#endif + +// The invoke of a funclet is a bit special and requires an assembly thunk, but we don't want to break the +// stackwalk due to this. So this routine will unwind through the assembly thunks used to invoke funclets. +// It's also used to disambiguate exceptionally- and non-exceptionally-invoked funclets. +void StackFrameIterator::UnwindFuncletInvokeThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: Currently no funclet invoke defined in a portable way + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InFuncletInvokeThunk); + + PTR_UIntNative SP; + +#ifdef TARGET_X86 + // First, unwind RhpCallFunclet + SP = (PTR_UIntNative)(m_RegDisplay.SP + 0x4); // skip the saved assembly-routine-EBP + m_RegDisplay.SetAddrOfIP(SP); + m_RegDisplay.SetIP(*SP++); + m_RegDisplay.SetSP((UIntNative)dac_cast(SP)); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + ASSERT( + EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) || + EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFinallyFunclet2) || + EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFilterFunclet2) + ); +#endif + + bool isFilterInvoke = EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFilterFunclet2); + +#if defined(UNIX_AMD64_ABI) + SP = (PTR_UIntNative)(m_RegDisplay.SP); + + if (isFilterInvoke) + { + SP++; // stack alignment + } + else + { + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. 
+ m_funcletPtrs.pRbp = m_RegDisplay.pRbp; + m_funcletPtrs.pRbx = m_RegDisplay.pRbx; + m_funcletPtrs.pR12 = m_RegDisplay.pR12; + m_funcletPtrs.pR13 = m_RegDisplay.pR13; + m_funcletPtrs.pR14 = m_RegDisplay.pR14; + m_funcletPtrs.pR15 = m_RegDisplay.pR15; + + if (EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2)) + { + SP += 6 + 1; // 6 locals and stack alignment + } + else + { + SP += 3; // 3 locals + } + } + + m_RegDisplay.pRbp = SP++; + m_RegDisplay.pRbx = SP++; + m_RegDisplay.pR12 = SP++; + m_RegDisplay.pR13 = SP++; + m_RegDisplay.pR14 = SP++; + m_RegDisplay.pR15 = SP++; +#elif defined(TARGET_AMD64) + static const int ArgumentsScratchAreaSize = 4 * 8; + + PTR_Fp128 xmm = (PTR_Fp128)(m_RegDisplay.SP + ArgumentsScratchAreaSize); + + for (int i = 0; i < 10; i++) + { + m_RegDisplay.Xmm[i] = *xmm++; + } + + SP = (PTR_UIntNative)xmm; + + if (isFilterInvoke) + { + SP++; // stack alignment + } + else + { + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pRbp = m_RegDisplay.pRbp; + m_funcletPtrs.pRdi = m_RegDisplay.pRdi; + m_funcletPtrs.pRsi = m_RegDisplay.pRsi; + m_funcletPtrs.pRbx = m_RegDisplay.pRbx; + m_funcletPtrs.pR12 = m_RegDisplay.pR12; + m_funcletPtrs.pR13 = m_RegDisplay.pR13; + m_funcletPtrs.pR14 = m_RegDisplay.pR14; + m_funcletPtrs.pR15 = m_RegDisplay.pR15; + + if (EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2)) + { + SP += 3; // 3 locals + } + else + { + SP++; // 1 local + } + } + + m_RegDisplay.pRbp = SP++; + m_RegDisplay.pRdi = SP++; + m_RegDisplay.pRsi = SP++; + m_RegDisplay.pRbx = SP++; + m_RegDisplay.pR12 = SP++; + m_RegDisplay.pR13 = SP++; + m_RegDisplay.pR14 = SP++; + m_RegDisplay.pR15 = SP++; + +#elif defined(TARGET_X86) + SP = (PTR_UIntNative)(m_RegDisplay.SP); + + if (!isFilterInvoke) + { + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pRbp = m_RegDisplay.pRbp; + m_funcletPtrs.pRdi = m_RegDisplay.pRdi; + m_funcletPtrs.pRsi = m_RegDisplay.pRsi; + m_funcletPtrs.pRbx = m_RegDisplay.pRbx; + } + + if (EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2)) + { + SP += 2; // 2 locals + } + else + { + SP++; // 1 local + } + m_RegDisplay.pRdi = SP++; + m_RegDisplay.pRsi = SP++; + m_RegDisplay.pRbx = SP++; + m_RegDisplay.pRbp = SP++; +#elif defined(TARGET_ARM) + + PTR_UInt64 d = (PTR_UInt64)(m_RegDisplay.SP); + + for (int i = 0; i < 8; i++) + { + m_RegDisplay.D[i] = *d++; + } + + SP = (PTR_UIntNative)d; + + if (!isFilterInvoke) + { + // RhpCallCatchFunclet puts a couple of extra things on the stack that aren't put there by the other two + // thunks, but we don't need to know what they are here, so we just skip them. + SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 3 : 1; + + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. 
+ m_funcletPtrs.pR4 = m_RegDisplay.pR4; + m_funcletPtrs.pR5 = m_RegDisplay.pR5; + m_funcletPtrs.pR6 = m_RegDisplay.pR6; + m_funcletPtrs.pR7 = m_RegDisplay.pR7; + m_funcletPtrs.pR8 = m_RegDisplay.pR8; + m_funcletPtrs.pR9 = m_RegDisplay.pR9; + m_funcletPtrs.pR10 = m_RegDisplay.pR10; + m_funcletPtrs.pR11 = m_RegDisplay.pR11; + } + + m_RegDisplay.pR4 = SP++; + m_RegDisplay.pR5 = SP++; + m_RegDisplay.pR6 = SP++; + m_RegDisplay.pR7 = SP++; + m_RegDisplay.pR8 = SP++; + m_RegDisplay.pR9 = SP++; + m_RegDisplay.pR10 = SP++; + m_RegDisplay.pR11 = SP++; + +#elif defined(TARGET_ARM64) + PTR_UInt64 d = (PTR_UInt64)(m_RegDisplay.SP); + + for (int i = 0; i < 8; i++) + { + m_RegDisplay.D[i] = *d++; + } + + SP = (PTR_UIntNative)d; + + if (!isFilterInvoke) + { + // RhpCallCatchFunclet puts a couple of extra things on the stack that aren't put there by the other two + // thunks, but we don't need to know what they are here, so we just skip them. + SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 4 : 2; + + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pX19 = m_RegDisplay.pX19; + m_funcletPtrs.pX20 = m_RegDisplay.pX20; + m_funcletPtrs.pX21 = m_RegDisplay.pX21; + m_funcletPtrs.pX22 = m_RegDisplay.pX22; + m_funcletPtrs.pX23 = m_RegDisplay.pX23; + m_funcletPtrs.pX24 = m_RegDisplay.pX24; + m_funcletPtrs.pX25 = m_RegDisplay.pX25; + m_funcletPtrs.pX26 = m_RegDisplay.pX26; + m_funcletPtrs.pX27 = m_RegDisplay.pX27; + m_funcletPtrs.pX28 = m_RegDisplay.pX28; + m_funcletPtrs.pFP = m_RegDisplay.pFP; + } + + m_RegDisplay.pFP = SP++; + + m_RegDisplay.SetAddrOfIP((PTR_PCODE)SP); + m_RegDisplay.SetIP(*SP++); + + m_RegDisplay.pX19 = SP++; + m_RegDisplay.pX20 = SP++; + m_RegDisplay.pX21 = SP++; + m_RegDisplay.pX22 = SP++; + m_RegDisplay.pX23 = SP++; + m_RegDisplay.pX24 = SP++; + m_RegDisplay.pX25 = SP++; + m_RegDisplay.pX26 = SP++; + m_RegDisplay.pX27 = SP++; + m_RegDisplay.pX28 = SP++; + +#else + SP = (PTR_UIntNative)(m_RegDisplay.SP); + ASSERT_UNCONDITIONALLY("NYI for this arch"); +#endif + +#if !defined(TARGET_ARM64) + m_RegDisplay.SetAddrOfIP((PTR_PCODE)SP); + m_RegDisplay.SetIP(*SP++); +#endif + + m_RegDisplay.SetSP((UIntNative)dac_cast(SP)); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + // We expect to be called by the runtime's C# EH implementation, and since this function's notion of how + // to unwind through the stub is brittle relative to the stub itself, we want to check as soon as we can. + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC) && "unwind from funclet invoke stub failed"); +#endif // defined(USE_PORTABLE_HELPERS) +} + +// For a given target architecture, the layout of this structure must precisely match the +// stack frame layout used by the associated architecture-specific RhpUniversalTransition +// implementation. +struct UniversalTransitionStackFrame +{ + +// In DAC builds, the "this" pointer refers to an object in the DAC host. +#define GET_POINTER_TO_FIELD(_FieldName) \ + (PTR_UIntNative)PTR_HOST_MEMBER(UniversalTransitionStackFrame, this, _FieldName) + +#if defined(UNIX_AMD64_ABI) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. 
+private: + Fp128 m_fpArgRegs[8]; // ChildSP+000 CallerSP-0D0 (0x80 bytes) (xmm0-xmm7) + UIntNative m_returnBlock[2]; // ChildSP+080 CallerSP-050 (0x10 bytes) + UIntNative m_intArgRegs[6]; // ChildSP+090 CallerSP-040 (0x30 bytes) (rdi,rsi,rcx,rdx,r8,r9) + UIntNative m_alignmentPad; // ChildSP+0C0 CallerSP-010 (0x8 bytes) + UIntNative m_callerRetaddr; // ChildSP+0C8 CallerSP-008 (0x8 bytes) + UIntNative m_stackPassedArgs[1]; // ChildSP+0D0 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_callerRetaddr); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + // RhpUniversalTransition does not touch any non-volatile state on amd64. + UNREFERENCED_PARAMETER(pRegisterSet); + } + +#elif defined(TARGET_AMD64) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. +private: + UIntNative m_calleeArgumentHomes[4]; // ChildSP+000 CallerSP-080 (0x20 bytes) + Fp128 m_fpArgRegs[4]; // ChildSP+020 CallerSP-060 (0x40 bytes) (xmm0-xmm3) + UIntNative m_returnBlock[2]; // ChildSP+060 CallerSP-020 (0x10 bytes) + UIntNative m_alignmentPad; // ChildSP+070 CallerSP-010 (0x8 bytes) + UIntNative m_callerRetaddr; // ChildSP+078 CallerSP-008 (0x8 bytes) + UIntNative m_intArgRegs[4]; // ChildSP+080 CallerSP+000 (0x20 bytes) (rcx,rdx,r8,r9) + UIntNative m_stackPassedArgs[1]; // ChildSP+0a0 CallerSP+020 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_intArgRegs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_callerRetaddr); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + // RhpUniversalTransition does not touch any non-volatile state on amd64. + UNREFERENCED_PARAMETER(pRegisterSet); + } + +#elif defined(TARGET_ARM) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. +private: + UIntNative m_pushedR11; // ChildSP+000 CallerSP-078 (0x4 bytes) (r11) + UIntNative m_pushedLR; // ChildSP+004 CallerSP-074 (0x4 bytes) (lr) + UInt64 m_fpArgRegs[8]; // ChildSP+008 CallerSP-070 (0x40 bytes) (d0-d7) + UInt64 m_returnBlock[4]; // ChildSP+048 CallerSP-030 (0x20 bytes) + UIntNative m_intArgRegs[4]; // ChildSP+068 CallerSP-010 (0x10 bytes) (r0-r3) + UIntNative m_stackPassedArgs[1]; // ChildSP+078 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedLR); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pR11 = GET_POINTER_TO_FIELD(m_pushedR11); + } + +#elif defined(TARGET_X86) + + // Conservative GC reporting must be applied to everything between the base of the + // IntArgRegs and the top of the StackPassedArgs. 
+private: + UIntNative m_intArgRegs[2]; // ChildSP+000 CallerSP-018 (0x8 bytes) (edx,ecx) + UIntNative m_returnBlock[2]; // ChildSP+008 CallerSP-010 (0x8 bytes) + UIntNative m_pushedEBP; // ChildSP+010 CallerSP-008 (0x4 bytes) + UIntNative m_callerRetaddr; // ChildSP+014 CallerSP-004 (0x4 bytes) + UIntNative m_stackPassedArgs[1]; // ChildSP+018 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_callerRetaddr); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_intArgRegs[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pRbp = GET_POINTER_TO_FIELD(m_pushedEBP); + } + +#elif defined(TARGET_ARM64) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. +private: + UIntNative m_pushedFP; // ChildSP+000 CallerSP-0C0 (0x08 bytes) (fp) + UIntNative m_pushedLR; // ChildSP+008 CallerSP-0B8 (0x08 bytes) (lr) + UInt64 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0B0 (0x40 bytes) (d0-d7) + UIntNative m_returnBlock[4]; // ChildSP+050 CallerSP-070 (0x40 bytes) + UIntNative m_intArgRegs[9]; // ChildSP+070 CallerSP-050 (0x48 bytes) (x0-x8) + UIntNative m_alignmentPad; // ChildSP+0B8 CallerSP-008 (0x08 bytes) + UIntNative m_stackPassedArgs[1]; // ChildSP+0C0 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedLR); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP); + } +#elif defined(TARGET_WASM) +private: + // WASMTODO: #error NYI for this arch + UIntNative m_stackPassedArgs[1]; // Placeholder +public: + PTR_UIntNative get_CallerSP() { PORTABILITY_ASSERT("@TODO: FIXME:WASM"); return NULL; } + PTR_UIntNative get_AddressOfPushedCallerIP() { PORTABILITY_ASSERT("@TODO: FIXME:WASM"); return NULL; } + PTR_UIntNative get_LowerBoundForConservativeReporting() { PORTABILITY_ASSERT("@TODO: FIXME:WASM"); return NULL; } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + UNREFERENCED_PARAMETER(pRegisterSet); + PORTABILITY_ASSERT("@TODO: FIXME:WASM"); + } +#else +#error NYI for this arch +#endif + +#undef GET_POINTER_TO_FIELD + +}; + +typedef DPTR(UniversalTransitionStackFrame) PTR_UniversalTransitionStackFrame; + +// NOTE: This function always publishes a non-NULL conservative stack range lower bound. +// +// NOTE: In x86 cases, the unwound callsite often uses a calling convention that expects some amount +// of stack-passed argument space to be callee-popped before control returns (or unwinds) to the +// callsite. Since the callsite signature (and thus the amount of callee-popped space) is unknown, +// the recovered SP does not account for the callee-popped space is therefore "wrong" for the +// purposes of unwind. This implies that any x86 function which calls into RhpUniversalTransition +// must have a frame pointer to ensure that the incorrect SP value is ignored and does not break the +// unwind. 
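As a sanity check, the ChildSP/CallerSP offsets quoted in the layout comments above can be verified offline. The following standalone sketch mirrors the TARGET_X86 arrangement with uint32_t standing in for the 32-bit UIntNative; the mirror type and its name are hypothetical and exist only to make the documented offsets checkable with static_assert, it is not part of this change:

#include <cstddef>
#include <cstdint>

// Hypothetical 32-bit mirror of the TARGET_X86 UniversalTransitionStackFrame layout,
// used only to confirm the offsets quoted in the comments (ChildSP+000/008/010/014/018).
struct X86UniversalTransitionFrameMirror
{
    uint32_t m_intArgRegs[2];      // ChildSP+000  (edx,ecx)
    uint32_t m_returnBlock[2];     // ChildSP+008
    uint32_t m_pushedEBP;          // ChildSP+010
    uint32_t m_callerRetaddr;      // ChildSP+014
    uint32_t m_stackPassedArgs[1]; // ChildSP+018  (conservative reporting runs up to here)
};

static_assert(offsetof(X86UniversalTransitionFrameMirror, m_returnBlock)     == 0x08, "layout comment mismatch");
static_assert(offsetof(X86UniversalTransitionFrameMirror, m_pushedEBP)       == 0x10, "layout comment mismatch");
static_assert(offsetof(X86UniversalTransitionFrameMirror, m_callerRetaddr)   == 0x14, "layout comment mismatch");
static_assert(offsetof(X86UniversalTransitionFrameMirror, m_stackPassedArgs) == 0x18, "layout comment mismatch");

The same spot-check extends to the other architectures provided the stand-in field types match the real sizes (for example, a 16-byte aligned type in place of Fp128).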
+void StackFrameIterator::UnwindUniversalTransitionThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: Corresponding helper code is only defined in assembly code + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InUniversalTransitionThunk); + + // The current PC is within RhpUniversalTransition, so establish a view of the surrounding stack frame. + // NOTE: In DAC builds, the pointer will refer to a newly constructed object in the DAC host. + UniversalTransitionStackFrame * stackFrame = (PTR_UniversalTransitionStackFrame)m_RegDisplay.SP; + + stackFrame->UnwindNonVolatileRegisters(&m_RegDisplay); + + PTR_UIntNative addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP(); + m_RegDisplay.SetAddrOfIP((PTR_PCODE)addressOfPushedCallerIP); + m_RegDisplay.SetIP(*addressOfPushedCallerIP); + m_RegDisplay.SetSP((UIntNative)dac_cast(stackFrame->get_CallerSP())); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + // All universal transition cases rely on conservative GC reporting being applied to the + // full argument set that flowed into the call. Report the lower bound of this range (the + // caller will compute the upper bound). + PTR_UIntNative pLowerBound = stackFrame->get_LowerBoundForConservativeReporting(); + ASSERT(pLowerBound != NULL); + ASSERT(m_pConservativeStackRangeLowerBound == NULL); + m_pConservativeStackRangeLowerBound = pLowerBound; +#endif // defined(USE_PORTABLE_HELPERS) +} + +#ifdef TARGET_AMD64 +#define STACK_ALIGN_SIZE 16 +#elif defined(TARGET_ARM) +#define STACK_ALIGN_SIZE 8 +#elif defined(TARGET_ARM64) +#define STACK_ALIGN_SIZE 16 +#elif defined(TARGET_X86) +#define STACK_ALIGN_SIZE 4 +#elif defined(TARGET_WASM) +#define STACK_ALIGN_SIZE 4 +#endif + +#ifdef TARGET_AMD64 +struct CALL_DESCR_CONTEXT +{ + UIntNative Rbp; + UIntNative Rsi; + UIntNative Rbx; + UIntNative IP; +}; +#elif defined(TARGET_ARM) +struct CALL_DESCR_CONTEXT +{ + UIntNative R4; + UIntNative R5; + UIntNative R7; + UIntNative IP; +}; +#elif defined(TARGET_ARM64) +struct CALL_DESCR_CONTEXT +{ + UIntNative FP; + UIntNative IP; + UIntNative X19; + UIntNative X20; +}; +#elif defined(TARGET_X86) +struct CALL_DESCR_CONTEXT +{ + UIntNative Rbx; + UIntNative Rbp; + UIntNative IP; +}; +#elif defined (TARGET_WASM) +struct CALL_DESCR_CONTEXT +{ + UIntNative IP; +}; +#else +#error NYI - For this arch +#endif + +typedef DPTR(CALL_DESCR_CONTEXT) PTR_CALL_DESCR_CONTEXT; + +void StackFrameIterator::UnwindCallDescrThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: Corresponding helper code is only defined in assembly code + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InCallDescrThunk); + + UIntNative newSP; +#ifdef TARGET_AMD64 + // RBP points to the SP that we want to capture. 
(This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pRbp; + + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)newSP; + + m_RegDisplay.pRbp = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbp); + m_RegDisplay.pRsi = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rsi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbx); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT); +#elif defined(TARGET_ARM) + // R7 points to the SP that we want to capture. (This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pR7; + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)newSP; + + m_RegDisplay.pR4 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, R4); + m_RegDisplay.pR5 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, R5); + m_RegDisplay.pR7 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, R7); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT); + +#elif defined(TARGET_ARM64) + // pFP points to the SP that we want to capture. (This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pFP; + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)newSP; + + m_RegDisplay.pX19 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, X19); + m_RegDisplay.pX20 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, X20); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT); + +#elif defined(TARGET_X86) + // RBP points to the SP that we want to capture. 
(This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pRbp; + + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)(newSP - offsetof(CALL_DESCR_CONTEXT, Rbp)); + + m_RegDisplay.pRbp = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbp); + m_RegDisplay.pRbx = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbx); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT) - offsetof(CALL_DESCR_CONTEXT, Rbp); + +#else + PORTABILITY_ASSERT("UnwindCallDescrThunk"); + PTR_CALL_DESCR_CONTEXT pContext = NULL; +#endif + + m_RegDisplay.SetAddrOfIP(PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, IP)); + m_RegDisplay.SetIP(pContext->IP); + m_RegDisplay.SetSP(newSP); + SetControlPC(dac_cast(pContext->IP)); + +#endif // defined(USE_PORTABLE_HELPERS) +} + +void StackFrameIterator::UnwindThrowSiteThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: no portable version of throw helpers + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InThrowSiteThunk); + + const UIntNative STACKSIZEOF_ExInfo = ((sizeof(ExInfo) + (STACK_ALIGN_SIZE-1)) & ~(STACK_ALIGN_SIZE-1)); +#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) + const UIntNative SIZEOF_OutgoingScratch = 0x20; +#else + const UIntNative SIZEOF_OutgoingScratch = 0; +#endif + + PTR_PAL_LIMITED_CONTEXT pContext = (PTR_PAL_LIMITED_CONTEXT) + (m_RegDisplay.SP + SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo); + +#if defined(UNIX_AMD64_ABI) + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbp); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbx); + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R15); +#elif defined(TARGET_AMD64) + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbp); + m_RegDisplay.pRdi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rdi); + m_RegDisplay.pRsi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rsi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbx); + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R15); +#elif defined(TARGET_ARM) + m_RegDisplay.pR4 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R4); + m_RegDisplay.pR5 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R5); + m_RegDisplay.pR6 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R6); + m_RegDisplay.pR7 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R7); + m_RegDisplay.pR8 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R8); + m_RegDisplay.pR9 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R9); + m_RegDisplay.pR10 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R10); + m_RegDisplay.pR11 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R11); +#elif defined(TARGET_ARM64) + m_RegDisplay.pX19 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X19); + m_RegDisplay.pX20 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X20); + 
m_RegDisplay.pX21 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X21); + m_RegDisplay.pX22 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X22); + m_RegDisplay.pX23 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X23); + m_RegDisplay.pX24 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X24); + m_RegDisplay.pX25 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X25); + m_RegDisplay.pX26 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X26); + m_RegDisplay.pX27 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X27); + m_RegDisplay.pX28 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X28); + m_RegDisplay.pFP = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, FP); +#elif defined(TARGET_X86) + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbp); + m_RegDisplay.pRdi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rdi); + m_RegDisplay.pRsi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rsi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbx); +#else + ASSERT_UNCONDITIONALLY("NYI for this arch"); +#endif + + m_RegDisplay.SetAddrOfIP(PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, IP)); + m_RegDisplay.SetIP(pContext->IP); + m_RegDisplay.SetSP(pContext->GetSp()); + SetControlPC(dac_cast(pContext->IP)); + + // We expect the throw site to be in managed code, and since this function's notion of how to unwind + // through the stub is brittle relative to the stub itself, we want to check as soon as we can. + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC) && "unwind from throw site stub failed"); +#endif // defined(USE_PORTABLE_HELPERS) +} + +bool StackFrameIterator::IsValid() +{ + return (m_ControlPC != 0); +} + +void StackFrameIterator::Next() +{ + NextInternal(); + STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC); +} + +void StackFrameIterator::NextInternal() +{ +UnwindOutOfCurrentManagedFrame: + ASSERT(m_dwFlags & MethodStateCalculated); + m_dwFlags &= ~(ExCollide|MethodStateCalculated|UnwoundReversePInvoke); + ASSERT(IsValid()); + + m_pHijackedReturnValue = NULL; + m_HijackedReturnValueKind = GCRK_Unknown; + +#ifdef _DEBUG + SetControlPC(dac_cast((void*)666)); +#endif // _DEBUG + + // Clear any preceding published conservative range. The current unwind will compute a new range + // from scratch if one is needed. + m_pConservativeStackRangeLowerBound = NULL; + m_pConservativeStackRangeUpperBound = NULL; + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + UIntNative DEBUG_preUnwindSP = m_RegDisplay.GetSP(); +#endif + + PTR_VOID pPreviousTransitionFrame; + FAILFAST_OR_DAC_FAIL(GetCodeManager()->UnwindStackFrame(&m_methodInfo, &m_RegDisplay, &pPreviousTransitionFrame)); + + bool doingFuncletUnwind = GetCodeManager()->IsFunclet(&m_methodInfo); + + if (pPreviousTransitionFrame != NULL) + { + ASSERT(!doingFuncletUnwind); + + if (pPreviousTransitionFrame == TOP_OF_STACK_MARKER) + { + SetControlPC(0); + } + else + { + // NOTE: If this is an EH stack walk, then reinitializing the iterator using the GC stack + // walk flags is incorrect. That said, this is OK because the exception dispatcher will + // immediately trigger a failfast when it sees the UnwoundReversePInvoke flag. + // NOTE: This can generate a conservative stack range if the recovered PInvoke callsite + // resides in an assembly thunk and not in normal managed code. In this case InternalInit + // will unwind through the thunk and back to the nearest managed frame, and therefore may + // see a conservative range reported by one of the thunks encountered during this "nested" + // unwind. 
+            InternalInit(m_pThread, GetPInvokeTransitionFrame(pPreviousTransitionFrame), GcStackWalkFlags);
+            ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC));
+        }
+        m_dwFlags |= UnwoundReversePInvoke;
+    }
+    else
+    {
+        // if the thread is safe to walk, it better not have a hijack in place.
+        ASSERT((ThreadStore::GetCurrentThread() == m_pThread) || !m_pThread->DangerousCrossThreadIsHijacked());
+
+        SetControlPC(dac_cast(*(m_RegDisplay.GetAddrOfIP())));
+
+        PTR_VOID collapsingTargetFrame = NULL;
+
+        // Starting from the unwound return address, unwind further (if needed) until reaching
+        // either the next managed frame (i.e., the next frame that should be yielded from the
+        // stack frame iterator) or a collision point that requires complex handling.
+
+        bool exCollide = false;
+        ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC);
+
+        if (doingFuncletUnwind)
+        {
+            ASSERT(m_pendingFuncletFramePointer == NULL);
+            ASSERT(m_FramePointer != NULL);
+
+            if (category == InFuncletInvokeThunk)
+            {
+                // The iterator is unwinding out of an exceptionally invoked funclet. Before proceeding,
+                // record the funclet frame pointer so that the iterator can verify that the remainder of
+                // the stack walk encounters "owner frames" (i.e., parent funclets or the main code body)
+                // in the expected order.
+                // NOTE: m_pendingFuncletFramePointer will be cleared by HandleExCollide when the stack walk
+                // collides with the ExInfo that invoked this funclet.
+                m_pendingFuncletFramePointer = m_FramePointer;
+
+                // Unwind through the funclet invoke assembly thunk to reach the topmost managed frame in
+                // the exception dispatch code. All non-GC stack walks collide at this point (whereas GC
+                // stack walks collide at the throw site which is reached after processing all of the
+                // exception dispatch frames).
+                UnwindFuncletInvokeThunk();
+                if (!(m_dwFlags & CollapseFunclets))
+                {
+                    exCollide = true;
+                }
+            }
+            else if (category == InManagedCode)
+            {
+                // Non-exceptionally invoked funclet case. The caller is processed as a normal managed
+                // frame, with the caveat that funclet collapsing must be applied in GC stack walks (since
+                // the caller is either a parent funclet or the main code body and the leafmost funclet
+                // already provided GC information for the entire function).
+                if (m_dwFlags & CollapseFunclets)
+                {
+                    collapsingTargetFrame = m_FramePointer;
+                }
+            }
+            else
+            {
+                FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unexpected thunk encountered when unwinding out of a funclet.");
+            }
+        }
+        else if (category != InManagedCode)
+        {
+            // Unwinding the current (non-funclet) managed frame revealed that its caller is one of the
+            // well-known assembly thunks. Unwind through the thunk to find the next managed frame
+            // that should be yielded from the stack frame iterator.
+            // NOTE: It is generally possible for a sequence of multiple thunks to appear "on top of
+            // each other" on the stack (e.g., the CallDescrThunk can be used to invoke the
+            // UniversalTransitionThunk), but EH thunks can never appear in such sequences.
+
+            if (IsNonEHThunk(category))
+            {
+                // Unwind the current sequence of one or more thunks until the next managed frame is reached.
+                // NOTE: This can generate a conservative stack range if one or more of the thunks in the
+                // sequence report a conservative lower bound.
+                UnwindNonEHThunkSequence();
+            }
+            else if (category == InThrowSiteThunk)
+            {
+                // EH stack walks collide at the funclet invoke thunk and are never expected to encounter
+                // throw sites (except in illegal cases such as exceptions escaping from the managed
+                // exception dispatch code itself).
+                FAILFAST_OR_DAC_FAIL_MSG(!(m_dwFlags & ApplyReturnAddressAdjustment),
+                    "EH stack walk is attempting to propagate an exception across a throw site.");
+
+                UnwindThrowSiteThunk();
+
+                if (m_dwFlags & CollapseFunclets)
+                {
+                    UIntNative postUnwindSP = m_RegDisplay.SP;
+
+                    if (m_pNextExInfo && (postUnwindSP > ((UIntNative)dac_cast(m_pNextExInfo))))
+                    {
+                        // This GC stack walk has processed all managed exception frames associated with the
+                        // current throw site, meaning it has now collided with the associated ExInfo.
+                        exCollide = true;
+                    }
+                }
+            }
+            else
+            {
+                FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unexpected thunk encountered when unwinding out of a non-funclet.");
+            }
+        }
+
+        if (exCollide)
+        {
+            // OK, so we just hit (collided with) an exception throw point. We continue by consulting the
+            // ExInfo.
+
+            // In the GC stackwalk, this means walking all the way off the end of the managed exception
+            // dispatch code to the throw site. In the EH stackwalk, this means hitting the special funclet
+            // invoke ASM thunks.
+
+            // Double-check that the ExInfo that is being consulted is at or below the 'current' stack pointer
+            ASSERT(DEBUG_preUnwindSP <= (UIntNative)m_pNextExInfo);
+
+            ASSERT(collapsingTargetFrame == NULL);
+
+            collapsingTargetFrame = HandleExCollide(m_pNextExInfo);
+        }
+
+        // Now that all assembly thunks and ExInfo collisions have been processed, it is guaranteed
+        // that the next managed frame has been located. The located frame must now be yielded
+        // from the iterator with the one and only exception being cases where a managed frame must
+        // be skipped due to funclet collapsing.
+
+        ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC));
+
+        if (collapsingTargetFrame != NULL)
+        {
+            // The iterator is positioned on a parent funclet or main code body in a function where GC
+            // information has already been reported by the leafmost funclet, implying that the current
+            // frame needs to be skipped by the GC stack walk. In general, the GC stack walk must skip
+            // all parent frames that are "part of the same function" (i.e., have the same frame
+            // pointer).
+            ASSERT(m_dwFlags & CollapseFunclets);
+            CalculateCurrentMethodState();
+            ASSERT(IsValid());
+            FAILFAST_OR_DAC_FAIL(m_FramePointer == collapsingTargetFrame);
+
+            // Fail if the skipped frame has no associated conservative stack range (since any
+            // attached stack range is about to be dropped without ever being reported to the GC).
+            // This should never happen since funclet collapsing cases are only triggered when
+            // unwinding out of managed frames and never when unwinding out of the thunks that report
+            // conservative ranges.
+            FAILFAST_OR_DAC_FAIL(m_pConservativeStackRangeLowerBound == NULL);
+
+            STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "[ KeepUnwinding ]\n");
+            goto UnwindOutOfCurrentManagedFrame;
+        }
+
+        // Before yielding this frame, indicate that it was located via an ExInfo collision as
+        // opposed to normal unwind.
+        if (exCollide)
+            m_dwFlags |= ExCollide;
+    }
+
+    // At this point, the iterator is in an invalid state if there are no more managed frames
+    // on the current stack, and is otherwise positioned on the next managed frame to yield to
+    // the caller.
+ PrepareToYieldFrame(); +} + +// NOTE: This function will publish a non-NULL conservative stack range lower bound if and +// only if one or more of the thunks in the sequence report conservative stack ranges. +void StackFrameIterator::UnwindNonEHThunkSequence() +{ + ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC); + ASSERT(IsNonEHThunk(category)); + + // Unwind the current sequence of thunks until the next managed frame is reached, being + // careful to detect and aggregate any conservative stack ranges reported by the thunks. + PTR_UIntNative pLowestLowerBound = NULL; + PTR_UIntNative pPrecedingLowerBound = NULL; + while (category != InManagedCode) + { + ASSERT(m_pConservativeStackRangeLowerBound == NULL); + + if (category == InCallDescrThunk) + { + UnwindCallDescrThunk(); + } + else if (category == InUniversalTransitionThunk) + { + UnwindUniversalTransitionThunk(); + ASSERT(m_pConservativeStackRangeLowerBound != NULL); + } + else + { + FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unexpected thunk encountered when unwinding a non-EH thunk sequence."); + } + + if (m_pConservativeStackRangeLowerBound != NULL) + { + // The newly unwound thunk reported a conservative stack range lower bound. The thunk + // sequence being unwound needs to generate a single conservative range that will be + // reported along with the managed frame eventually yielded by the iterator. To ensure + // sufficient reporting, this range always extends from the first (i.e., lowest) lower + // bound all the way to the top of the outgoing arguments area in the next managed frame. + // This aggregate range therefore covers all intervening thunk frames (if any), and also + // covers all necessary conservative ranges in the pathological case where a sequence of + // thunks contains multiple frames which report distinct conservative lower bound values. + // + // Capture the initial lower bound, and assert that the lower bound values are compatible + // with the "aggregate range" approach described above (i.e., that they never exceed the + // unwound thunk's stack frame and are always larger than all previously encountered lower + // bound values). + + if (pLowestLowerBound == NULL) + pLowestLowerBound = m_pConservativeStackRangeLowerBound; + + FAILFAST_OR_DAC_FAIL(m_pConservativeStackRangeLowerBound < (PTR_UIntNative)m_RegDisplay.SP); + FAILFAST_OR_DAC_FAIL(m_pConservativeStackRangeLowerBound > pPrecedingLowerBound); + pPrecedingLowerBound = m_pConservativeStackRangeLowerBound; + m_pConservativeStackRangeLowerBound = NULL; + } + + category = CategorizeUnadjustedReturnAddress(m_ControlPC); + } + + // The iterator has reached the next managed frame. Publish the computed lower bound value. + ASSERT(m_pConservativeStackRangeLowerBound == NULL); + m_pConservativeStackRangeLowerBound = pLowestLowerBound; +} + +// This function is called immediately before a given frame is yielded from the iterator +// (i.e., before a given frame is exposed outside of the iterator). At yield points, +// iterator must either be invalid (indicating that all managed frames have been processed) +// or must describe a valid managed frame. In the latter case, some common postprocessing +// steps must always be applied before the frame is exposed outside of the iterator. 
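To make the "aggregate range" rule in UnwindNonEHThunkSequence above concrete, here is a minimal standalone sketch; the helper name and the std::vector input are hypothetical stand-ins for the iterator's internal state, not runtime code:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative only: keep the first (lowest) conservative lower bound reported by a
// sequence of thunk frames, asserting that each later bound sits higher on the stack.
uintptr_t AggregateConservativeLowerBound(const std::vector<uintptr_t>& perThunkBounds)
{
    uintptr_t lowest = 0;
    uintptr_t preceding = 0;
    for (uintptr_t bound : perThunkBounds)
    {
        if (lowest == 0)
            lowest = bound;        // first bound encountered; the walk moves upward, so it is the lowest
        assert(bound > preceding); // later thunks must report strictly higher bounds
        preceding = bound;
    }
    return lowest;                 // 0 means no thunk in the sequence reported a bound
}

Only the first bound needs to survive because the walk proceeds upward through the thunk sequence, so the first reported bound is necessarily the lowest and the single published range covers every intervening thunk frame.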
+void StackFrameIterator::PrepareToYieldFrame() +{ + if (!IsValid()) + return; + + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC)); + + bool atDebuggerHijackSite = (this->m_ControlPC == (PTR_VOID)(TADDR)DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer()); + + if (atDebuggerHijackSite) + { + FAILFAST_OR_DAC_FAIL_MSG(m_pConservativeStackRangeLowerBound != NULL, + "Debugger hijack unwind is missing the required conservative range from a preceding transition thunk."); + } + + if (m_dwFlags & ApplyReturnAddressAdjustment) + { + FAILFAST_OR_DAC_FAIL_MSG(!atDebuggerHijackSite, + "EH stack walk is attempting to propagate an exception across a debugger hijack site."); + + m_ControlPC = AdjustReturnAddressBackward(m_ControlPC); + } + + m_ShouldSkipRegularGcReporting = false; + + // Each time a managed frame is yielded, configure the iterator to explicitly indicate + // whether or not unwinding to the current frame has revealed a stack range that must be + // conservatively reported by the GC. + if ((m_pConservativeStackRangeLowerBound != NULL) && (m_dwFlags & CollapseFunclets)) + { + // Conservatively reported stack ranges always correspond to the full extent of the + // argument set (including stack-passed arguments and spilled argument registers) that + // flowed into a managed callsite which called into the runtime. The runtime has no + // knowledge of the callsite signature in these cases, and unwind through these callsites + // is only possible via the associated assembly thunk (e.g., the ManagedCalloutThunk or + // UniversalTransitionThunk). + // + // The iterator is currently positioned on the managed frame which contains the callsite of + // interest. The lower bound of the argument set was already computed while unwinding + // through the assembly thunk. The upper bound of the argument set is always at or below + // the top of the outgoing arguments area in the current managed frame (i.e., in the + // managed frame which contains the callsite). + // + // Compute a conservative upper bound and then publish the total range so that it can be + // observed by the current GC stack walk (via HasStackRangeToReportConservatively). Note + // that the upper bound computation never mutates m_RegDisplay. + CalculateCurrentMethodState(); + ASSERT(IsValid()); + + if (!atDebuggerHijackSite) + { + UIntNative rawUpperBound = GetCodeManager()->GetConservativeUpperBoundForOutgoingArgs(&m_methodInfo, &m_RegDisplay); + m_pConservativeStackRangeUpperBound = (PTR_UIntNative)rawUpperBound; + } + else + { + // Debugger hijack points differ from all other unwind cases in that they are not + // guaranteed to be GC safe points, which implies that regular GC reporting will not + // protect the GC references that the function was using (in registers and/or in local + // stack slots) at the time of the hijack. + // + // GC references held in registers at the time of the hijack are reported by the + // debugger and therefore do not need to be handled here. (The debugger does this by + // conservatively reporting the entire CONTEXT record which lists the full register + // set that was observed when the thread was stopped at the hijack point.) + // + // This code is therefore only responsible for reporting the GC references that were + // stored on the stack at the time of the hijack. Conceptually, this is done by + // conservatively reporting the entire stack frame. 
Since debugger hijack unwind
+            // always occurs via a UniversalTransitionThunk, the conservative lower bound
+            // published by the thunk can be used as a workable lower bound for the entire stack
+            // frame.
+            //
+            // Computing a workable upper bound is more difficult, especially because the stack
+            // frame of a funclet can contain FP-relative locals which reside arbitrarily far up
+            // the stack compared to the current SP. The top of the thread's stack is currently
+            // used as an extremely conservative upper bound as a way to cover all cases without
+            // introducing more stack walker complexity.
+
+            PTR_VOID pStackLow;
+            PTR_VOID pStackHigh;
+#ifndef DACCESS_COMPILE
+            m_pThread->GetStackBounds(&pStackLow, &pStackHigh);
+#endif
+            m_pConservativeStackRangeUpperBound = (PTR_UIntNative)pStackHigh;
+            m_ShouldSkipRegularGcReporting = true;
+        }
+
+        ASSERT(m_pConservativeStackRangeLowerBound != NULL);
+        ASSERT(m_pConservativeStackRangeUpperBound != NULL);
+        ASSERT(m_pConservativeStackRangeUpperBound > m_pConservativeStackRangeLowerBound);
+    }
+    else
+    {
+        m_pConservativeStackRangeLowerBound = NULL;
+        m_pConservativeStackRangeUpperBound = NULL;
+    }
+}
+
+REGDISPLAY * StackFrameIterator::GetRegisterSet()
+{
+    ASSERT(IsValid());
+    return &m_RegDisplay;
+}
+
+PTR_VOID StackFrameIterator::GetEffectiveSafePointAddress()
+{
+    ASSERT(IsValid());
+    return m_effectiveSafePointAddress;
+}
+
+PTR_ICodeManager StackFrameIterator::GetCodeManager()
+{
+    ASSERT(IsValid());
+    return m_pCodeManager;
+}
+
+MethodInfo * StackFrameIterator::GetMethodInfo()
+{
+    ASSERT(IsValid());
+    return &m_methodInfo;
+}
+
+#ifdef DACCESS_COMPILE
+#define FAILFAST_OR_DAC_RETURN_FALSE(x) if(!(x)) return false;
+#else
+#define FAILFAST_OR_DAC_RETURN_FALSE(x) if(!(x)) { ASSERT_UNCONDITIONALLY(#x); RhFailFast(); }
+#endif
+
+void StackFrameIterator::CalculateCurrentMethodState()
+{
+    if (m_dwFlags & MethodStateCalculated)
+        return;
+
+    // Assume that the caller is likely to be in the same module
+    if (m_pCodeManager == NULL || !m_pCodeManager->FindMethodInfo(m_ControlPC, &m_methodInfo))
+    {
+        m_pCodeManager = dac_cast(m_pInstance->FindCodeManagerByAddress(m_ControlPC));
+        FAILFAST_OR_DAC_FAIL(m_pCodeManager);
+
+        FAILFAST_OR_DAC_FAIL(m_pCodeManager->FindMethodInfo(m_ControlPC, &m_methodInfo));
+    }
+
+    m_effectiveSafePointAddress = m_ControlPC;
+    m_FramePointer = GetCodeManager()->GetFramePointer(&m_methodInfo, &m_RegDisplay);
+
+    m_dwFlags |= MethodStateCalculated;
+}
+
+bool StackFrameIterator::GetHijackedReturnValueLocation(PTR_RtuObjectRef * pLocation, GCRefKind * pKind)
+{
+    if (GCRK_Unknown == m_HijackedReturnValueKind)
+        return false;
+
+    ASSERT((GCRK_Scalar < m_HijackedReturnValueKind) && (m_HijackedReturnValueKind <= GCRK_LastValid));
+
+    *pLocation = m_pHijackedReturnValue;
+    *pKind = m_HijackedReturnValueKind;
+    return true;
+}
+
+void StackFrameIterator::SetControlPC(PTR_VOID controlPC)
+{
+    m_OriginalControlPC = m_ControlPC = controlPC;
+}
+
+bool StackFrameIterator::IsNonEHThunk(ReturnAddressCategory category)
+{
+    switch (category)
+    {
+        default:
+            return false;
+        case InUniversalTransitionThunk:
+        case InCallDescrThunk:
+            return true;
+    }
+}
+
+bool StackFrameIterator::IsValidReturnAddress(PTR_VOID pvAddress)
+{
+    // These are return addresses into functions that call into managed (non-funclet) code, so we might see
+    // them as hijacked return addresses.
+ ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(pvAddress); + + // All non-EH thunks call out to normal managed code, implying that return addresses into + // them can be hijacked. + if (IsNonEHThunk(category)) + return true; + + // Throw site thunks call out to managed code, but control never returns from the managed + // callee. As a result, return addresses into these thunks can be hijacked, but the + // hijacks will never execute. + if (category == InThrowSiteThunk) + return true; + + return (NULL != GetRuntimeInstance()->FindCodeManagerByAddress(pvAddress)); +} + +// Support for conservatively reporting GC references in a stack range. This is used when managed methods with +// an unknown signature potentially including GC references call into the runtime and we need to let a GC +// proceed (typically because we call out into managed code again). Instead of storing signature metadata for +// every possible managed method that might make such a call we identify a small range of the stack that might +// contain outgoing arguments. We then report every pointer that looks like it might refer to the GC heap as a +// fixed interior reference. + +bool StackFrameIterator::HasStackRangeToReportConservatively() +{ + // When there's no range to report both the lower and upper bounds will be NULL. + return IsValid() && (m_pConservativeStackRangeUpperBound != NULL); +} + +void StackFrameIterator::GetStackRangeToReportConservatively(PTR_RtuObjectRef * ppLowerBound, PTR_RtuObjectRef * ppUpperBound) +{ + ASSERT(HasStackRangeToReportConservatively()); + *ppLowerBound = (PTR_RtuObjectRef)m_pConservativeStackRangeLowerBound; + *ppUpperBound = (PTR_RtuObjectRef)m_pConservativeStackRangeUpperBound; +} + +PTR_VOID StackFrameIterator::AdjustReturnAddressBackward(PTR_VOID controlPC) +{ +#ifdef TARGET_ARM + return (PTR_VOID)(((PTR_UInt8)controlPC) - 2); +#elif defined(TARGET_ARM64) + return (PTR_VOID)(((PTR_UInt8)controlPC) - 4); +#else + return (PTR_VOID)(((PTR_UInt8)controlPC) - 1); +#endif +} + +// Given a return address, determine the category of function where it resides. In +// general, return addresses encountered by the stack walker are required to reside in +// managed code unless they reside in one of the well-known assembly thunks. 
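As a rough illustration of what the consumer of the published range does (this is not the runtime's actual GC interface; the callback type and heap-bounds test below are stand-ins), conservative reporting amounts to treating every pointer-sized slot in [lower, upper) whose value lands inside the GC heap as a pinned interior reference:

#include <cstdint>

// Hypothetical callback and heap-bounds check, for illustration only.
typedef void (*ReportPinnedInteriorRef)(void** pSlot);

void ScanRangeConservatively(uintptr_t* pLowerBound, uintptr_t* pUpperBound,
                             uintptr_t gcHeapLow, uintptr_t gcHeapHigh,
                             ReportPinnedInteriorRef report)
{
    for (uintptr_t* pSlot = pLowerBound; pSlot < pUpperBound; pSlot++)
    {
        uintptr_t value = *pSlot;

        // No signature information is available for the callsite, so anything that might
        // point into the GC heap is reported conservatively as a pinned interior reference.
        if (value >= gcHeapLow && value < gcHeapHigh)
            report(reinterpret_cast<void**>(pSlot));
    }
}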
+ +// static +StackFrameIterator::ReturnAddressCategory StackFrameIterator::CategorizeUnadjustedReturnAddress(PTR_VOID returnAddress) +{ +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: no portable thunks are defined + + return InManagedCode; + +#else // defined(USE_PORTABLE_HELPERS) + +#if defined(FEATURE_DYNAMIC_CODE) + if (EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromCallDescrThunk)) + { + return InCallDescrThunk; + } + else if (EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition) || + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition_DebugStepTailCall)) + { + return InUniversalTransitionThunk; + } +#endif + + if (EQUALS_RETURN_ADDRESS(returnAddress, RhpThrowEx2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpThrowHwEx2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpRethrow2)) + { + return InThrowSiteThunk; + } + + if ( +#ifdef TARGET_X86 + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFunclet2) +#else + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallCatchFunclet2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFinallyFunclet2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFilterFunclet2) +#endif + ) + { + return InFuncletInvokeThunk; + } + + return InManagedCode; +#endif // defined(USE_PORTABLE_HELPERS) +} + +bool StackFrameIterator::ShouldSkipRegularGcReporting() +{ + return m_ShouldSkipRegularGcReporting; +} + +#ifndef DACCESS_COMPILE + +COOP_PINVOKE_HELPER(Boolean, RhpSfiInit, (StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, Boolean instructionFault)) +{ + Thread * pCurThread = ThreadStore::GetCurrentThread(); + + // The stackwalker is intolerant to hijacked threads, as it is largely expecting to be called from C++ + // where the hijack state of the thread is invariant. Because we've exposed the iterator out to C#, we + // need to unhijack every time we callback into C++ because the thread could have been hijacked during our + // time executing C#. + pCurThread->Unhijack(); + + // Passing NULL is a special-case to request a standard managed stack trace for the current thread. + if (pStackwalkCtx == NULL) + pThis->InternalInitForStackTrace(); + else + pThis->InternalInitForEH(pCurThread, pStackwalkCtx, instructionFault); + + bool isValid = pThis->IsValid(); + if (isValid) + pThis->CalculateCurrentMethodState(); + return isValid ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Boolean, RhpSfiNext, (StackFrameIterator* pThis, UInt32* puExCollideClauseIdx, Boolean* pfUnwoundReversePInvoke)) +{ + // The stackwalker is intolerant to hijacked threads, as it is largely expecting to be called from C++ + // where the hijack state of the thread is invariant. Because we've exposed the iterator out to C#, we + // need to unhijack every time we callback into C++ because the thread could have been hijacked during our + // time executing C#. + ThreadStore::GetCurrentThread()->Unhijack(); + + const UInt32 MaxTryRegionIdx = 0xFFFFFFFF; + + ExInfo * pCurExInfo = pThis->m_pNextExInfo; + pThis->Next(); + bool isValid = pThis->IsValid(); + if (isValid) + pThis->CalculateCurrentMethodState(); + + if (pThis->m_dwFlags & StackFrameIterator::ExCollide) + { + ASSERT(pCurExInfo->m_idxCurClause != MaxTryRegionIdx); + *puExCollideClauseIdx = pCurExInfo->m_idxCurClause; + pCurExInfo->m_kind = (ExKind)(pCurExInfo->m_kind | EK_SupersededFlag); + } + else + { + *puExCollideClauseIdx = MaxTryRegionIdx; + } + + *pfUnwoundReversePInvoke = (pThis->m_dwFlags & StackFrameIterator::UnwoundReversePInvoke) + ? 
Boolean_true + : Boolean_false; + return isValid; +} + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.h new file mode 100644 index 0000000000000..fe28f53f3ff89 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.h @@ -0,0 +1,211 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "ICodeManager.h" + +struct ExInfo; +typedef DPTR(ExInfo) PTR_ExInfo; +typedef VPTR(ICodeManager) PTR_ICodeManager; + +enum ExKind : UInt8 +{ + EK_HardwareFault = 2, + EK_SupersededFlag = 8, +}; + +struct EHEnum +{ + ICodeManager * m_pCodeManager; + EHEnumState m_state; +}; + +EXTERN_C Boolean FASTCALL RhpSfiInit(StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, Boolean instructionFault); +EXTERN_C Boolean FASTCALL RhpSfiNext(StackFrameIterator* pThis, UInt32* puExCollideClauseIdx, Boolean* pfUnwoundReversePInvoke); + +struct PInvokeTransitionFrame; +typedef DPTR(PInvokeTransitionFrame) PTR_PInvokeTransitionFrame; +typedef DPTR(PAL_LIMITED_CONTEXT) PTR_PAL_LIMITED_CONTEXT; + +class StackFrameIterator +{ + friend class AsmOffsets; + friend Boolean FASTCALL RhpSfiInit(StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, Boolean instructionFault); + friend Boolean FASTCALL RhpSfiNext(StackFrameIterator* pThis, UInt32* puExCollideClauseIdx, Boolean* pfUnwoundReversePInvoke); + +public: + StackFrameIterator() {} + StackFrameIterator(Thread * pThreadToWalk, PTR_VOID pInitialTransitionFrame); + StackFrameIterator(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx); + + + bool IsValid(); + void CalculateCurrentMethodState(); + void Next(); + PTR_VOID GetEffectiveSafePointAddress(); + REGDISPLAY * GetRegisterSet(); + PTR_ICodeManager GetCodeManager(); + MethodInfo * GetMethodInfo(); + bool GetHijackedReturnValueLocation(PTR_RtuObjectRef * pLocation, GCRefKind * pKind); + void SetControlPC(PTR_VOID controlPC); + + static bool IsValidReturnAddress(PTR_VOID pvAddress); + + // Support for conservatively reporting GC references in a stack range. This is used when managed methods + // with an unknown signature potentially including GC references call into the runtime and we need to let + // a GC proceed (typically because we call out into managed code again). Instead of storing signature + // metadata for every possible managed method that might make such a call we identify a small range of the + // stack that might contain outgoing arguments. We then report every pointer that looks like it might + // refer to the GC heap as a fixed interior reference. + bool HasStackRangeToReportConservatively(); + void GetStackRangeToReportConservatively(PTR_RtuObjectRef * ppLowerBound, PTR_RtuObjectRef * ppUpperBound); + + // Debugger Hijacked frame looks very much like a usual managed frame except when the + // frame must be reported conservatively, and when that happens, regular GC reporting should be skipped + bool ShouldSkipRegularGcReporting(); + +private: + // The invoke of a funclet is a bit special and requires an assembly thunk, but we don't want to break the + // stackwalk due to this. So this routine will unwind through the assembly thunks used to invoke funclets. + // It's also used to disambiguate exceptionally- and non-exceptionally-invoked funclets. 
+ void UnwindFuncletInvokeThunk(); + void UnwindThrowSiteThunk(); + + // If our control PC indicates that we're in the universal transition thunk that we use to generically + // dispatch arbitrary managed calls, then handle the stack walk specially. + // NOTE: This function always publishes a non-NULL conservative stack range lower bound. + void UnwindUniversalTransitionThunk(); + + // If our control PC indicates that we're in the call descr thunk that we use to call an arbitrary managed + // function with an arbitrary signature from a normal managed function handle the stack walk specially. + void UnwindCallDescrThunk(); + + void EnterInitialInvalidState(Thread * pThreadToWalk); + + void InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, UInt32 dwFlags); // GC stackwalk + void InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, UInt32 dwFlags); // EH and hijack stackwalk, and collided unwind + void InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault); // EH stackwalk + void InternalInitForStackTrace(); // Environment.StackTrace + + PTR_VOID HandleExCollide(PTR_ExInfo pExInfo); + void NextInternal(); + + // This will walk m_pNextExInfo from its current value until it finds the next ExInfo at a higher address + // than the SP reference value passed in. This is useful when 'restarting' the stackwalk from a + // particular PInvokeTransitionFrame or after we have a 'collided unwind' that may skip over ExInfos. + void ResetNextExInfoForSP(UIntNative SP); + + void UpdateFromExceptionDispatch(PTR_StackFrameIterator pSourceIterator); + + // helpers to ApplyReturnAddressAdjustment + PTR_VOID AdjustReturnAddressForward(PTR_VOID controlPC); + PTR_VOID AdjustReturnAddressBackward(PTR_VOID controlPC); + + void UnwindNonEHThunkSequence(); + void PrepareToYieldFrame(); + + enum ReturnAddressCategory + { + InManagedCode, + InThrowSiteThunk, + InFuncletInvokeThunk, + InCallDescrThunk, + InUniversalTransitionThunk, + }; + + static ReturnAddressCategory CategorizeUnadjustedReturnAddress(PTR_VOID returnAddress); + static bool IsNonEHThunk(ReturnAddressCategory category); + + enum Flags + { + // If this flag is set, each unwind will apply a -1 to the ControlPC. This is used by EH to ensure + // that the ControlPC of a callsite stays within the containing try region. + ApplyReturnAddressAdjustment = 1, + + // Used by the GC stackwalk, this flag will ensure that multiple funclet frames for a given method + // activation will be given only one callback. The one callback is given for the most nested physical + // stack frame of a given activation of a method. (i.e. the leafmost funclet) + CollapseFunclets = 2, + + // This is a state returned by Next() which indicates that we just crossed an ExInfo in our unwind. + ExCollide = 4, + + // If a hardware fault frame is encountered, report its control PC at the binder-inserted GC safe + // point immediately after the prolog of the most nested enclosing try-region's handler. 
+ RemapHardwareFaultsToSafePoint = 8, + + MethodStateCalculated = 0x10, + + // This is a state returned by Next() which indicates that we just unwound a reverse pinvoke method + UnwoundReversePInvoke = 0x20, + + GcStackWalkFlags = (CollapseFunclets | RemapHardwareFaultsToSafePoint), + EHStackWalkFlags = ApplyReturnAddressAdjustment, + StackTraceStackWalkFlags = GcStackWalkFlags + }; + + struct PreservedRegPtrs + { +#ifdef TARGET_ARM + PTR_UIntNative pR4; + PTR_UIntNative pR5; + PTR_UIntNative pR6; + PTR_UIntNative pR7; + PTR_UIntNative pR8; + PTR_UIntNative pR9; + PTR_UIntNative pR10; + PTR_UIntNative pR11; +#elif defined(TARGET_ARM64) + PTR_UIntNative pX19; + PTR_UIntNative pX20; + PTR_UIntNative pX21; + PTR_UIntNative pX22; + PTR_UIntNative pX23; + PTR_UIntNative pX24; + PTR_UIntNative pX25; + PTR_UIntNative pX26; + PTR_UIntNative pX27; + PTR_UIntNative pX28; + PTR_UIntNative pFP; +#elif defined(UNIX_AMD64_ABI) + PTR_UIntNative pRbp; + PTR_UIntNative pRbx; + PTR_UIntNative pR12; + PTR_UIntNative pR13; + PTR_UIntNative pR14; + PTR_UIntNative pR15; +#else // TARGET_ARM + PTR_UIntNative pRbp; + PTR_UIntNative pRdi; + PTR_UIntNative pRsi; + PTR_UIntNative pRbx; +#ifdef TARGET_AMD64 + PTR_UIntNative pR12; + PTR_UIntNative pR13; + PTR_UIntNative pR14; + PTR_UIntNative pR15; +#endif // TARGET_AMD64 +#endif // TARGET_ARM + }; + +protected: + Thread * m_pThread; + RuntimeInstance * m_pInstance; + PTR_VOID m_FramePointer; + PTR_VOID m_ControlPC; + REGDISPLAY m_RegDisplay; + PTR_ICodeManager m_pCodeManager; + MethodInfo m_methodInfo; + PTR_VOID m_effectiveSafePointAddress; + PTR_RtuObjectRef m_pHijackedReturnValue; + GCRefKind m_HijackedReturnValueKind; + PTR_UIntNative m_pConservativeStackRangeLowerBound; + PTR_UIntNative m_pConservativeStackRangeUpperBound; + UInt32 m_dwFlags; + PTR_ExInfo m_pNextExInfo; + PTR_VOID m_pendingFuncletFramePointer; + PreservedRegPtrs m_funcletPtrs; // @TODO: Placing the 'scratch space' in the StackFrameIterator is not + // preferred because not all StackFrameIterators require this storage + // space. However, the implementation simpler by doing it this way. + bool m_ShouldSkipRegularGcReporting; + PTR_VOID m_OriginalControlPC; +}; + diff --git a/src/coreclr/src/nativeaot/Runtime/SyncClean.cpp b/src/coreclr/src/nativeaot/Runtime/SyncClean.cpp new file mode 100644 index 0000000000000..927d3f5bf307a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/SyncClean.cpp @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "SpinLock.h" +#include "rhbinder.h" +#include "CachedInterfaceDispatch.h" + +#include "SyncClean.hpp" + +void SyncClean::Terminate() +{ + CleanUp(); +} + +void SyncClean::CleanUp () +{ +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // Update any interface dispatch caches that were unsafe to modify outside of this GC. + ReclaimUnusedInterfaceDispatchCaches(); +#endif +} diff --git a/src/coreclr/src/nativeaot/Runtime/SyncClean.hpp b/src/coreclr/src/nativeaot/Runtime/SyncClean.hpp new file mode 100644 index 0000000000000..c9ea16263075e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/SyncClean.hpp @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _SYNCCLEAN_HPP_ +#define _SYNCCLEAN_HPP_ + +// We keep a list of memory blocks to be freed at the end of GC, but before we resume EE. +// To make this work, we need to make sure that these data are accessed in cooperative GC +// mode. + +class SyncClean { +public: + static void Terminate (); + static void CleanUp (); +}; + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/src/nativeaot/Runtime/ThunksMapping.cpp new file mode 100644 index 0000000000000..7076a5af30fbe --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ThunksMapping.cpp @@ -0,0 +1,266 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.inl" +#include "volatile.h" +#include "PalRedhawk.h" +#include "rhassert.h" + + +#ifdef FEATURE_RX_THUNKS + +#ifdef TARGET_AMD64 +#define THUNK_SIZE 20 +#elif TARGET_X86 +#define THUNK_SIZE 12 +#elif TARGET_ARM +#define THUNK_SIZE 20 +#else +#define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 +#endif + +static_assert((THUNK_SIZE % 4) == 0, "Thunk stubs size not aligned correctly. This will cause runtime failures."); + +#define THUNKS_MAP_SIZE 0x8000 // 32 K + +#ifdef TARGET_ARM +//***************************************************************************** +// Encode a 16-bit immediate mov/movt in ARM Thumb2 Instruction (format T2_N) +//***************************************************************************** +void EncodeThumb2Mov16(UInt16 * pCode, UInt16 value, UInt8 rDestination, bool topWord) +{ + pCode[0] = ((topWord ? 
0xf2c0 : 0xf240) | + ((value >> 12) & 0x000f) | + ((value >> 1) & 0x0400)); + pCode[1] = (((value << 4) & 0x7000) | + (value & 0x00ff) | + (rDestination << 8)); +} + +//***************************************************************************** +// Encode a 32-bit immediate mov in ARM Thumb2 Instruction (format T2_N) +//***************************************************************************** +void EncodeThumb2Mov32(UInt16 * pCode, UInt32 value, UInt8 rDestination) +{ + EncodeThumb2Mov16(pCode, (UInt16)(value & 0x0000ffff), rDestination, false); + EncodeThumb2Mov16(pCode + 2, (UInt16)(value >> 16), rDestination, true); +} +#endif + +COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ()) +{ + static_assert((THUNKS_MAP_SIZE % OS_PAGE_SIZE) == 0, "Thunks map size should be in multiples of pages"); + + return THUNKS_MAP_SIZE / OS_PAGE_SIZE; +} + +COOP_PINVOKE_HELPER(int, RhpGetNumThunksPerBlock, ()) +{ + return min( + OS_PAGE_SIZE / THUNK_SIZE, // Number of thunks that can fit in a page + (OS_PAGE_SIZE - POINTER_SIZE) / (POINTER_SIZE * 2) // Number of pointer pairs, minus the jump stub cell, that can fit in a page + ); +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkSize, ()) +{ + return THUNK_SIZE; +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkDataBlockAddress, (void* pThunkStubAddress)) +{ + return (void*)(((UIntNative)pThunkStubAddress & ~(OS_PAGE_SIZE - 1)) + THUNKS_MAP_SIZE); +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkStubsBlockAddress, (void* pThunkDataAddress)) +{ + return (void*)(((UIntNative)pThunkDataAddress & ~(OS_PAGE_SIZE - 1)) - THUNKS_MAP_SIZE); +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ()) +{ + return OS_PAGE_SIZE; +} + +EXTERN_C REDHAWK_API void* __cdecl RhAllocateThunksMapping() +{ +#ifdef WIN32 + + void * pNewMapping = PalVirtualAlloc(NULL, THUNKS_MAP_SIZE * 2, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (pNewMapping == NULL) + return NULL; + + void * pThunksSection = pNewMapping; + void * pDataSection = (UInt8*)pNewMapping + THUNKS_MAP_SIZE; + +#else + + // Note: On secure linux systems, we can't add execute permissions to a mapped virtual memory if it was not created + // with execute permissions in the first place. This is why we create the virtual section with RX permissions, then + // reduce it to RW for the data section and RX for the stubs section after generating the stubs instructions. 
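+    // Layout reserved below: one contiguous region of 2 * THUNKS_MAP_SIZE bytes, split into
+    //
+    //     [pThunksSection, pThunksSection + THUNKS_MAP_SIZE)  - executable stub pages
+    //     [pDataSection,   pDataSection   + THUNKS_MAP_SIZE)  - writable per-thunk data pages
+    //
+    // Each stub page pairs with the data page THUNKS_MAP_SIZE bytes above it, which is the
+    // relationship RhpGetThunkDataBlockAddress/RhpGetThunkStubsBlockAddress rely on, e.g.:
+    //
+    //     uint8_t* pData = (uint8_t*)((uintptr_t)pStub & ~(OS_PAGE_SIZE - 1)) + THUNKS_MAP_SIZE;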
+ void * pNewMapping = PalVirtualAlloc(NULL, THUNKS_MAP_SIZE * 2, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READ); + if (pNewMapping == NULL) + return NULL; + + void * pThunksSection = pNewMapping; + void * pDataSection = (UInt8*)pNewMapping + THUNKS_MAP_SIZE; + + if (!PalVirtualProtect(pDataSection, THUNKS_MAP_SIZE, PAGE_READWRITE) || + !PalVirtualProtect(pThunksSection, THUNKS_MAP_SIZE, PAGE_EXECUTE_READWRITE)) + { + PalVirtualFree(pNewMapping, 0, MEM_RELEASE); + return NULL; + } + +#endif + + int numBlocksPerMap = RhpGetNumThunkBlocksPerMapping(); + int numThunksPerBlock = RhpGetNumThunksPerBlock(); + + for (int m = 0; m < numBlocksPerMap; m++) + { + UInt8* pDataBlockAddress = (UInt8*)pDataSection + m * OS_PAGE_SIZE; + UInt8* pThunkBlockAddress = (UInt8*)pThunksSection + m * OS_PAGE_SIZE; + + for (int i = 0; i < numThunksPerBlock; i++) + { + UInt8* pCurrentThunkAddress = pThunkBlockAddress + THUNK_SIZE * i; + UInt8* pCurrentDataAddress = pDataBlockAddress + i * POINTER_SIZE * 2; + +#ifdef TARGET_AMD64 + + // mov r10, + // jmp [r10 + + // jmp [eax + + // str r12,[sp,#-4] + // ldr r12,[r12, Signature == ReadyToRunHeaderConstants::Signature); + if (pReadyToRunHeader->Signature != ReadyToRunHeaderConstants::Signature) + return nullptr; + + // Only the current major version is supported currently + ASSERT(pReadyToRunHeader->MajorVersion == ReadyToRunHeaderConstants::CurrentMajorVersion); + if (pReadyToRunHeader->MajorVersion != ReadyToRunHeaderConstants::CurrentMajorVersion) + return nullptr; + + return new (nothrow) TypeManager(osModule, pReadyToRunHeader, pClasslibFunctions, nClasslibFunctions); +} + +TypeManager::TypeManager(HANDLE osModule, ReadyToRunHeader * pHeader, void** pClasslibFunctions, UInt32 nClasslibFunctions) + : m_osModule(osModule), m_pHeader(pHeader), + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) +{ + int length; + m_pStaticsGCDataSection = (UInt8*)GetModuleSection(ReadyToRunSectionType::GCStaticRegion, &length); + m_pStaticsGCInfo = (StaticGcDesc*)GetModuleSection(ReadyToRunSectionType::GCStaticDesc, &length); + m_pThreadStaticsDataSection = (UInt8*)GetModuleSection(ReadyToRunSectionType::ThreadStaticRegion, &length); + m_pThreadStaticsGCInfo = (StaticGcDesc*)GetModuleSection(ReadyToRunSectionType::ThreadStaticGCDescRegion, &length); + m_pTlsIndex = (UInt32*)GetModuleSection(ReadyToRunSectionType::ThreadStaticIndex, &length); + m_pLoopHijackFlag = (UInt32*)GetModuleSection(ReadyToRunSectionType::LoopHijackFlag, &length); + m_pDispatchMapTable = (DispatchMap **)GetModuleSection(ReadyToRunSectionType::InterfaceDispatchTable, &length); +} + +void * TypeManager::GetModuleSection(ReadyToRunSectionType sectionId, int * length) +{ + ModuleInfoRow * pModuleInfoRows = (ModuleInfoRow *)(m_pHeader + 1); + + ASSERT(m_pHeader->EntrySize == sizeof(ModuleInfoRow)); + + // TODO: Binary search + for (int i = 0; i < m_pHeader->NumberOfSections; i++) + { + ModuleInfoRow * pCurrent = pModuleInfoRows + i; + if ((int32_t)sectionId == pCurrent->SectionId) + { + *length = pCurrent->GetLength(); + return pCurrent->Start; + } + } + + *length = 0; + return nullptr; +} + +void * TypeManager::GetClasslibFunction(ClasslibFunctionId functionId) +{ + uint32_t id = (uint32_t)functionId; + + if (id >= m_nClasslibFunctions) + return nullptr; + + return m_pClasslibFunctions[id]; +} + +bool TypeManager::ModuleInfoRow::HasEndPointer() +{ + return Flags & (int32_t)ModuleInfoFlags::HasEndPointer; +} + +int TypeManager::ModuleInfoRow::GetLength() +{ + if (HasEndPointer()) + { 
+ return (int)((UInt8*)End - (UInt8*)Start); + } + else + { + return sizeof(void*); + } +} + +void TypeManager::EnumStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo) +{ + if (pStaticGcInfo == NULL) + return; + + for (UInt32 idxSeries = 0; idxSeries < pStaticGcInfo->m_numSeries; idxSeries++) + { + PTR_StaticGcDescGCSeries pSeries = dac_cast(dac_cast(pStaticGcInfo) + + offsetof(StaticGcDesc, m_series) + (idxSeries * sizeof(StaticGcDesc::GCSeries))); + + // The m_startOffset field is really 32-bit relocation (IMAGE_REL_BASED_RELPTR32) to the GC static base of the type + // the GCSeries is describing for. This makes it tolerable to the symbol sorting that the linker conducts. + PTR_RtuObjectRef pRefLocation = dac_cast(dac_cast(&pSeries->m_startOffset) + (Int32)pSeries->m_startOffset); + UInt32 numObjects = pSeries->m_size; + + RedhawkGCInterface::BulkEnumGcObjRef(pRefLocation, numObjects, pfnCallback, pvCallbackData); + } +} + +void TypeManager::EnumThreadStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo, UInt8* pbThreadStaticData) +{ + if (pStaticGcInfo == NULL) + return; + + for (UInt32 idxSeries = 0; idxSeries < pStaticGcInfo->m_numSeries; idxSeries++) + { + PTR_StaticGcDescGCSeries pSeries = dac_cast(dac_cast(pStaticGcInfo) + + offsetof(StaticGcDesc, m_series) + (idxSeries * sizeof(StaticGcDesc::GCSeries))); + + // The m_startOffset field is really a 32-bit relocation (IMAGE_REL_SECREL) to the TLS section. + UInt8* pTlsObject = pbThreadStaticData + pSeries->m_startOffset; + PTR_RtuObjectRef pRefLocation = dac_cast(pTlsObject); + UInt32 numObjects = pSeries->m_size; + + RedhawkGCInterface::BulkEnumGcObjRef(pRefLocation, numObjects, pfnCallback, pvCallbackData); + } +} + +void TypeManager::EnumStaticGCRefs(void * pfnCallback, void * pvCallbackData) +{ + // Regular statics. + EnumStaticGCRefsBlock(pfnCallback, pvCallbackData, m_pStaticsGCInfo); + + // Thread local statics. + if (m_pThreadStaticsGCInfo != NULL) + { + FOREACH_THREAD(pThread) + { + // To calculate the address of the data for each thread's TLS fields we need two values: + // 1) The TLS slot index allocated for this module by the OS loader. We keep a pointer to this + // value in the module header. + // 2) The offset into the TLS block at which managed data begins. + EnumThreadStaticGCRefsBlock(pfnCallback, pvCallbackData, m_pThreadStaticsGCInfo, + dac_cast(pThread->GetThreadLocalStorage(*m_pTlsIndex, 0))); + } + END_FOREACH_THREAD + } +} + +HANDLE TypeManager::GetOsModuleHandle() +{ + return m_osModule; +} + +TypeManager* TypeManagerHandle::AsTypeManager() +{ + return (TypeManager*)_value; +} diff --git a/src/coreclr/src/nativeaot/Runtime/TypeManager.h b/src/coreclr/src/nativeaot/Runtime/TypeManager.h new file mode 100644 index 0000000000000..193a6cb3707ae --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/TypeManager.h @@ -0,0 +1,76 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#pragma once +#include "ModuleHeaders.h" +#include "ICodeManager.h" + +struct StaticGcDesc; +class DispatchMap; +typedef unsigned char UInt8; + +class TypeManager +{ + // NOTE: Part of this layout is a contract with the managed side in TypeManagerHandle.cs + HANDLE m_osModule; + ReadyToRunHeader * m_pHeader; + DispatchMap** m_pDispatchMapTable; + StaticGcDesc* m_pStaticsGCInfo; + StaticGcDesc* m_pThreadStaticsGCInfo; + UInt8* m_pStaticsGCDataSection; + UInt8* m_pThreadStaticsDataSection; + UInt32* m_pTlsIndex; // Pointer to TLS index if this module uses thread statics + void** m_pClasslibFunctions; + UInt32 m_nClasslibFunctions; + UInt32* m_pLoopHijackFlag; + + TypeManager(HANDLE osModule, ReadyToRunHeader * pHeader, void** pClasslibFunctions, UInt32 nClasslibFunctions); + +public: + static TypeManager * Create(HANDLE osModule, void * pModuleHeader, void** pClasslibFunctions, UInt32 nClasslibFunctions); + void * GetModuleSection(ReadyToRunSectionType sectionId, int * length); + void EnumStaticGCRefs(void * pfnCallback, void * pvCallbackData); + HANDLE GetOsModuleHandle(); + void* GetClasslibFunction(ClasslibFunctionId functionId); + UInt32* GetPointerToTlsIndex() { return m_pTlsIndex; } + void SetLoopHijackFlag(UInt32 flag) { if (m_pLoopHijackFlag != nullptr) *m_pLoopHijackFlag = flag; } + +private: + + struct ModuleInfoRow + { + int32_t SectionId; + int32_t Flags; + void * Start; + void * End; + + bool HasEndPointer(); + int GetLength(); + }; + + void EnumStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo); + void EnumThreadStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo, UInt8* pbThreadStaticData); +}; + +// TypeManagerHandle represents an AOT module in MRT based runtimes. +// These handles are a pointer to a TypeManager. +struct TypeManagerHandle +{ + static TypeManagerHandle Null() + { + TypeManagerHandle handle; + handle._value = nullptr; + return handle; + } + + static TypeManagerHandle Create(TypeManager * value) + { + TypeManagerHandle handle; + handle._value = value; + return handle; + } + + void *_value; + + TypeManager* AsTypeManager(); +}; + diff --git a/src/coreclr/src/nativeaot/Runtime/UniversalTransitionHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/UniversalTransitionHelpers.cpp new file mode 100644 index 0000000000000..377c5f8998582 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/UniversalTransitionHelpers.cpp @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + +// +// Define tables of predictable distinguished values that RhpUniversalTransition can use to +// trash argument registers after they have been saved into the transition frame. +// +// Trashing these registers is a testability aid that makes it easier to detect bugs where +// the transition frame content is not correctly propagated to the eventual callee. +// +// In the absence of trashing, such bugs can become undetectable if the code that +// dispatches the call happens to never touch the impacted argument register (e.g., xmm3 on +// amd64 or d5 on arm32). 
In such a case, the original enregistered argument will flow +// unmodified into the eventual callee, obscuring the fact that the dispatcher failed to +// propagate the transition frame copy of this register. +// +// These tables are manually aligned as a conservative safeguard to ensure that the +// consumers can use arbitrary access widths without ever needing to worry about alignment. +// The comments in each table show the %d/%f renderings of each 32-bit value, plus the +// %I64d/%f rendering of the combined 64-bit value of each aligned pair of 32-bit values. +// + +#define TRASH_VALUE_ALIGNMENT 16 + +EXTERN_C +DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT) +const UInt32 RhpIntegerTrashValues[] = { + // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32 + // ----------- ----------- --------- --------- ------------------ + 0x07801001U, 0x07802002U, // (125833217, 125837314) (540467148372316161) + 0x07803003U, 0x07804004U, // (125841411, 125845508) (540502341334347779) + 0x07805005U, 0x07806006U, // (125849605, 125853702) (540537534296379397) + 0x07807007U, 0x07808008U, // (125857799, 125861896) (540572727258411015) + 0x07809009U, 0x0780a00aU, // (125865993, 125870090) (540607920220442633) + 0x0780b00bU, 0x0780c00cU, // (125874187, 125878284) (540643113182474251) + 0x0780d00dU, 0x0780e00eU, // (125882381, 125886478) (540678306144505869) + 0x0780f00fU, 0x07810010U, // (125890575, 125894672) (540713499106537487) +}; + +EXTERN_C +DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT) +const UInt32 RhpFpTrashValues[] = { + // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32 + // ----------- ----------- ------------------- ------------------- ------------------- + 0x42001001U, 0x42002002U, // (32.0156288146972660, 32.0312576293945310) (8657061952.00781440) + 0x42003003U, 0x42004004U, // (32.0468864440917970, 32.0625152587890630) (8724187200.02344320) + 0x42005005U, 0x42006006U, // (32.0781440734863280, 32.0937728881835940) (8791312448.03907200) + 0x42007007U, 0x42008008U, // (32.1094017028808590, 32.1250305175781250) (8858437696.05470090) + 0x42009009U, 0x4200a00aU, // (32.1406593322753910, 32.1562881469726560) (8925562944.07032970) + 0x4200b00bU, 0x4200c00cU, // (32.1719169616699220, 32.1875457763671880) (8992688192.08595850) + 0x4200d00dU, 0x4200e00eU, // (32.2031745910644530, 32.2188034057617190) (9059813440.10158730) + 0x4200f00fU, 0x42010010U, // (32.2344322204589840, 32.2500610351562500) (9126938688.11721610) +}; + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + diff --git a/src/coreclr/src/nativeaot/Runtime/allocheap.cpp b/src/coreclr/src/nativeaot/Runtime/allocheap.cpp new file mode 100644 index 0000000000000..c5d1e9ede3fea --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/allocheap.cpp @@ -0,0 +1,372 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "Range.h" +#ifdef FEATURE_RWX_MEMORY +#include "memaccessmgr.h" +#endif +#include "allocheap.h" + +#include "CommonMacros.inl" +#include "slist.inl" + +using namespace rh::util; + +//------------------------------------------------------------------------------------------------- +AllocHeap::AllocHeap() + : m_blockList(), + m_rwProtectType(PAGE_READWRITE), + m_roProtectType(PAGE_READWRITE), +#ifdef FEATURE_RWX_MEMORY + m_pAccessMgr(NULL), + m_hCurPageRW(), +#endif // FEATURE_RWX_MEMORY + m_pNextFree(NULL), + m_pFreeCommitEnd(NULL), + m_pFreeReserveEnd(NULL), + m_pbInitialMem(NULL), + m_fShouldFreeInitialMem(false), + m_lock(CrstAllocHeap) + COMMA_INDEBUG(m_fIsInit(false)) +{ + ASSERT(!_UseAccessManager()); +} + +#ifdef FEATURE_RWX_MEMORY +//------------------------------------------------------------------------------------------------- +AllocHeap::AllocHeap( + UInt32 rwProtectType, + UInt32 roProtectType, + MemAccessMgr* pAccessMgr) + : m_blockList(), + m_rwProtectType(rwProtectType), + m_roProtectType(roProtectType == 0 ? rwProtectType : roProtectType), + m_pAccessMgr(pAccessMgr), + m_hCurPageRW(), + m_pNextFree(NULL), + m_pFreeCommitEnd(NULL), + m_pFreeReserveEnd(NULL), + m_pbInitialMem(NULL), + m_fShouldFreeInitialMem(false), + m_lock(CrstAllocHeap) + COMMA_INDEBUG(m_fIsInit(false)) +{ + ASSERT(!_UseAccessManager() || (m_rwProtectType != m_roProtectType && m_pAccessMgr != NULL)); +} +#endif // FEATURE_RWX_MEMORY + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::Init() +{ + ASSERT(!m_fIsInit); + INDEBUG(m_fIsInit = true;) + + return true; +} + +//------------------------------------------------------------------------------------------------- +// This is for using pre-allocated memory on heap construction. +// Should never use this more than once, and should always follow construction of heap. 
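+// Illustrative usage sketch (hypothetical names; assumes the caller has already reserved
+// cbReserve bytes starting at pbMem and committed the first cbCommit of them):
+//
+//     AllocHeap* pHeap = new (nothrow) AllocHeap();
+//     if (pHeap == NULL || !pHeap->Init(pbMem, cbCommit, cbReserve, false /* fShouldFreeInitialMem */))
+//         return false;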
+ +bool AllocHeap::Init( + UInt8 * pbInitialMem, + UIntNative cbInitialMemCommit, + UIntNative cbInitialMemReserve, + bool fShouldFreeInitialMem) +{ + ASSERT(!m_fIsInit); + +#ifdef FEATURE_RWX_MEMORY + // Manage the committed portion of memory + if (_UseAccessManager()) + { + m_pAccessMgr->ManageMemoryRange(MemRange(pbInitialMem, cbInitialMemCommit), true); + } +#endif // FEATURE_RWX_MEMORY + + BlockListElem *pBlock = new (nothrow) BlockListElem(pbInitialMem, cbInitialMemReserve); + if (pBlock == NULL) + return false; + m_blockList.PushHead(pBlock); + + if (!_UpdateMemPtrs(pbInitialMem, + pbInitialMem + cbInitialMemCommit, + pbInitialMem + cbInitialMemReserve)) + { + return false; + } + + m_pbInitialMem = pbInitialMem; + m_fShouldFreeInitialMem = fShouldFreeInitialMem; + + INDEBUG(m_fIsInit = true;) + return true; +} + +//------------------------------------------------------------------------------------------------- +AllocHeap::~AllocHeap() +{ + while (!m_blockList.IsEmpty()) + { + BlockListElem *pCur = m_blockList.PopHead(); + if (pCur->GetStart() != m_pbInitialMem || m_fShouldFreeInitialMem) + PalVirtualFree(pCur->GetStart(), pCur->GetLength(), MEM_RELEASE); + delete pCur; + } +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::_Alloc( + UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG + ) +{ +#ifndef FEATURE_RWX_MEMORY + const void* pRWAccessHolder = NULL; +#endif // FEATURE_RWX_MEMORY + + ASSERT((alignment & (alignment - 1)) == 0); // Power of 2 only. + ASSERT(alignment <= OS_PAGE_SIZE); // Can't handle this right now. + ASSERT((m_rwProtectType == m_roProtectType) == (pRWAccessHolder == NULL)); + ASSERT(!_UseAccessManager() || pRWAccessHolder != NULL); + + if (_UseAccessManager() && pRWAccessHolder == NULL) + return NULL; + + CrstHolder lock(&m_lock); + + UInt8 * pbMem = _AllocFromCurBlock(cbMem, alignment PASS_WRITE_ACCESS_HOLDER_ARG); + if (pbMem != NULL) + return pbMem; + + // Must allocate new block + if (!_AllocNewBlock(cbMem)) + return NULL; + + pbMem = _AllocFromCurBlock(cbMem, alignment PASS_WRITE_ACCESS_HOLDER_ARG); + ASSERT_MSG(pbMem != NULL, "AllocHeap::Alloc: failed to alloc mem after new block alloc"); + + return pbMem; +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::Alloc( + UIntNative cbMem + WRITE_ACCESS_HOLDER_ARG) +{ + return _Alloc(cbMem, 1 PASS_WRITE_ACCESS_HOLDER_ARG); +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::AllocAligned( + UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG) +{ + return _Alloc(cbMem, alignment PASS_WRITE_ACCESS_HOLDER_ARG); +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::Contains(void* pvMem, UIntNative cbMem) +{ + MemRange range(pvMem, cbMem); + for (BlockList::Iterator it = m_blockList.Begin(); it != m_blockList.End(); ++it) + { + if (it->Contains(range)) + { + return true; + } + } + return false; +} + +#ifdef FEATURE_RWX_MEMORY +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_AcquireWriteAccess( + UInt8* pvMem, + UIntNative cbMem, + WriteAccessHolder* pHolder) +{ + ASSERT(!_UseAccessManager() || m_pAccessMgr != NULL); + + if (_UseAccessManager()) + return m_pAccessMgr->AcquireWriteAccess(MemRange(pvMem, cbMem), m_hCurPageRW, pHolder); 
+ else + return true; +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::AcquireWriteAccess( + void* pvMem, + UIntNative cbMem, + WriteAccessHolder* pHolder) +{ + return _AcquireWriteAccess(static_cast(pvMem), cbMem, pHolder); +} +#endif // FEATURE_RWX_MEMORY + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd, UInt8* pFreeReserveEnd) +{ + ASSERT(MemRange(pNextFree, pFreeReserveEnd).Contains(MemRange(pNextFree, pFreeCommitEnd))); + ASSERT(ALIGN_DOWN(pFreeCommitEnd, OS_PAGE_SIZE) == pFreeCommitEnd); + ASSERT(ALIGN_DOWN(pFreeReserveEnd, OS_PAGE_SIZE) == pFreeReserveEnd); + +#ifdef FEATURE_RWX_MEMORY + // See if we need to update current allocation holder or protect committed pages. + if (_UseAccessManager()) + { + if (pFreeCommitEnd - pNextFree > 0) + { +#ifndef STRESS_MEMACCESSMGR + // Create or update the alloc cache, used to speed up new allocations. + // If there is available commited memory and either m_pNextFree is + // being updated past a page boundary or the current cache is empty, + // then update the cache. + if (ALIGN_DOWN(m_pNextFree, OS_PAGE_SIZE) != ALIGN_DOWN(pNextFree, OS_PAGE_SIZE) || + m_hCurPageRW.GetRange().GetLength() == 0) + { + // Update current alloc page write access holder. + if (!_AcquireWriteAccess(ALIGN_DOWN(pNextFree, OS_PAGE_SIZE), + OS_PAGE_SIZE, + &m_hCurPageRW)) + { + return false; + } + } +#endif // STRESS_MEMACCESSMGR + + } + else + { // No available committed memory. Release the cache. + m_hCurPageRW.Release(); + } + } +#endif // FEATURE_RWX_MEMORY + + m_pNextFree = pNextFree; + m_pFreeCommitEnd = pFreeCommitEnd; + m_pFreeReserveEnd = pFreeReserveEnd; + return true; +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd) +{ + return _UpdateMemPtrs(pNextFree, pFreeCommitEnd, m_pFreeReserveEnd); +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_UpdateMemPtrs(UInt8* pNextFree) +{ + return _UpdateMemPtrs(pNextFree, m_pFreeCommitEnd); +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_AllocNewBlock(UIntNative cbMem) +{ + cbMem = ALIGN_UP(max(cbMem, s_minBlockSize), OS_PAGE_SIZE);; + + UInt8 * pbMem = reinterpret_cast + (PalVirtualAlloc(NULL, cbMem, MEM_COMMIT, m_roProtectType)); + + if (pbMem == NULL) + return false; + + BlockListElem *pBlockListElem = new (nothrow) BlockListElem(pbMem, cbMem); + if (pBlockListElem == NULL) + { + PalVirtualFree(pbMem, 0, MEM_RELEASE); + return false; + } + + // Add to the list. While there is no race for writers (we hold the lock) we have the + // possibility of simultaneous readers, and using the interlocked version creates a + // memory barrier to make sure any reader sees a consistent list. 
+ m_blockList.PushHeadInterlocked(pBlockListElem); + + return _UpdateMemPtrs(pbMem, pbMem + cbMem, pbMem + cbMem); +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::_AllocFromCurBlock( + UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG) +{ + UInt8 * pbMem = NULL; + + cbMem += (UInt8 *)ALIGN_UP(m_pNextFree, alignment) - m_pNextFree; + + if (m_pNextFree + cbMem <= m_pFreeCommitEnd || + _CommitFromCurBlock(cbMem)) + { + ASSERT(cbMem + m_pNextFree <= m_pFreeCommitEnd); +#ifdef FEATURE_RWX_MEMORY + if (pRWAccessHolder != NULL) + { + if (!_AcquireWriteAccess(m_pNextFree, cbMem, pRWAccessHolder)) + return NULL; + } +#endif // FEATURE_RWX_MEMORY + pbMem = ALIGN_UP(m_pNextFree, alignment); + + if (!_UpdateMemPtrs(m_pNextFree + cbMem)) + return NULL; + } + + return pbMem; +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_CommitFromCurBlock(UIntNative cbMem) +{ + ASSERT(m_pFreeCommitEnd < m_pNextFree + cbMem); + + if (m_pNextFree + cbMem <= m_pFreeReserveEnd) + { + UIntNative cbMemToCommit = ALIGN_UP(cbMem, OS_PAGE_SIZE); + +#ifdef FEATURE_RWX_MEMORY + if (_UseAccessManager()) + { + if (!m_pAccessMgr->ManageMemoryRange(MemRange(m_pFreeCommitEnd, cbMemToCommit), false)) + return false; + } + else + { + UInt32 oldProtectType; + if (!PalVirtualProtect(m_pFreeCommitEnd, cbMemToCommit, m_roProtectType, &oldProtectType)) + return false; + } +#endif // FEATURE_RWX_MEMORY + + return _UpdateMemPtrs(m_pNextFree, m_pFreeCommitEnd + cbMemToCommit); + } + + return false; +} + +//------------------------------------------------------------------------------------------------- +void * __cdecl operator new(size_t n, AllocHeap * alloc) +{ + return alloc->Alloc(n); +} + +//------------------------------------------------------------------------------------------------- +void * __cdecl operator new[](size_t n, AllocHeap * alloc) +{ + return alloc->Alloc(n); +} + diff --git a/src/coreclr/src/nativeaot/Runtime/allocheap.h b/src/coreclr/src/nativeaot/Runtime/allocheap.h new file mode 100644 index 0000000000000..171d63109b557 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/allocheap.h @@ -0,0 +1,122 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "forward_declarations.h" + +#ifdef FEATURE_RWX_MEMORY +#define WRITE_ACCESS_HOLDER_ARG , rh::util::WriteAccessHolder *pRWAccessHolder +#define WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT , rh::util::WriteAccessHolder *pRWAccessHolder = NULL +#define PASS_WRITE_ACCESS_HOLDER_ARG , pRWAccessHolder +#else // FEATURE_RWX_MEMORY +#define WRITE_ACCESS_HOLDER_ARG +#define WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT +#define PASS_WRITE_ACCESS_HOLDER_ARG +#endif // FEATURE_RWX_MEMORY + +class AllocHeap +{ + public: + AllocHeap(); + +#ifdef FEATURE_RWX_MEMORY + // If pAccessMgr is non-NULL, it will be used to manage R/W access to the memory allocated. + AllocHeap(UInt32 rwProtectType = PAGE_READWRITE, + UInt32 roProtectType = 0, // 0 indicates "same as rwProtectType" + rh::util::MemAccessMgr* pAccessMgr = NULL); +#endif // FEATURE_RWX_MEMORY + + bool Init(); + + bool Init(UInt8 * pbInitialMem, + UIntNative cbInitialMemCommit, + UIntNative cbInitialMemReserve, + bool fShouldFreeInitialMem); + + ~AllocHeap(); + + // If AllocHeap was created with a MemAccessMgr, pRWAccessHolder must be non-NULL. 
+ // On return, the holder will permit R/W access to the allocated memory until it + // is destructed. + UInt8 * Alloc(UIntNative cbMem WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT); + + // If AllocHeap was created with a MemAccessMgr, pRWAccessHolder must be non-NULL. + // On return, the holder will permit R/W access to the allocated memory until it + // is destructed. + UInt8 * AllocAligned(UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT); + + // Returns true if this AllocHeap owns the memory range [pvMem, pvMem+cbMem) + bool Contains(void * pvMem, + UIntNative cbMem); + +#ifdef FEATURE_RWX_MEMORY + // Used with previously-allocated memory for which RW access is needed again. + // Returns true on success. R/W access will be granted until the holder is + // destructed. + bool AcquireWriteAccess(void* pvMem, + UIntNative cbMem, + rh::util::WriteAccessHolder* pHolder); +#endif // FEATURE_RWX_MEMORY + + private: + // Allocation Helpers + UInt8* _Alloc(UIntNative cbMem, UIntNative alignment WRITE_ACCESS_HOLDER_ARG); + bool _AllocNewBlock(UIntNative cbMem); + UInt8* _AllocFromCurBlock(UIntNative cbMem, UIntNative alignment WRITE_ACCESS_HOLDER_ARG); + bool _CommitFromCurBlock(UIntNative cbMem); + + // Access protection helpers +#ifdef FEATURE_RWX_MEMORY + bool _AcquireWriteAccess(UInt8* pvMem, UIntNative cbMem, rh::util::WriteAccessHolder* pHolder); +#endif // FEATURE_RWX_MEMORY + bool _UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd, UInt8* pFreeReserveEnd); + bool _UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd); + bool _UpdateMemPtrs(UInt8* pNextFree); + bool _UseAccessManager() { return m_rwProtectType != m_roProtectType; } + + static const UIntNative s_minBlockSize = OS_PAGE_SIZE; + + typedef rh::util::MemRange Block; + typedef DPTR(Block) PTR_Block; + struct BlockListElem : public Block + { + BlockListElem(Block const & block) + : Block(block) + {} + + BlockListElem(UInt8 * pbMem, UIntNative cbMem) + : Block(pbMem, cbMem) + {} + + Block m_block; + PTR_Block m_pNext; + }; + + typedef SList BlockList; + BlockList m_blockList; + + UInt32 m_rwProtectType; // READ/WRITE/EXECUTE/etc + UInt32 m_roProtectType; // What to do with fully allocated and initialized pages. + +#ifdef FEATURE_RWX_MEMORY + rh::util::MemAccessMgr* m_pAccessMgr; + rh::util::WriteAccessHolder m_hCurPageRW; // Used to hold RW access to the current allocation page + // Passed as pHint to MemAccessMgr::AcquireWriteAccess. +#endif // FEATURE_RWX_MEMORY + UInt8 * m_pNextFree; + UInt8 * m_pFreeCommitEnd; + UInt8 * m_pFreeReserveEnd; + + UInt8 * m_pbInitialMem; + bool m_fShouldFreeInitialMem; + + Crst m_lock; + + INDEBUG(bool m_fIsInit;) +}; +typedef DPTR(AllocHeap) PTR_AllocHeap; + +//------------------------------------------------------------------------------------------------- +void * __cdecl operator new(size_t n, AllocHeap * alloc); +void * __cdecl operator new[](size_t n, AllocHeap * alloc); + diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.S new file mode 100644 index 0000000000000..c1098e78245d1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.S @@ -0,0 +1,343 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// Allocate non-array, non-finalizable object. 
If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// RDI == EEType +NESTED_ENTRY RhpNewFast, _TEXT, NoHandler + push_nonvol_reg rbx + mov rbx, rdi + + // rax = GetThread() + INLINE_GETTHREAD + + // + // rbx contains EEType pointer + // + mov edx, [rbx + OFFSETOF__EEType__m_uBaseSize] + + // + // rax: Thread pointer + // rbx: EEType pointer + // rdx: base size + // + + mov rsi, [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + add rdx, rsi + cmp rdx, [rax + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja LOCAL_LABEL(RhpNewFast_RarePath) + + // set the new alloc pointer + mov [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rdx + + mov rax, rsi + + // set the new object's EEType pointer + mov [rsi], rbx + + .cfi_remember_state + pop_nonvol_reg rbx + ret + + .cfi_restore_state + .cfi_def_cfa_offset 16 // workaround cfi_restore_state bug +LOCAL_LABEL(RhpNewFast_RarePath): + mov rdi, rbx // restore EEType + xor esi, esi + pop_nonvol_reg rbx + jmp C_FUNC(RhpNewObject) + +NESTED_END RhpNewFast, _TEXT + + + +// Allocate non-array object with finalizer +// RDI == EEType +LEAF_ENTRY RhpNewFinalizable, _TEXT + mov esi, GC_ALLOC_FINALIZE + jmp C_FUNC(RhpNewObject) +LEAF_END RhpNewFinalizable, _TEXT + + + +// Allocate non-array object +// RDI == EEType +// ESI == alloc flags +NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rcx + END_PROLOGUE + + // RCX: transition frame + + // Preserve the EEType in RBX + mov rbx, rdi + + mov edx, [rdi + OFFSETOF__EEType__m_uBaseSize] // cbSize + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's EEType pointer on success. + test rax, rax + jz LOCAL_LABEL(NewOutOfMemory) + mov [rax + OFFSETOF__Object__m_pEEType], rbx + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov esi, [rbx + OFFSETOF__EEType__m_uBaseSize] +.att_syntax + cmp $RH_LARGE_OBJECT_SIZE, %rsi +.intel_syntax noprefix + jb LOCAL_LABEL(New_SkipPublish) + mov rdi, rax // rdi: object + // rsi: already contains object size + call C_FUNC(RhpPublishObject) // rax: this function returns the object that was passed-in +LOCAL_LABEL(New_SkipPublish): + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + ret + + .cfi_restore_state + .cfi_def_cfa_offset 96 // workaround cfi_restore_state bug +LOCAL_LABEL(NewOutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov rdi, rbx // EEType pointer + xor esi, esi // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) +NESTED_END RhpNewObject, _TEXT + + +// Allocate a string. +// RDI == EEType +// ESI == character/element count +NESTED_ENTRY RhNewString, _TEXT, NoHandler + // we want to limit the element count to the non-negative 32-bit int range + cmp rsi, MAX_STRING_LENGTH + ja LOCAL_LABEL(StringSizeOverflow) + + push_nonvol_reg rbx + push_nonvol_reg r12 + push_register rcx // padding + + mov rbx, rdi // save EEType + mov r12, rsi // save element count + + // rax = GetThread() + INLINE_GETTHREAD + + mov rcx, rax // rcx = Thread* + + // Compute overall allocation size (align(base size + (element size * elements), 8)). 
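+    // In C terms (STRING_BASE_SIZE and STRING_COMPONENT_SIZE come from the generated
+    // AsmOffsets headers included above), the two instructions below compute:
+    //
+    //     size_t cb = ((size_t)count * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7) & ~(size_t)7;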
+ lea rax, [r12 * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7] + and rax, -8 + + // rax == string size + // rbx == EEType + // rcx == Thread* + // r12 == element count + + mov rdx, rax + add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc LOCAL_LABEL(RhNewString_RarePath) + + // rax == new alloc ptr + // rbx == EEType + // rcx == Thread* + // rdx == string size + // r12 == element count + cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja LOCAL_LABEL(RhNewString_RarePath) + + mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + // calc the new object pointer + sub rax, rdx + + mov [rax + OFFSETOF__Object__m_pEEType], rbx + mov [rax + OFFSETOF__String__m_Length], r12d + + .cfi_remember_state + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + ret + + .cfi_restore_state + .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug +LOCAL_LABEL(RhNewString_RarePath): + mov rdi, rbx // restore EEType + mov rsi, r12 // restore element count + // passing string size in rdx + + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + jmp C_FUNC(RhpNewArrayRare) + +LOCAL_LABEL(StringSizeOverflow): + // We get here if the size of the final string object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // rdi holds EEType pointer already + xor esi, esi // Indicate that we should throw OOM. + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhNewString, _TEXT + + +// Allocate one dimensional, zero based array (SZARRAY). +// RDI == EEType +// ESI == element count +NESTED_ENTRY RhpNewArray, _TEXT, NoHandler + // we want to limit the element count to the non-negative 32-bit int range + cmp rsi, 0x07fffffff + ja LOCAL_LABEL(ArraySizeOverflow) + + push_nonvol_reg rbx + push_nonvol_reg r12 + push_register rcx // padding + + mov rbx, rdi // save EEType + mov r12, rsi // save element count + + // rax = GetThread() + INLINE_GETTHREAD + + mov rcx, rax // rcx = Thread* + + // Compute overall allocation size (align(base size + (element size * elements), 8)). 
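+    // In C terms (offsets from the generated AsmOffsets headers), the sequence below computes:
+    //
+    //     size_t cb = ((size_t)count * pEEType->m_usComponentSize + pEEType->m_uBaseSize + 7) & ~(size_t)7;
+    //
+    // using a widening mul on the 16-bit component size.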
+ movzx eax, word ptr [rbx + OFFSETOF__EEType__m_usComponentSize] + mul r12 + mov edx, [rbx + OFFSETOF__EEType__m_uBaseSize] + add rax, rdx + add rax, 7 + and rax, -8 + + // rax == array size + // rbx == EEType + // rcx == Thread* + // r12 == element count + + mov rdx, rax + add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc LOCAL_LABEL(RhpNewArray_RarePath) + + // rax == new alloc ptr + // rbx == EEType + // rcx == Thread* + // rdx == array size + // r12 == element count + cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja LOCAL_LABEL(RhpNewArray_RarePath) + + mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + // calc the new object pointer + sub rax, rdx + + mov [rax + OFFSETOF__Object__m_pEEType], rbx + mov [rax + OFFSETOF__Array__m_Length], r12d + + .cfi_remember_state + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + ret + + .cfi_restore_state + .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug +LOCAL_LABEL(RhpNewArray_RarePath): + mov rdi, rbx // restore EEType + mov rsi, r12 // restore element count + // passing array size in rdx + + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + jmp C_FUNC(RhpNewArrayRare) + +LOCAL_LABEL(ArraySizeOverflow): + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // rdi holds EEType pointer already + mov esi, 1 // Indicate that we should throw OverflowException + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArray, _TEXT + +NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // rdi == EEType + // rsi == element count + // rdx == array size + + PUSH_COOP_PINVOKE_FRAME rcx + END_PROLOGUE + + // rcx: transition frame + + // Preserve the EEType in RBX + mov rbx, rdi + // Preserve the element count in R12 + mov r12, rsi + // Preserve the size in R13 + mov r13, rdx + + // passing EEType in rdi + xor rsi, rsi // uFlags + // pasing size in rdx + // pasing pTransitionFrame in rcx + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's EEType pointer and length on success. + test rax, rax + jz LOCAL_LABEL(ArrayOutOfMemory) + mov [rax + OFFSETOF__Object__m_pEEType], rbx + mov [rax + OFFSETOF__Array__m_Length], r12d + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC +.att_syntax + cmp $RH_LARGE_OBJECT_SIZE, %r13 +.intel_syntax noprefix + jb LOCAL_LABEL(NewArray_SkipPublish) + mov rdi, rax // rcx: object + mov rsi, r13 // rdx: object size + call C_FUNC(RhpPublishObject) // rax: this function returns the object that was passed-in +LOCAL_LABEL(NewArray_SkipPublish): + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + ret + + .cfi_restore_state + .cfi_def_cfa_offset 96 // workaround cfi_restore_state bug +LOCAL_LABEL(ArrayOutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov rdi, rbx // EEType pointer + xor esi, esi // Indicate that we should throw OOM. 
+ + POP_COOP_PINVOKE_FRAME + + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArrayRare, _TEXT + diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.asm new file mode 100644 index 0000000000000..bd73a8ffe711f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.asm @@ -0,0 +1,274 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include asmmacros.inc + + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. +;; RCX == EEType +LEAF_ENTRY RhpNewFast, _TEXT + + ;; rdx = GetThread(), TRASHES rax + INLINE_GETTHREAD rdx, rax + + ;; + ;; rcx contains EEType pointer + ;; + mov r8d, [rcx + OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; eax: base size + ;; rcx: EEType pointer + ;; rdx: Thread pointer + ;; + + mov rax, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + add r8, rax + cmp r8, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja RhpNewFast_RarePath + + ;; set the new alloc pointer + mov [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], r8 + + ;; set the new object's EEType pointer + mov [rax], rcx + ret + +RhpNewFast_RarePath: + xor edx, edx + jmp RhpNewObject + +LEAF_END RhpNewFast, _TEXT + + + +;; Allocate non-array object with finalizer +;; RCX == EEType +LEAF_ENTRY RhpNewFinalizable, _TEXT + mov edx, GC_ALLOC_FINALIZE + jmp RhpNewObject +LEAF_END RhpNewFinalizable, _TEXT + + + +;; Allocate non-array object +;; RCX == EEType +;; EDX == alloc flags +NESTED_ENTRY RhpNewObject, _TEXT + + PUSH_COOP_PINVOKE_FRAME r9 + END_PROLOGUE + + ; R9: transition frame + + ;; Preserve the EEType in RSI + mov rsi, rcx + + mov r8d, [rsi + OFFSETOF__EEType__m_uBaseSize] ; cbSize + + ;; Call the rest of the allocation helper. + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + test rax, rax + jz NewOutOfMemory + mov [rax + OFFSETOF__Object__m_pEEType], rsi + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov edx, [rsi + OFFSETOF__EEType__m_uBaseSize] + cmp rdx, RH_LARGE_OBJECT_SIZE + jb New_SkipPublish + mov rcx, rax ;; rcx: object + ;; rdx: already contains object size + call RhpPublishObject ;; rax: this function returns the object that was passed-in +New_SkipPublish: + + POP_COOP_PINVOKE_FRAME + ret + +NewOutOfMemory: + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov rcx, rsi ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + jmp RhExceptionHandling_FailedAllocation +NESTED_END RhpNewObject, _TEXT + + +;; Allocate a string. +;; RCX == EEType +;; EDX == character/element count +LEAF_ENTRY RhNewString, _TEXT + + ; we want to limit the element count to the non-negative 32-bit int range + cmp rdx, MAX_STRING_LENGTH + ja StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 8)). 
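+    ; In C terms, the fast path that follows is roughly:
+    ;
+    ;     size_t   cb      = ((size_t)count * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7) & ~(size_t)7;
+    ;     uint8_t* pNewTop = alloc_ptr + cb;
+    ;     if (overflowed || pNewTop > alloc_limit) goto RhpNewArrayRare;   ; slow path allocates via RhpGcAlloc
+    ;     alloc_ptr = pNewTop;                                             ; bump the thread-local allocation pointer
+    ;     object    = pNewTop - cb;                                        ; object starts at the old alloc_ptr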
+ lea rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)] + and rax, -8 + + ; rax == string size + ; rcx == EEType + ; rdx == element count + + INLINE_GETTHREAD r10, r8 + + mov r8, rax + add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc RhpNewArrayRare + + ; rax == new alloc ptr + ; rcx == EEType + ; rdx == element count + ; r8 == array size + ; r10 == thread + cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja RhpNewArrayRare + + mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + ; calc the new object pointer + sub rax, r8 + + mov [rax + OFFSETOF__Object__m_pEEType], rcx + mov [rax + OFFSETOF__String__m_Length], edx + + ret + +StringSizeOverflow: + ; We get here if the size of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; rcx holds EEType pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation +LEAF_END RhNewString, _TEXT + + +;; Allocate one dimensional, zero based array (SZARRAY). +;; RCX == EEType +;; EDX == element count +LEAF_ENTRY RhpNewArray, _TEXT + + ; we want to limit the element count to the non-negative 32-bit int range + cmp rdx, 07fffffffh + ja ArraySizeOverflow + + ; save element count + mov r8, rdx + + ; Compute overall allocation size (align(base size + (element size * elements), 8)). + movzx eax, word ptr [rcx + OFFSETOF__EEType__m_usComponentSize] + mul rdx + mov edx, [rcx + OFFSETOF__EEType__m_uBaseSize] + add rax, rdx + add rax, 7 + and rax, -8 + + mov rdx, r8 + + ; rax == array size + ; rcx == EEType + ; rdx == element count + + INLINE_GETTHREAD r10, r8 + + mov r8, rax + add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc RhpNewArrayRare + + ; rax == new alloc ptr + ; rcx == EEType + ; rdx == element count + ; r8 == array size + ; r10 == thread + cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja RhpNewArrayRare + + mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + ; calc the new object pointer + sub rax, r8 + + mov [rax + OFFSETOF__Object__m_pEEType], rcx + mov [rax + OFFSETOF__Array__m_Length], edx + + ret + +ArraySizeOverflow: + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; rcx holds EEType pointer already + mov edx, 1 ; Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation +LEAF_END RhpNewArray, _TEXT + +NESTED_ENTRY RhpNewArrayRare, _TEXT + + ; rcx == EEType + ; rdx == element count + ; r8 == array size + + PUSH_COOP_PINVOKE_FRAME r9 + END_PROLOGUE + + ; r9: transition frame + + ; Preserve the EEType in RSI + mov rsi, rcx + ; Preserve the element count in RBX + mov rbx, rdx + ; Preserve the size in RDI + mov rdi, r8 + + ; passing EEType in rcx + xor rdx, rdx ; uFlags + ; pasing size in r8 + ; pasing pTransitionFrame in r9 + + ; Call the rest of the allocation helper. + ; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. 
+ test rax, rax + jz ArrayOutOfMemory + mov [rax + OFFSETOF__Object__m_pEEType], rsi + mov [rax + OFFSETOF__Array__m_Length], ebx + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + cmp rdi, RH_LARGE_OBJECT_SIZE + jb NewArray_SkipPublish + mov rcx, rax ;; rcx: object + mov rdx, rdi ;; rdx: object size + call RhpPublishObject ;; rax: this function returns the object that was passed-in +NewArray_SkipPublish: + + POP_COOP_PINVOKE_FRAME + ret + +ArrayOutOfMemory: + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov rcx, rsi ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + jmp RhExceptionHandling_FailedAllocation + +NESTED_END RhpNewArrayRare, _TEXT + + + END diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/src/nativeaot/Runtime/amd64/AsmMacros.inc new file mode 100644 index 0000000000000..b20a6ba897105 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AsmMacros.inc @@ -0,0 +1,418 @@ +;; +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. +;; + +include AsmOffsets.inc ; generated by the build from AsmOffsets.cpp + +;; +;; MACROS +;; + +; +; Define macros to build unwind data for prologues. +; + +push_nonvol_reg macro Reg + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, + + push Reg + .pushreg Reg + + endm + +push_vol_reg macro Reg + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, push_vol_reg cannot be used after save_reg_postrsp + + push Reg + .allocstack 8 + + endm + +push_imm macro imm + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, push_vol_reg cannot be used after save_reg_postrsp + + push imm + .allocstack 8 + + endm + +push_eflags macro + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, push_eflags cannot be used after save_reg_postrsp + + pushfq + .allocstack 8 + + endm + +alloc_stack macro Size + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, alloc_stack cannot be used after save_reg_postrsp + + sub rsp, Size + .allocstack Size + + endm + +save_reg_frame macro Reg, FrameReg, Offset + + .erre ___FRAME_REG_SET, save_reg_frame cannot be used before set_frame + + mov Offset[FrameReg], Reg + .savereg Reg, Offset + + endm + +save_reg_postrsp macro Reg, Offset + + .errnz ___FRAME_REG_SET, save_reg_postrsp cannot be used after set_frame + + mov Offset[rsp], Reg + .savereg Reg, Offset + + ___STACK_ADJUSTMENT_FORBIDDEN = 1 + + endm + +save_xmm128_frame macro Reg, FrameReg, Offset + + .erre ___FRAME_REG_SET, save_xmm128_frame cannot be used before set_frame + + movdqa Offset[FrameReg], Reg + .savexmm128 Reg, Offset + + endm + +save_xmm128_postrsp macro Reg, Offset + + .errnz ___FRAME_REG_SET, save_reg_postrsp cannot be used after set_frame + + movdqa Offset[rsp], Reg + .savexmm128 Reg, Offset + + ___STACK_ADJUSTMENT_FORBIDDEN = 1 + + endm + +set_frame macro Reg, Offset + + .errnz ___FRAME_REG_SET, set_frame cannot be used more than once + +if Offset + + lea Reg, Offset[rsp] + +else + + mov reg, rsp + +endif + + .setframe Reg, Offset + ___FRAME_REG_SET = 1 + + endm + +END_PROLOGUE macro + + .endprolog + + endm + +; +; Define function entry/end macros. 
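+; Illustrative usage (hypothetical helper, shown only to document the macro shape):
+;
+;     LEAF_ENTRY RhpExampleHelper, _TEXT
+;         mov     rax, rcx
+;         ret
+;     LEAF_END RhpExampleHelper, _TEXT
+;
+; NESTED_ENTRY/NESTED_END follow the same pattern but declare a frame (optionally with a
+; handler), so prologs built with the push_*/alloc_stack/set_frame macros above must be
+; terminated with END_PROLOGUE before the body begins.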
+; + +LEAF_ENTRY macro Name, Section + +Section segment para 'CODE' + + align 16 + + public Name +Name proc + + endm + +LEAF_END macro Name, section + +Name endp + +Section ends + + endm + +LEAF_END_MARKED macro Name, section + public Name&_End +Name&_End label qword + ; this nop is important to keep the label in + ; the right place in the face of BBT + nop + +Name endp + +Section ends + + endm + + +NESTED_ENTRY macro Name, Section, Handler + +Section segment para 'CODE' + + align 16 + + public Name + +ifb + +Name proc frame + +else + +Name proc frame:Handler + +endif + + ___FRAME_REG_SET = 0 + ___STACK_ADJUSTMENT_FORBIDDEN = 0 + + endm + +NESTED_END macro Name, section + +Name endp + +Section ends + + endm + +NESTED_END_MARKED macro Name, section + public Name&_End +Name&_End label qword + +Name endp + +Section ends + + endm + + +ALTERNATE_ENTRY macro Name + +Name label proc +PUBLIC Name + endm + +LABELED_RETURN_ADDRESS macro Name + +Name label proc +PUBLIC Name + endm + +EXPORT_POINTER_TO_ADDRESS macro Name + + local AddressToExport + +AddressToExport label proc + + .const + + align 8 + +Name dq offset AddressToExport + + public Name + + .code + + endm + +_tls_array equ 58h ;; offsetof(TEB, ThreadLocalStoragePointer) + +;; +;; __declspec(thread) version +;; +INLINE_GETTHREAD macro destReg, trashReg + EXTERN _tls_index : DWORD + EXTERN tls_CurrentThread:DWORD + +;; +;; construct 'eax' from 'rax' so that the register size and data size match +;; +;; BEWARE: currently only r10 is allowed as destReg from the r8-r15 set. +;; +ifidni , +destRegDWORD EQU r10d +else +destRegDWORD TEXTEQU @CatStr( e, @SubStr( destReg, 2, 2 ) ) +endif + + mov destRegDWORD, [_tls_index] + mov trashReg, gs:[_tls_array] + mov trashReg, [trashReg + destReg * 8] + mov destRegDWORD, SECTIONREL tls_CurrentThread + add destReg, trashReg + +endm + +INLINE_THREAD_UNHIJACK macro threadReg, trashReg1, trashReg2 + ;; + ;; Thread::Unhijack() + ;; + mov trashReg1, [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress] + cmp trashReg1, 0 + je @F + + mov trashReg2, [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + mov [trashReg2], trashReg1 + mov qword ptr [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov qword ptr [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + +@@: +endm + +DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP + +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; INVARIANTS +;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup. +;; - All preserved registers remain unchanged from their values in managed code. 
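+;;
+;; The frame built below mirrors PInvokeTransitionFrame (see AsmOffsetsCpu.h); once the
+;; pushes complete, trashReg points at:
+;;
+;;     +00h  m_RIP            (return address into the managed caller)
+;;     +08h  m_FramePointer   (caller's RBP)
+;;     +10h  m_pThread        (scratch slot, unused by the stack walker)
+;;     +18h  m_Flags          (DEFAULT_FRAME_SAVE_FLAGS register bitmask)
+;;     +20h  m_PreservedRegs  (RBX, RSI, RDI, R12-R15, then the caller's RSP)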
+;; +PUSH_COOP_PINVOKE_FRAME macro trashReg + lea trashReg, [rsp + 8h] + push_vol_reg trashReg ; save caller's RSP + push_nonvol_reg r15 ; save preserved registers + push_nonvol_reg r14 ; .. + push_nonvol_reg r13 ; .. + push_nonvol_reg r12 ; .. + push_nonvol_reg rdi ; .. + push_nonvol_reg rsi ; .. + push_nonvol_reg rbx ; .. + push_imm DEFAULT_FRAME_SAVE_FLAGS ; save the register bitmask + push_vol_reg trashReg ; Thread * (unused by stackwalker) + push_nonvol_reg rbp ; save caller's RBP + mov trashReg, [rsp + 11*8] ; Find the return address + push_vol_reg trashReg ; save m_RIP + lea trashReg, [rsp + 0] ; trashReg == address of frame + + ;; allocate scratch space and any required alignment + alloc_stack 28h +endm + +;; +;; Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +;; +POP_COOP_PINVOKE_FRAME macro + add rsp, 30h + pop rbp ; restore RBP + pop r10 ; discard thread + pop r10 ; discard bitmask + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + pop r10 ; discard caller RSP +endm + +; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction +; sequence which is recognized by the unwinder as a valid epilogue terminator +TAILJMP_RAX TEXTEQU + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 01h +TSF_SuppressGcStress equ 08h +TSF_DoNotTriggerGc equ 10h + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + + + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_RBX equ 00000001h +PTFF_SAVE_RSI equ 00000002h +PTFF_SAVE_RDI equ 00000004h +PTFF_SAVE_R12 equ 00000010h +PTFF_SAVE_R13 equ 00000020h +PTFF_SAVE_R14 equ 00000040h +PTFF_SAVE_R15 equ 00000080h +PTFF_SAVE_ALL_PRESERVED equ 000000F7h ;; NOTE: RBP is not included in this set! 
+PTFF_SAVE_RSP equ 00008000h +PTFF_SAVE_RAX equ 00000100h ;; RAX is saved if it contains a GC ref and we're in hijack handler +PTFF_SAVE_ALL_SCRATCH equ 00007F00h +PTFF_RAX_IS_GCREF equ 00010000h ;; iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar +PTFF_RAX_IS_BYREF equ 00020000h ;; iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar +PTFF_THREAD_ABORT equ 00040000h ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 43h + +;; +;; CONSTANTS -- SYMBOLS +;; + +ifdef FEATURE_GC_STRESS +REDHAWKGCINTERFACE__STRESSGC equ ?StressGc@RedhawkGCInterface@@SAXXZ +THREAD__HIJACKFORGCSTRESS equ ?HijackForGcStress@Thread@@SAXPEAUPAL_LIMITED_CONTEXT@@@Z +endif ;; FEATURE_GC_STRESS + +;; +;; IMPORTS +;; + +EXTERN RhpGcAlloc : PROC +EXTERN RhpValidateExInfoPop : PROC +EXTERN RhDebugBreak : PROC +EXTERN RhpWaitForSuspend2 : PROC +EXTERN RhpWaitForGC2 : PROC +EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC +EXTERN RhExceptionHandling_FailedAllocation : PROC +EXTERN RhpPublishObject : PROC +EXTERN RhpCalculateStackTraceWorker : PROC +EXTERN RhThrowHwEx : PROC +EXTERN RhThrowEx : PROC +EXTERN RhRethrow : PROC +ifdef FEATURE_GC_STRESS +EXTERN REDHAWKGCINTERFACE__STRESSGC : PROC +EXTERN THREAD__HIJACKFORGCSTRESS : PROC +endif ;; FEATURE_GC_STRESS + +EXTERN g_lowest_address : QWORD +EXTERN g_highest_address : QWORD +EXTERN g_ephemeral_low : QWORD +EXTERN g_ephemeral_high : QWORD +EXTERN g_card_table : QWORD +EXTERN RhpTrapThreads : DWORD + diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/amd64/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..2239433993f08 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AsmOffsetsCpu.h @@ -0,0 +1,121 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
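+//
+// For illustration only: the PLAT_ASM_SIZEOF / PLAT_ASM_OFFSET entries below are consumed by
+// AsmOffsets.h (not part of this change), where each entry can be thought of as expanding into a
+// compile-time check of roughly this shape (hypothetical expansion, not the actual macro definition):
+//
+//     static_assert(offsetof(ExInfo, m_pPrevExInfo) == 0x0,  "asm offset out of sync with C++");
+//     static_assert(sizeof(ExInfo)                  == 0x260, "asm size out of sync with C++");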
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +#ifndef UNIX_AMD64_ABI +PLAT_ASM_SIZEOF(260, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(250, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(230, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(228, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(100, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, Rsp) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, Rbp) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, Rdi) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, Rsi) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, Rax) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, Rbx) + +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R12) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R13) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R14) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R15) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, Xmm6) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, Xmm7) +PLAT_ASM_OFFSET(80, PAL_LIMITED_CONTEXT, Xmm8) +PLAT_ASM_OFFSET(90, PAL_LIMITED_CONTEXT, Xmm9) +PLAT_ASM_OFFSET(0a0, PAL_LIMITED_CONTEXT, Xmm10) +PLAT_ASM_OFFSET(0b0, PAL_LIMITED_CONTEXT, Xmm11) +PLAT_ASM_OFFSET(0c0, PAL_LIMITED_CONTEXT, Xmm12) +PLAT_ASM_OFFSET(0d0, PAL_LIMITED_CONTEXT, Xmm13) +PLAT_ASM_OFFSET(0e0, PAL_LIMITED_CONTEXT, Xmm14) +PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15) + +PLAT_ASM_SIZEOF(130, REGDISPLAY) +PLAT_ASM_OFFSET(78, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) +PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) +PLAT_ASM_OFFSET(28, REGDISPLAY, pRsi) +PLAT_ASM_OFFSET(30, REGDISPLAY, pRdi) +PLAT_ASM_OFFSET(58, REGDISPLAY, pR12) +PLAT_ASM_OFFSET(60, REGDISPLAY, pR13) +PLAT_ASM_OFFSET(68, REGDISPLAY, pR14) +PLAT_ASM_OFFSET(70, REGDISPLAY, pR15) +PLAT_ASM_OFFSET(90, REGDISPLAY, Xmm) + +#else // !UNIX_AMD64_ABI + +PLAT_ASM_SIZEOF(1a8, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(1a0, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(180, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(178, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(50, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, 
PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, Rsp) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, Rbp) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, Rax) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, Rbx) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, Rdx) + +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R12) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R13) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R14) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R15) + +PLAT_ASM_SIZEOF(90, REGDISPLAY) +PLAT_ASM_OFFSET(78, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) +PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) +PLAT_ASM_OFFSET(28, REGDISPLAY, pRsi) +PLAT_ASM_OFFSET(30, REGDISPLAY, pRdi) +PLAT_ASM_OFFSET(58, REGDISPLAY, pR12) +PLAT_ASM_OFFSET(60, REGDISPLAY, pR13) +PLAT_ASM_OFFSET(68, REGDISPLAY, pR14) +PLAT_ASM_OFFSET(70, REGDISPLAY, pR15) + +#endif // !UNIX_AMD64_ABI diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.S new file mode 100644 index 0000000000000..483d4b5f9ac8c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.S @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +NESTED_ENTRY RhCallDescrWorker, _TEXT, NoHandler + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + // UNIXTODO: Implement this function + int 3 +NESTED_END RhCallDescrWorker, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.asm new file mode 100644 index 0000000000000..85c8e2dd52c5c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.asm @@ -0,0 +1,105 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + + +;;;;;;;;;;;;;;;;;;;;;;; CallingConventionConverter Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;extern "C" void RhCallDescrWorker(CallDescrData * pCallDescrData); + + NESTED_ENTRY RhCallDescrWorker, _TEXT + + push_nonvol_reg rbx ; save nonvolatile registers + push_nonvol_reg rsi ; + push_nonvol_reg rbp ; + set_frame rbp, 0 ; set frame pointer + + END_PROLOGUE + + mov rbx, rcx ; save pCallDescrData in rbx + + mov ecx, dword ptr [rbx + OFFSETOF__CallDescrData__numStackSlots] + + test ecx, 1 + jz StackAligned + push rax +StackAligned: + + mov rsi, [rbx + OFFSETOF__CallDescrData__pSrc] ; set source argument list address + lea rsi, [rsi + 8 * rcx] + +StackCopyLoop: ; copy the arguments to stack top-down to carefully probe for sufficient + ; stack space + sub rsi, 8 + push qword ptr [rsi] + dec ecx + jnz StackCopyLoop + + ; + ; N.B. All four argument registers are loaded regardless of the actual number + ; of arguments. 
+ ; + + mov rax, [rbx + OFFSETOF__CallDescrData__pFloatArgumentRegisters] ; get floating pointer arg registers pointer + + mov rcx, 0[rsp] ; load first four argument registers + mov rdx, 8[rsp] ; + mov r8, 10h[rsp] ; + mov r9, 18h[rsp] ; + test rax, rax ; + jz DoCall ; + movdqa xmm0, [rax + 00h] ; load floating point registers if they are used + movdqa xmm1, [rax + 10h] ; + movdqa xmm2, [rax + 20h] ; + movdqa xmm3, [rax + 30h] ; +DoCall: + call qword ptr [rbx + OFFSETOF__CallDescrData__pTarget] ; call target function + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + ; Symbol used to identify thunk call to managed function so the special + ; case unwinder can unwind through this function. Sadly we cannot directly + ; export this symbol right now because it confuses DIA unwinder to believe + ; it's the beginning of a new method, therefore we export the address + ; of an auxiliary variable holding the address instead. + + ; Save FP return value + + mov ecx, dword ptr [rbx + OFFSETOF__CallDescrData__fpReturnSize] + test ecx, ecx + jz ReturnsInt + + cmp ecx, 4 + je ReturnsFloat + cmp ecx, 8 + je ReturnsDouble + ; unexpected + jmp Epilog + +ReturnsInt: + mov rbx, [rbx + OFFSETOF__CallDescrData__pReturnBuffer] + mov [rbx], rax + +Epilog: + lea rsp, 0[rbp] ; deallocate argument list + pop rbp ; restore nonvolatile register + pop rsi ; + pop rbx ; + ret + +ReturnsFloat: +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + mov rbx, [rbx + OFFSETOF__CallDescrData__pReturnBuffer] + movss real4 ptr [rbx], xmm0 + jmp Epilog + +ReturnsDouble: +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + mov rbx, [rbx + OFFSETOF__CallDescrData__pReturnBuffer] + movsd real8 ptr [rbx], xmm0 + jmp Epilog + + NESTED_END RhCallDescrWorker, _TEXT + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..2a55819057212 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.S @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// +// void CallingConventionConverter_ReturnVoidReturnThunk() +// +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + ret +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +// +// int CallingConventionConverter_ReturnIntegerReturnThunk(int) +// +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +// +// Note: The "__jmpstub__" prefix is used to indicate to debugger +// that it must step-through this stub when it encounters it while +// stepping. 
+// + +// __jmpstub__CallingConventionConverter_CommonCallingStub +// +// +// struct CallingConventionConverter_CommonCallingStub_PointerData +// { +// void *ManagedCallConverterThunk; +// void *UniversalThunk; +// } +// +// struct CommonCallingStubInputData +// { +// ULONG_PTR CallingConventionId; +// CallingConventionConverter_CommonCallingStub_PointerData *commonData; +// } +// +// r10 - Points at CommonCallingStubInputData +// +// +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + +// +// void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonStub) +// +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END CallingConventionConverter_GetStubs, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..a29d2202e5b62 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.asm @@ -0,0 +1,85 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; ----------------------------------------------------------------------------------------------------------- +;; #include "asmmacros.inc" +;; ----------------------------------------------------------------------------------------------------------- + +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + align 16 + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction +; sequence which is recognized by the unwinder as a valid epilogue terminator +TAILJMP_RAX TEXTEQU +POINTER_SIZE equ 08h + +;; +;; void CallingConventionConverter_ReturnVoidReturnThunk() +;; +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + ret +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +;; +;; int CallingConventionConverter_ReturnIntegerReturnThunk(int) +;; +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + mov rax, rcx + ret +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. 
+;; + +;; __jmpstub__CallingConventionConverter_CommonCallingStub +;; +;; +;; struct CallingConventionConverter_CommonCallingStub_PointerData +;; { +;; void *ManagedCallConverterThunk; +;; void *UniversalThunk; +;; } +;; +;; struct CommonCallingStubInputData +;; { +;; ULONG_PTR CallingConventionId; +;; CallingConventionConverter_CommonCallingStub_PointerData *commonData; +;; } +;; +;; r10 - Points at CommonCallingStubInputData +;; +;; +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + mov r11, [r10] ; put CallingConventionId into r11 as "parameter" to universal transition thunk + mov r10, [r10 + POINTER_SIZE] ; get pointer to CallingConventionConverter_CommonCallingStub_PointerData into r10 + mov rax, [r10 + POINTER_SIZE] ; get address of UniversalTransitionThunk + mov r10, [r10] ; get address of ManagedCallConverterThunk + TAILJMP_RAX +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + +;; +;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonStub) +;; +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + lea rax, [CallingConventionConverter_ReturnVoidReturnThunk] + mov [rcx], rax + lea rax, [CallingConventionConverter_ReturnIntegerReturnThunk] + mov [rdx], rax + lea rax, [__jmpstub__CallingConventionConverter_CommonCallingStub] + mov [r8], rax + ret +LEAF_END CallingConventionConverter_GetStubs, _TEXT + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.S new file mode 100644 index 0000000000000..6c67859090d04 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.S @@ -0,0 +1,534 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
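+//
+// This is the Unix (System V AMD64 ABI) flavor of the exception handling helpers; as the per-routine
+// headers below note, arguments arrive in RDI/RSI/RDX/RCX here, whereas the Windows flavor in
+// ExceptionHandling.asm receives them in RCX/RDX/R8/R9.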
+ +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowHwEx +// +// INPUT: RDI: exception code of fault +// RSI: faulting RIP +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + STACKSIZEOF_ExInfo = ((SIZEOF__ExInfo + 15) & (~15)) + rsp_offsetof_Context = STACKSIZEOF_ExInfo + + mov rax, rsp // save the faulting RSP + + // Align the stack towards zero + and rsp, -16 + + xor rdx, rdx + +// struct PAL_LIMITED_CONTEXT +// { + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_register rdx // rdx set to 0 + push_nonvol_reg rbx + push_register rdx // rax set to 0 + push_nonvol_reg rbp + push_register rax // faulting RSP + push_register rsi // faulting IP +// } + // allocate outgoing args area and space for the ExInfo + alloc_stack STACKSIZEOF_ExInfo + + END_PROLOGUE + + mov rbx, rdi + INLINE_GETTHREAD + mov rdi, rbx + + mov rsi, rsp // rsi <- ExInfo* + + xor rdx, rdx + mov [rsi + OFFSETOF__ExInfo__m_exception], rdx // init the exception object to null + mov byte ptr [rsi + OFFSETOF__ExInfo__m_passNumber], 1 // init to the first pass + mov dword ptr [rsi + OFFSETOF__ExInfo__m_idxCurClause], 0xFFFFFFFF + mov byte ptr [rsi + OFFSETOF__ExInfo__m_kind], 2 // ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + mov rdx, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rsi + OFFSETOF__ExInfo__m_pPrevExInfo], rdx // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rsi // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + lea rdx, [rsp + rsp_offsetof_Context] // rdx <- PAL_LIMITED_CONTEXT* + mov [rsi + OFFSETOF__ExInfo__m_pExContext], rdx // init ExInfo.m_pExContext + + // rdi still contains the exception code + // rsi contains the address of the ExInfo + call EXTERNAL_C_FUNC(RhThrowHwEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + // no return + int 3 + +NESTED_END RhpThrowHwEx, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowEx +// +// INPUT: RDI: exception object +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + STACKSIZEOF_ExInfo = ((SIZEOF__ExInfo + 15) & (~ 15)) + rsp_offsetof_Context = STACKSIZEOF_ExInfo + + lea rax, [rsp+8] // save the RSP of the throw site + mov rsi, [rsp] // get return address + + xor rdx, rdx + push_register rdx // padding + +// struct PAL_LIMITED_CONTEXT +// { + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_register rdx // rdx set to 0 + push_nonvol_reg rbx + push_register rdx // rax set to 0 + push_nonvol_reg rbp + push_register rax // 'faulting' RSP + push_register rsi // 'faulting' IP +// } + + // allocate space for the ExInfo + alloc_stack STACKSIZEOF_ExInfo + + END_PROLOGUE + + mov rbx, rdi + INLINE_GETTHREAD + mov rdi, rbx + + lea rbx, [rsp + rsp_offsetof_Context + SIZEOF__PAL_LIMITED_CONTEXT + 0x8] // rbx <- addr of return address + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. 
So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + INLINE_THREAD_UNHIJACK rax, rcx, rsi // trashes RCX, RSI + mov rsi, [rbx] // rdx <- return address + mov [rsp + rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP], rsi // set 'faulting' IP after unhijack + + mov rsi, rsp // rsi <- ExInfo* + + mov [rsi + OFFSETOF__ExInfo__m_exception], rdx // init the exception object to null + mov byte ptr [rsi + OFFSETOF__ExInfo__m_passNumber], 1 // init to the first pass + mov dword ptr [rsi + OFFSETOF__ExInfo__m_idxCurClause], 0xFFFFFFFF + mov byte ptr [rsi + OFFSETOF__ExInfo__m_kind], 1 // ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + mov rdx, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rsi + OFFSETOF__ExInfo__m_pPrevExInfo], rdx // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rsi // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + lea rdx, [rsp + rsp_offsetof_Context] // rdx <- PAL_LIMITED_CONTEXT* + mov [rsi + OFFSETOF__ExInfo__m_pExContext], rdx // init ExInfo.m_pExContext + + // rdi still contains the exception object + // rsi contains the address of the ExInfo + call EXTERNAL_C_FUNC(RhThrowEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + // no return + int 3 + +NESTED_END RhpThrowEx, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + STACKSIZEOF_ExInfo = ((SIZEOF__ExInfo + 15) & (~ 15)) + rsp_offsetof_Context = STACKSIZEOF_ExInfo + + lea rax, [rsp+8] // save the RSP of the throw site + mov rsi, [rsp] // get return address + + xor rdx, rdx + push_register rdx // padding + +// struct PAL_LIMITED_CONTEXT +// { + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_register rdx // rdx set to 0 + push_nonvol_reg rbx + push_register rdx // rax set to 0 + push_nonvol_reg rbp + push_register rax // 'faulting' RSP + push_register rsi // 'faulting' IP +// } + + // allocate space for the ExInfo + alloc_stack STACKSIZEOF_ExInfo + + END_PROLOGUE + + INLINE_GETTHREAD + + mov rsi, rsp // rsi <- ExInfo* + + mov [rsi + OFFSETOF__ExInfo__m_exception], rdx // init the exception object to null + mov byte ptr [rsi + OFFSETOF__ExInfo__m_passNumber], 1 // init to the first pass + mov dword ptr [rsi + OFFSETOF__ExInfo__m_idxCurClause], 0xFFFFFFFF + mov byte ptr [rsi + OFFSETOF__ExInfo__m_kind], 0 // init to a deterministic value (ExKind.None) + + + // link the ExInfo into the thread's ExInfo chain + mov rdi, [rax + OFFSETOF__Thread__m_pExInfoStackHead] // rdi <- currently active ExInfo + mov [rsi + OFFSETOF__ExInfo__m_pPrevExInfo], rdi // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rsi // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + lea rdx, [rsp + rsp_offsetof_Context] // rdx <- PAL_LIMITED_CONTEXT* + mov [rsi + OFFSETOF__ExInfo__m_pExContext], rdx 
// init ExInfo.m_pExContext + + // rdi contains the currently active ExInfo + // rsi contains the address of the new ExInfo + call EXTERNAL_C_FUNC(RhRethrow) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + // no return + int 3 + +NESTED_END RhpRethrow, _TEXT + +// +// Prologue of all funclet calling helpers (RhpCallXXXXFunclet) +// +.macro FUNCLET_CALL_PROLOGUE localsCount, alignStack + + push_nonvol_reg r15 // save preserved regs for OS stackwalker + push_nonvol_reg r14 // ... + push_nonvol_reg r13 // ... + push_nonvol_reg r12 // ... + push_nonvol_reg rbx // ... + push_nonvol_reg rbp // ... + + stack_alloc_size = \localsCount * 8 + \alignStack * 8 + + alloc_stack stack_alloc_size + + END_PROLOGUE +.endm + +// +// Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) +// +.macro FUNCLET_CALL_EPILOGUE + free_stack stack_alloc_size + + pop_nonvol_reg rbp + pop_nonvol_reg rbx + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 +.endm + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: RDI: exception object +// RSI: handler funclet address +// RDX: REGDISPLAY* +// RCX: ExInfo* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + FUNCLET_CALL_PROLOGUE 6, 1 + + locThread = 0 + locResumeIp = 8 + locArg0 = 0x10 + locArg1 = 0x18 + locArg2 = 0x20 + locArg3 = 0x28 + + mov [rsp + locArg0], rdi // save arguments for later + mov [rsp + locArg1], rsi + mov [rsp + locArg2], rdx + mov [rsp + locArg3], rcx + + mov rbx, rdx + INLINE_GETTHREAD + mov rdx, rbx + + mov [rsp + locThread], rax // save Thread* for later + + // Clear the DoNotTriggerGc state before calling out to our managed catch funclet. + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], ~TSF_DoNotTriggerGc + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + +#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger + // trash the values at the old homes to make sure nobody uses them + mov rcx, 0xbaaddeed + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rax], rcx +#endif + + mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + mov rsi, [rsp + locArg0] // rsi <- exception object + call qword ptr [rsp + locArg1] // call handler funclet + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + mov rdx, [rsp + locArg2] // rdx <- dispatch context + +#ifdef _DEBUG + // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we + // have to spill all the preserved registers and then refill them after the call. 
+ + mov [rsp + locResumeIp], rax // save resume IP for later + + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rdi] , rbx + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rdi] , rbp + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rdi] , r12 + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rdi] , r13 + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rdi] , r14 + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rdi] , r15 + + mov rdi, [rsp] // rdi <- Thread* + mov rsi, [rsp + locArg3] // rsi <- current ExInfo * + mov rdx, [rdx + OFFSETOF__REGDISPLAY__SP] // rdx <- resume SP value + call C_FUNC(RhpValidateExInfoPop) + + mov rdx, [rsp + locArg2] // rdx <- dispatch context + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + + mov rax, [rsp + locResumeIp] // reload resume IP +#endif + mov rsi, [rsp + locThread] // rsi <- Thread* + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK rsi, rdi, rcx // Thread in rsi, trashes rdi and rcx + + mov rdi, [rsp + locArg3] // rdi <- current ExInfo * + mov rdx, [rdx + OFFSETOF__REGDISPLAY__SP] // rdx <- resume SP value + xor ecx, ecx // rcx <- 0 + +LOCAL_LABEL(ExInfoLoop): + mov rdi, [rdi + OFFSETOF__ExInfo__m_pPrevExInfo] // rdi <- next ExInfo + cmp rdi, rcx + je LOCAL_LABEL(ExInfoLoopDone) // we're done if it's null + cmp rdi, rdx + jl LOCAL_LABEL(ExInfoLoop) // keep looping if it's lower than the new SP + +LOCAL_LABEL(ExInfoLoopDone): + mov [rsi + OFFSETOF__Thread__m_pExInfoStackHead], rdi // store the new head on the Thread + + // reset RSP and jump to the continuation address + mov rsp, rdx // reset the SP + jmp rax + +NESTED_END RhpCallCatchFunclet, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +// +// INPUT: RDI: handler funclet address +// RSI: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler + + FUNCLET_CALL_PROLOGUE 3, 0 + + locThread = 0 + locArg0 = 8 + locArg1 = 0x10 + + mov [rsp + locArg0], rdi // save arguments for later + mov [rsp + locArg1], rsi + + mov rbx, rsi + INLINE_GETTHREAD + mov rsi, rbx + + mov [rsp + locThread], rax // save Thread* for later + + // + // We want to suppress hijacking between invocations of subsequent finallys. We do this because we + // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + // method) and then been popped off the stack, leaving behind no trace of its effect. + // + // So we clear the state before and set it after invocation of the handler. 
+ // + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], ~TSF_DoNotTriggerGc + + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + +#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger + // trash the values at the old homes to make sure nobody uses them + mov rcx, 0xbaaddeed + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] + mov [rax], rcx +#endif + + mov rdi, [rsi + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + call qword ptr [rsp + locArg0] // handler funclet address + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + mov rsi, [rsp + locArg1] // rsi <- regdisplay + + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] + mov [rax] , rbx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] + mov [rax] , rbp + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] + mov [rax] , r12 + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] + mov [rax] , r13 + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] + mov [rax] , r14 + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] + mov [rax] , r15 + + mov rax, [rsp + locThread] // rax <- Thread* + lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFinallyFunclet, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: RDI: exception object +// RSI: filter funclet address +// RDX: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + + FUNCLET_CALL_PROLOGUE 0, 1 + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + + mov rax, rsi // rax <- handler funclet address + mov rsi, rdi // rsi <- exception object + mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + call rax + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + // RAX contains the result of the filter execution + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFilterFunclet, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.asm new file mode 100644 index 0000000000000..5e7ed52f1d375 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -0,0 +1,679 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
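+;;
+;; This is the Windows (MSVC x64 ABI) flavor of the exception handling helpers: arguments arrive in
+;; RCX/RDX/R8/R9 (see the per-routine headers below), and because XMM6-XMM15 are callee-saved in this
+;; ABI, the prologs below also save and restore those registers, unlike the Unix flavor in
+;; ExceptionHandling.S.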
+ +include asmmacros.inc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: RCX: exception code of fault +;; RDX: faulting RIP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpThrowHwEx, _TEXT + + SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15)) + + SIZEOF_OutgoingScratch equ 20h + rsp_offsetof_ExInfo equ SIZEOF_OutgoingScratch + rsp_offsetof_Context equ SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + mov rax, rsp ;; save the faulting RSP + + ;; Align the stack towards zero + and rsp, -16 + + ;; Push the expected "machine frame" for the unwinder to see. All that it looks at is the faulting + ;; RSP and RIP, so we push zero for the others. + xor r8, r8 + push r8 ;; SS + push rax ;; faulting RSP + pushfq ;; EFLAGS + push r8 ;; CS + push rdx ;; faulting RIP + + ; Tell the unwinder that the frame is there now + .pushframe + + alloc_stack SIZEOF_XmmSaves + 8h ;; reserve stack for the xmm saves (+8h to realign stack) + push_vol_reg r8 ;; padding + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rbx + push_vol_reg r8 + push_nonvol_reg rsi + push_nonvol_reg rdi + push_nonvol_reg rbp + push_vol_reg rax ;; faulting RSP + push_vol_reg rdx ;; faulting IP + + ;; allocate outgoing args area and space for the ExInfo + alloc_stack SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + save_xmm128_postrsp Xmm6 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + save_xmm128_postrsp Xmm7 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm7 + save_xmm128_postrsp Xmm8 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm8 + save_xmm128_postrsp Xmm9 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm9 + save_xmm128_postrsp Xmm10, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm10 + save_xmm128_postrsp Xmm11, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm11 + save_xmm128_postrsp Xmm12, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm12 + save_xmm128_postrsp Xmm13, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm13 + save_xmm128_postrsp Xmm14, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm14 + save_xmm128_postrsp Xmm15, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm15 + + END_PROLOGUE + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + + lea rdx, [rsp + rsp_offsetof_ExInfo] ;; rdx <- ExInfo* + + xor r8, r8 + mov [rdx + OFFSETOF__ExInfo__m_exception], r8 ;; init the exception object to null + mov byte ptr [rdx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [rdx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [rdx + OFFSETOF__ExInfo__m_kind], 2 ;; ExKind.HardwareFault + + ;; link the ExInfo into the thread's ExInfo chain + mov r8, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rdx + OFFSETOF__ExInfo__m_pPrevExInfo], r8 ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rdx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea r8, [rsp + rsp_offsetof_Context] ;; r8 <- PAL_LIMITED_CONTEXT* + mov [rdx + OFFSETOF__ExInfo__m_pExContext], r8 ;; init ExInfo.m_pExContext + + ;; rcx still contains the exception code + ;; rdx contains the 
address of the ExInfo + call RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + ;; no return + int 3 + +NESTED_END RhpThrowHwEx, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: RCX: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpThrowEx, _TEXT + + SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15)) + + SIZEOF_OutgoingScratch equ 20h + rsp_offsetof_ExInfo equ SIZEOF_OutgoingScratch + rsp_offsetof_Context equ SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + lea rax, [rsp+8] ;; save the RSP of the throw site + mov rdx, [rsp] ;; get return address + + xor r8, r8 + + alloc_stack SIZEOF_XmmSaves + 8h ;; reserve stack for the xmm saves (+8h to realign stack) + push_vol_reg r8 ;; padding + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rbx + push_vol_reg r8 + push_nonvol_reg rsi + push_nonvol_reg rdi + push_nonvol_reg rbp + push_vol_reg rax ;; 'faulting' RSP + push_vol_reg rdx ;; 'faulting' IP + + ;; allocate outgoing args area and space for the ExInfo + alloc_stack SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + save_xmm128_postrsp Xmm6 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + save_xmm128_postrsp Xmm7 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm7 + save_xmm128_postrsp Xmm8 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm8 + save_xmm128_postrsp Xmm9 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm9 + save_xmm128_postrsp Xmm10, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm10 + save_xmm128_postrsp Xmm11, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm11 + save_xmm128_postrsp Xmm12, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm12 + save_xmm128_postrsp Xmm13, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm13 + save_xmm128_postrsp Xmm14, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm14 + save_xmm128_postrsp Xmm15, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm15 + + END_PROLOGUE + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + + lea rbx, [rsp + rsp_offsetof_Context + SIZEOF__PAL_LIMITED_CONTEXT + 8h] ;; rbx <- addr of return address + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. 
+ INLINE_THREAD_UNHIJACK rax, r9, rdx ;; trashes R9, RDX + mov rdx, [rbx] ;; rdx <- return address + mov [rsp + rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP], rdx ;; set 'faulting' IP after unhijack + + lea rdx, [rsp + rsp_offsetof_ExInfo] ;; rdx <- ExInfo* + + mov [rdx + OFFSETOF__ExInfo__m_exception], r8 ;; init the exception object to null + mov byte ptr [rdx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [rdx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [rdx + OFFSETOF__ExInfo__m_kind], 1 ;; ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + mov r8, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rdx + OFFSETOF__ExInfo__m_pPrevExInfo], r8 ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rdx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea r8, [rsp + rsp_offsetof_Context] ;; r8 <- PAL_LIMITED_CONTEXT* + mov [rdx + OFFSETOF__ExInfo__m_pExContext], r8 ;; init ExInfo.m_pExContext + + ;; rcx still contains the exception object + ;; rdx contains the address of the ExInfo + call RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + ;; no return + int 3 + +NESTED_END RhpThrowEx, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpRethrow, _TEXT + + SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15)) + + SIZEOF_OutgoingScratch equ 20h + rsp_offsetof_ExInfo equ SIZEOF_OutgoingScratch + rsp_offsetof_Context equ SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + lea rax, [rsp+8] ;; save the RSP of the throw site + mov rdx, [rsp] ;; get return address + + xor r8, r8 + + alloc_stack SIZEOF_XmmSaves + 8h ;; reserve stack for the xmm saves (+8h to realign stack) + push_vol_reg r8 ;; padding + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rbx + push_vol_reg r8 + push_nonvol_reg rsi + push_nonvol_reg rdi + push_nonvol_reg rbp + push_vol_reg rax ;; 'faulting' RSP + push_vol_reg rdx ;; 'faulting' IP + + ;; allocate outgoing args area and space for the ExInfo + alloc_stack SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + save_xmm128_postrsp Xmm6 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + save_xmm128_postrsp Xmm7 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm7 + save_xmm128_postrsp Xmm8 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm8 + save_xmm128_postrsp Xmm9 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm9 + save_xmm128_postrsp Xmm10, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm10 + save_xmm128_postrsp Xmm11, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm11 + save_xmm128_postrsp Xmm12, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm12 + save_xmm128_postrsp Xmm13, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm13 + save_xmm128_postrsp Xmm14, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm14 + save_xmm128_postrsp Xmm15, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm15 + + END_PROLOGUE + + 
INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + + lea rdx, [rsp + rsp_offsetof_ExInfo] ;; rdx <- ExInfo* + + mov [rdx + OFFSETOF__ExInfo__m_exception], r8 ;; init the exception object to null + mov byte ptr [rdx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [rdx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [rdx + OFFSETOF__ExInfo__m_kind], 0 ;; init to a deterministic value (ExKind.None) + + + ;; link the ExInfo into the thread's ExInfo chain + mov rcx, [rax + OFFSETOF__Thread__m_pExInfoStackHead] ;; rcx <- currently active ExInfo + mov [rdx + OFFSETOF__ExInfo__m_pPrevExInfo], rcx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rdx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea r8, [rsp + rsp_offsetof_Context] ;; r8 <- PAL_LIMITED_CONTEXT* + mov [rdx + OFFSETOF__ExInfo__m_pExContext], r8 ;; init ExInfo.m_pExContext + + ;; rcx contains the currently active ExInfo + ;; rdx contains the address of the new ExInfo + call RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + ;; no return + int 3 + +NESTED_END RhpRethrow, _TEXT + +;; +;; Prologue of all funclet calling helpers (RhpCallXXXXFunclet) +;; +FUNCLET_CALL_PROLOGUE macro localsCount, alignStack + + push_nonvol_reg r15 ;; save preserved regs for OS stackwalker + push_nonvol_reg r14 ;; ... + push_nonvol_reg r13 ;; ... + push_nonvol_reg r12 ;; ... + push_nonvol_reg rbx ;; ... + push_nonvol_reg rsi ;; ... + push_nonvol_reg rdi ;; ... + push_nonvol_reg rbp ;; ... + + arguments_scratch_area_size = 20h + xmm_save_area_size = 10 * 10h ;; xmm6..xmm15 save area + stack_alloc_size = arguments_scratch_area_size + localsCount * 8 + alignStack * 8 + xmm_save_area_size + rsp_offsetof_arguments = stack_alloc_size + 8*8h + 8h + rsp_offsetof_locals = arguments_scratch_area_size + xmm_save_area_size + + alloc_stack stack_alloc_size + + save_xmm128_postrsp xmm6, (arguments_scratch_area_size + 0 * 10h) + save_xmm128_postrsp xmm7, (arguments_scratch_area_size + 1 * 10h) + save_xmm128_postrsp xmm8, (arguments_scratch_area_size + 2 * 10h) + save_xmm128_postrsp xmm9, (arguments_scratch_area_size + 3 * 10h) + save_xmm128_postrsp xmm10, (arguments_scratch_area_size + 4 * 10h) + save_xmm128_postrsp xmm11, (arguments_scratch_area_size + 5 * 10h) + save_xmm128_postrsp xmm12, (arguments_scratch_area_size + 6 * 10h) + save_xmm128_postrsp xmm13, (arguments_scratch_area_size + 7 * 10h) + save_xmm128_postrsp xmm14, (arguments_scratch_area_size + 8 * 10h) + save_xmm128_postrsp xmm15, (arguments_scratch_area_size + 9 * 10h) + + END_PROLOGUE +endm + +;; +;; Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) +;; +FUNCLET_CALL_EPILOGUE macro + movdqa xmm6, [rsp + arguments_scratch_area_size + 0 * 10h] + movdqa xmm7, [rsp + arguments_scratch_area_size + 1 * 10h] + movdqa xmm8, [rsp + arguments_scratch_area_size + 2 * 10h] + movdqa xmm9, [rsp + arguments_scratch_area_size + 3 * 10h] + movdqa xmm10, [rsp + arguments_scratch_area_size + 4 * 10h] + movdqa xmm11, [rsp + arguments_scratch_area_size + 5 * 10h] + movdqa xmm12, [rsp + arguments_scratch_area_size + 6 * 10h] + movdqa xmm13, [rsp + arguments_scratch_area_size + 7 * 10h] + movdqa xmm14, [rsp + arguments_scratch_area_size + 8 * 10h] + movdqa xmm15, [rsp + arguments_scratch_area_size + 9 * 10h] + + add rsp, stack_alloc_size + pop rbp + pop rdi + pop rsi + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 +endm + 
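+;;
+;; As a worked example of the layout produced by FUNCLET_CALL_PROLOGUE (derived from the macro above):
+;; RhpCallCatchFunclet below uses "FUNCLET_CALL_PROLOGUE 3, 0", which gives
+;;
+;;     stack_alloc_size       = 20h + 3*8 + 0*8 + 0A0h = 0D8h
+;;     rsp_offsetof_locals    = 20h + 0A0h             = 0C0h
+;;     rsp_offsetof_arguments = 0D8h + 8*8h + 8h       = 120h
+;;
+;; i.e. rsp_offsetof_arguments skips the allocation, the eight pushed nonvolatile registers and the
+;; return address, so it addresses the caller-provided argument home slots used to spill RCX/RDX/R8/R9.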
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: RCX: exception object +;; RDX: handler funclet address +;; R8: REGDISPLAY* +;; R9: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpCallCatchFunclet, _TEXT + + FUNCLET_CALL_PROLOGUE 3, 0 + + ;; locals + rsp_offsetof_thread = rsp_offsetof_locals + rsp_offsetof_resume_ip = rsp_offsetof_locals + 8; + rsp_offsetof_is_handling_thread_abort = rsp_offsetof_locals + 16; + + mov [rsp + rsp_offsetof_arguments + 0h], rcx ;; save arguments for later + mov [rsp + rsp_offsetof_arguments + 8h], rdx + mov [rsp + rsp_offsetof_arguments + 10h], r8 + mov [rsp + rsp_offsetof_arguments + 18h], r9 + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + mov [rsp + rsp_offsetof_thread], rax ;; save Thread* for later + + cmp rcx, [rax + OFFSETOF__Thread__m_threadAbortException] + setz byte ptr [rsp + rsp_offsetof_is_handling_thread_abort] + + ;; Clear the DoNotTriggerGc state before calling out to our managed catch funclet. + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov rsi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov rdi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + +if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + mov r9, 0baaddeedh + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov [rax], r9 +endif + + movdqa xmm6, [r8 + OFFSETOF__REGDISPLAY__Xmm + 0*10h] + movdqa xmm7, [r8 + OFFSETOF__REGDISPLAY__Xmm + 1*10h] + movdqa xmm8, [r8 + OFFSETOF__REGDISPLAY__Xmm + 2*10h] + movdqa xmm9, [r8 + OFFSETOF__REGDISPLAY__Xmm + 3*10h] + movdqa xmm10,[r8 + OFFSETOF__REGDISPLAY__Xmm + 4*10h] + + movdqa xmm11,[r8 + OFFSETOF__REGDISPLAY__Xmm + 5*10h] + movdqa xmm12,[r8 + OFFSETOF__REGDISPLAY__Xmm + 6*10h] + movdqa xmm13,[r8 + OFFSETOF__REGDISPLAY__Xmm + 7*10h] + movdqa xmm14,[r8 + OFFSETOF__REGDISPLAY__Xmm + 8*10h] + movdqa xmm15,[r8 + OFFSETOF__REGDISPLAY__Xmm + 9*10h] + + mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + mov rdx, [rsp + rsp_offsetof_arguments + 0h] ;; rdx <- exception object + call qword ptr [rsp + rsp_offsetof_arguments + 8h] ;; call handler funclet + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + mov r8, [rsp + rsp_offsetof_arguments + 10h] ;; r8 <- dispatch context + +ifdef _DEBUG + ;; Call into some C++ code to validate the pop of the 
ExInfo. We only do this in debug because we + ;; have to spill all the preserved registers and then refill them after the call. + mov [rsp + rsp_offsetof_resume_ip], rax ;; save resume IP for later + + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov [rcx] , rbx + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov [rcx] , rbp + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov [rcx] , rsi + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov [rcx] , rdi + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov [rcx] , r12 + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov [rcx] , r13 + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov [rcx] , r14 + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov [rcx] , r15 + + mov rcx, [rsp + rsp_offsetof_thread] ;; rcx <- Thread* + mov rdx, [rsp + rsp_offsetof_arguments + 18h] ;; rdx <- current ExInfo * + mov r8, [r8 + OFFSETOF__REGDISPLAY__SP] ;; r8 <- resume SP value + call RhpValidateExInfoPop + + mov r8, [rsp + rsp_offsetof_arguments + 10h] ;; r8 <- dispatch context + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov rsi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov rdi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + + mov rax, [rsp + rsp_offsetof_resume_ip] ;; reload resume IP +endif + mov rdx, [rsp + rsp_offsetof_thread] ;; rdx <- Thread* + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
+ INLINE_THREAD_UNHIJACK rdx, rcx, r9 ;; Thread in rdx, trashes rcx and r9 + + mov rcx, [rsp + rsp_offsetof_arguments + 18h] ;; rcx <- current ExInfo * + mov r8, [r8 + OFFSETOF__REGDISPLAY__SP] ;; r8 <- resume SP value + xor r9d, r9d ;; r9 <- 0 + + @@: mov rcx, [rcx + OFFSETOF__ExInfo__m_pPrevExInfo] ;; rcx <- next ExInfo + cmp rcx, r9 + je @F ;; we're done if it's null + cmp rcx, r8 + jl @B ;; keep looping if it's lower than the new SP + + @@: mov [rdx + OFFSETOF__Thread__m_pExInfoStackHead], rcx ;; store the new head on the Thread + + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz @f + + ;; test if the exception handled by the catch was the ThreadAbortException + cmp byte ptr [rsp + rsp_offsetof_is_handling_thread_abort], 0 + je @f + + ;; It was the ThreadAbortException, so rethrow it + mov rcx, STATUS_REDHAWK_THREAD_ABORT + mov rdx, rax ;; rdx <- continuation address as exception RIP + mov rsp, r8 ;; reset the SP to resume SP value + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + + ;; reset RSP and jump to the continuation address + @@: mov rsp, r8 ;; reset the SP to resume SP value + jmp rax + + +NESTED_END RhpCallCatchFunclet, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: RCX: handler funclet address +;; RDX: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpCallFinallyFunclet, _TEXT + + FUNCLET_CALL_PROLOGUE 1, 0 + + mov [rsp + rsp_offsetof_arguments + 0h], rcx ;; save arguments for later + mov [rsp + rsp_offsetof_arguments + 8h], rdx + + rsp_offsetof_thread = rsp_offsetof_locals + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + mov [rsp + rsp_offsetof_thread], rax ;; save Thread* for later + + ;; + ;; We want to suppress hijacking between invocations of subsequent finallys. We do this because we + ;; cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + ;; method) and then been popped off the stack, leaving behind no trace of its effect. + ;; + ;; So we clear the state before and set it after invocation of the handler. 
+ ;; + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] + mov rsi, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] + mov rdi, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + + movdqa xmm6, [rdx + OFFSETOF__REGDISPLAY__Xmm + 0*10h] + movdqa xmm7, [rdx + OFFSETOF__REGDISPLAY__Xmm + 1*10h] + movdqa xmm8, [rdx + OFFSETOF__REGDISPLAY__Xmm + 2*10h] + movdqa xmm9, [rdx + OFFSETOF__REGDISPLAY__Xmm + 3*10h] + movdqa xmm10,[rdx + OFFSETOF__REGDISPLAY__Xmm + 4*10h] + + movdqa xmm11,[rdx + OFFSETOF__REGDISPLAY__Xmm + 5*10h] + movdqa xmm12,[rdx + OFFSETOF__REGDISPLAY__Xmm + 6*10h] + movdqa xmm13,[rdx + OFFSETOF__REGDISPLAY__Xmm + 7*10h] + movdqa xmm14,[rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h] + movdqa xmm15,[rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h] + +if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + mov r9, 0baaddeedh + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rax], r9 +endif + + mov rcx, [rdx + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + call qword ptr [rsp + rsp_offsetof_arguments + 0h] ;; handler funclet address + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + mov rdx, [rsp + rsp_offsetof_arguments + 8h] ;; rdx <- regdisplay + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rax] , rbx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rax] , rbp + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] + mov [rax] , rsi + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] + mov [rax] , rdi + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rax] , r12 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rax] , r13 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rax] , r14 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rax] , r15 + + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 0*10h], xmm6 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 1*10h], xmm7 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 2*10h], xmm8 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 3*10h], xmm9 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 4*10h], xmm10 + + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 5*10h], xmm11 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 6*10h], xmm12 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 7*10h], xmm13 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h], xmm14 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h], xmm15 + + mov rax, [rsp + rsp_offsetof_thread] ;; rax <- Thread* + lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFinallyFunclet, _TEXT + 
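+;;
+;; In rough pseudocode, the helper above implements (a sketch of the code above, not a separate
+;; implementation):
+;;
+;;     thread->m_ThreadStateFlags &= ~TSF_DoNotTriggerGc   ;; GC/hijacking allowed while the finally runs
+;;     load rbx/rbp/rsi/rdi/r12-r15 and xmm6-xmm15 from *pRegDisplay
+;;     pHandlerIP(pRegDisplay->SP)                         ;; RCX = establisher frame
+;;     store the (possibly updated) registers back into *pRegDisplay
+;;     thread->m_ThreadStateFlags |= TSF_DoNotTriggerGc    ;; suppressed again between subsequent finallys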
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: RCX: exception object +;; RDX: filter funclet address +;; R8: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpCallFilterFunclet, _TEXT + + FUNCLET_CALL_PROLOGUE 0, 1 + + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + + mov rax, rdx ;; rax <- handler funclet address + mov rdx, rcx ;; rdx <- exception object + mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + call rax + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + ;; RAX contains the result of the filter execution + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFilterFunclet, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/FloatingPoint.asm b/src/coreclr/src/nativeaot/Runtime/amd64/FloatingPoint.asm new file mode 100644 index 0000000000000..8dcf12610a055 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/FloatingPoint.asm @@ -0,0 +1,57 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include asmmacros.inc + +LEAF_ENTRY RhpFltRemRev, _TEXT + + sub rsp, 18h + + movss dword ptr [rsp + 10h], xmm1 ; divisor + movss dword ptr [rsp + 8h], xmm0 ; dividend + + fld dword ptr [rsp + 10h] ; divisor + fld dword ptr [rsp + 8h] ; dividend + +fremloop: + fprem + fstsw ax + test ax, 0400h + jnz fremloop + + fstp dword ptr [rsp] + movlps xmm0,qword ptr [rsp] + + fstp st(0) + add rsp,18h + ret + +LEAF_END RhpFltRemRev, _TEXT + + +LEAF_ENTRY RhpDblRemRev, _TEXT + + sub rsp, 18h + + movsd qword ptr [rsp + 10h], xmm1 ; divisor + movsd qword ptr [rsp + 8h], xmm0 ; dividend + + fld qword ptr [rsp + 10h] ; divisor + fld qword ptr [rsp + 8h] ; dividend + +fremloopd: + fprem + fstsw ax + test ax, 0400h + jnz fremloopd + + fstp qword ptr [rsp] + movlpd xmm0,qword ptr [rsp] + + fstp st(0) + add rsp,18h + ret + +LEAF_END RhpDblRemRev, _TEXT + + END diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/GC.asm b/src/coreclr/src/nativeaot/Runtime/amd64/GC.asm new file mode 100644 index 0000000000000..5e4459ab205e3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/GC.asm @@ -0,0 +1,65 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +;; extern "C" DWORD getcpuid(DWORD arg, unsigned char result[16]); +NESTED_ENTRY getcpuid, _TEXT + + push_nonvol_reg rbx + push_nonvol_reg rsi + END_PROLOGUE + + mov eax, ecx ; first arg + mov rsi, rdx ; second arg (result) + xor ecx, ecx ; clear ecx - needed for "Structured Extended Feature Flags" + cpuid + mov [rsi+ 0], eax + mov [rsi+ 4], ebx + mov [rsi+ 8], ecx + mov [rsi+12], edx + pop rsi + pop rbx + ret +NESTED_END getcpuid, _TEXT + +;The following function uses Deterministic Cache Parameter leafs to crack the cache hierarchy information on Prescott & Above platforms. +; This function takes 3 arguments: +; Arg1 is an input to ECX. Used as index to specify which cache level to return information on by CPUID. +; Arg1 is already passed in ECX on call to getextcpuid, so no explicit assignment is required; +; Arg2 is an input to EAX. 
For deterministic code enumeration, we pass in 4H in arg2. +; Arg3 is a pointer to the return dwbuffer +NESTED_ENTRY getextcpuid, _TEXT + push_nonvol_reg rbx + push_nonvol_reg rsi + END_PROLOGUE + + mov eax, edx ; second arg (input to EAX) + mov rsi, r8 ; third arg (pointer to return dwbuffer) + cpuid + mov [rsi+ 0], eax + mov [rsi+ 4], ebx + mov [rsi+ 8], ecx + mov [rsi+12], edx + pop rsi + pop rbx + + ret +NESTED_END getextcpuid, _TEXT + +;; extern "C" DWORD __stdcall xmmYmmStateSupport(); +LEAF_ENTRY xmmYmmStateSupport, _TEXT + mov ecx, 0 ; Specify xcr0 + xgetbv ; result in EDX:EAX + and eax, 06H + cmp eax, 06H ; check OS has enabled both XMM and YMM state support + jne not_supported + mov eax, 1 + jmp done + not_supported: + mov eax, 0 + done: + ret +LEAF_END xmmYmmStateSupport, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/amd64/GcProbe.asm new file mode 100644 index 0000000000000..31cd5a2539541 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/GcProbe.asm @@ -0,0 +1,810 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH +PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + +;; +;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts the register +;; bitmask in RCX +;; +;; On entry: +;; - BITMASK: bitmask describing pushes, may be volatile register or constant value +;; - RAX: managed function return value, may be an object or byref +;; - preserved regs: need to stay preserved, may contain objects or byrefs +;; - extraStack bytes of stack have already been allocated +;; +;; INVARIANTS +;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup. +;; - All preserved registers remain unchanged from their values in managed code. +;; +PUSH_PROBE_FRAME macro threadReg, trashReg, extraStack, BITMASK + + push_vol_reg rax ; save RAX, it might contain an objectref + lea trashReg, [rsp + 10h + extraStack] + push_vol_reg trashReg ; save caller's RSP + push_nonvol_reg r15 ; save preserved registers + push_nonvol_reg r14 ; .. + push_nonvol_reg r13 ; .. + push_nonvol_reg r12 ; .. + push_nonvol_reg rdi ; .. + push_nonvol_reg rsi ; .. + push_nonvol_reg rbx ; .. + push_vol_reg BITMASK ; save the register bitmask passed in by caller + push_vol_reg threadReg ; Thread * (unused by stackwalker) + push_nonvol_reg rbp ; save caller's RBP + mov trashReg, [rsp + 12*8 + extraStack] ; Find the return address + push_vol_reg trashReg ; save m_RIP + lea trashReg, [rsp + 0] ; trashReg == address of frame + + ;; allocate scratch space and any required alignment + alloc_stack 20h + 10h + (extraStack AND (10h-1)) + + ;; save xmm0 in case it's being used as a return value + movdqa [rsp + 20h], xmm0 + + ; link the frame into the Thread + mov [threadReg + OFFSETOF__Thread__m_pHackPInvokeTunnel], trashReg +endm + +;; +;; Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved +;; registers and return value to their values from before the probe was called (while also updating any +;; object refs or byrefs). +;; +;; NOTE: does NOT deallocate the 'extraStack' portion of the stack, the user of this macro must do that. 
+;; +POP_PROBE_FRAME macro extraStack + movdqa xmm0, [rsp + 20h] + add rsp, 20h + 10h + (extraStack AND (10h-1)) + 8 + pop rbp + pop rax ; discard Thread* + pop rax ; discard BITMASK + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + pop rax ; discard caller RSP + pop rax +endm + +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. +;; +;; Register state on entry: +;; RDX: thread pointer +;; +;; Register state on exit: +;; RCX: trashed +;; +ClearHijackState macro + xor ecx, ecx + mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], rcx + mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], rcx +endm + + +;; +;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +;; clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; RCX: trashed +;; RDX: thread pointer +;; +FixupHijackedCallstack macro + + ;; rdx <- GetThread(), TRASHES rcx + INLINE_GETTHREAD rdx, rcx + + ;; + ;; Fix the stack by pushing the original return address + ;; + mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + push rcx + + ClearHijackState +endm + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; RBX: thread pointer +;; +;; Register state on exit: +;; RBX: thread pointer +;; All other registers trashed +;; + +EXTERN RhpWaitForGCNoAbort : PROC + +WaitForGCCompletion macro + test dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc + jnz @F + + mov rcx, [rbx + OFFSETOF__Thread__m_pHackPInvokeTunnel] + call RhpWaitForGCNoAbort +@@: + +endm + + +EXTERN RhpPInvokeExceptionGuard : PROC + +;; +;; +;; +;; GC Probe Hijack targets +;; +;; +NESTED_ENTRY RhpGcProbeHijackScalar, _TEXT, RhpPInvokeExceptionGuard + END_PROLOGUE + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + jmp RhpGcProbe +NESTED_END RhpGcProbeHijackScalar, _TEXT + +NESTED_ENTRY RhpGcProbeHijackObject, _TEXT, RhpPInvokeExceptionGuard + END_PROLOGUE + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcProbe +NESTED_END RhpGcProbeHijackObject, _TEXT + +NESTED_ENTRY RhpGcProbeHijackByref, _TEXT, RhpPInvokeExceptionGuard + END_PROLOGUE + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcProbe +NESTED_END RhpGcProbeHijackByref, _TEXT + +ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack targets +;; +;; +LEAF_ENTRY RhpGcStressHijackScalar, _TEXT + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + jmp RhpGcStressProbe +LEAF_END RhpGcStressHijackScalar, _TEXT + +LEAF_ENTRY RhpGcStressHijackObject, _TEXT + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcStressProbe +LEAF_END RhpGcStressHijackObject, _TEXT + +LEAF_ENTRY RhpGcStressHijackByref, _TEXT + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcStressProbe +LEAF_END RhpGcStressHijackByref, _TEXT + +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; This worker performs the GC Stress work and returns to the original return address. 
+;; +;; Register state on entry: +;; RDX: thread pointer +;; RCX: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for RAX, have been trashed +;; All other registers restored as they were when the hijack was first reached. +;; +NESTED_ENTRY RhpGcStressProbe, _TEXT + PUSH_PROBE_FRAME rdx, rax, 0, rcx + END_PROLOGUE + + call REDHAWKGCINTERFACE__STRESSGC + + POP_PROBE_FRAME 0 + ret +NESTED_END RhpGcStressProbe, _TEXT + +endif ;; FEATURE_GC_STRESS + +EXTERN RhpThrowHwEx : PROC + +NESTED_ENTRY RhpGcProbe, _TEXT + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @f + ret +@@: + PUSH_PROBE_FRAME rdx, rax, 0, rcx + END_PROLOGUE + + mov rbx, rdx + WaitForGCCompletion + + mov rax, [rbx + OFFSETOF__Thread__m_pHackPInvokeTunnel] + test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jnz Abort + POP_PROBE_FRAME 0 + ret +Abort: + POP_PROBE_FRAME 0 + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rdx ;; return address as exception RIP + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + +NESTED_END RhpGcProbe, _TEXT + + +ifdef FEATURE_GC_STRESS +;; PAL_LIMITED_CONTEXT, 6 xmm regs to save, 2 scratch regs to save, plus 20h bytes for scratch space +RhpHijackForGcStress_FrameSize equ SIZEOF__PAL_LIMITED_CONTEXT + 6*10h + 2*8h + 20h + +; ----------------------------------------------------------------------------------------------------------- +; RhpHijackForGcStress +; +; Called at the beginning of the epilog when a method is bound with /gcstress +; +; N.B. -- Leaf frames may not have aligned the stack or reserved any scratch space on the stack. Also, in +; order to have a resonable stacktrace in the debugger, we must use the .pushframe unwind directive. +; +; N.B. #2 -- The "EH jump epilog" codegen depends on rcx/rdx being preserved across this call. We currently +; will trash R8-R11, but we can do better, if necessary. +; +NESTED_ENTRY RhpHijackForGcStress, _TEXT + + lea r10, [rsp+8] ;; save the original RSP (prior to call) + mov r11, [rsp] ;; get the return address + + ;; Align the stack + and rsp, -16 + + ;; Push the expected "machine frame" for the unwinder to see. All that it looks at is the RSP and + ;; RIP, so we push zero for the others. + xor r8, r8 + push r8 ;; just aligning the stack + push r8 ;; SS + push r10 ;; original RSP + push r8 ;; EFLAGS + push r8 ;; CS + push r11 ;; return address + + ; Tell the unwinder that the frame is there now + .pushframe + + alloc_stack RhpHijackForGcStress_FrameSize + END_PROLOGUE + + ;; Save xmm scratch regs -- this is probably overkill, only the return value reg is + ;; likely to be interesting at this point, but it's a bit ambiguous. + movdqa [rsp + 20h + 0*10h], xmm0 + movdqa [rsp + 20h + 1*10h], xmm1 + movdqa [rsp + 20h + 2*10h], xmm2 + movdqa [rsp + 20h + 3*10h], xmm3 + movdqa [rsp + 20h + 4*10h], xmm4 + movdqa [rsp + 20h + 5*10h], xmm5 + + mov [rsp + 20h + 6*10h + 0*8h], rcx + mov [rsp + 20h + 6*10h + 1*8h], rdx + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need RSP and + ;; RBP, but this is test code, so I'm not too worried about efficiency. 
+ ;; + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__IP], r11 ; rip at callsite + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsp], r10 ; rsp at callsite + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rbp], rbp + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rdi], rdi + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsi], rsi + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rax], rax + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rbx], rbx + + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R12], r12 + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R13], r13 + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R14], r14 + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R15], r15 + + lea rcx, [rsp + 20h + 6*10h + 2*8h] ;; address of PAL_LIMITED_CONTEXT + call THREAD__HIJACKFORGCSTRESS + + ;; Note: we only restore the scratch registers here. No GC has occured, so restoring + ;; the callee saved ones is unnecessary. + mov rax, [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rax] + mov rcx, [rsp + 20h + 6*10h + 0*8h] + mov rdx, [rsp + 20h + 6*10h + 1*8h] + + ;; Restore xmm scratch regs + movdqa xmm0, [rsp + 20h + 0*10h] + movdqa xmm1, [rsp + 20h + 1*10h] + movdqa xmm2, [rsp + 20h + 2*10h] + movdqa xmm3, [rsp + 20h + 3*10h] + movdqa xmm4, [rsp + 20h + 4*10h] + movdqa xmm5, [rsp + 20h + 5*10h] + + ;; epilog + mov r10, [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsp] + lea rsp, [r10 - 8] ;; adjust RSP to point back at the return address + ret +NESTED_END RhpHijackForGcStress, _TEXT + +endif ;; FEATURE_GC_STRESS + + +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; +;; Register state on entry: +;; RAX: pointer to this function (i.e., trash) +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack still contains the return address. +;; +;; Register state on exit: +;; RSP: what it would be after a complete return to the caler. 
+;; RDX: TRASHED +;; +RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName +LEAF_ENTRY funcName, _TEXT + lea rax, [hijackFuncName] + cmp [rsp], rax + je RhpGCProbeForEHJump + +IF isStress EQ 1 + lea rax, [stressFuncName] + cmp [rsp], rax + je RhpGCStressProbeForEHJump +ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov [rsp], rdx ; Update the return address + ret + +LEAF_END funcName, _TEXT +endm + +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. +RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, 0, 0 +ifdef FEATURE_GC_STRESS +RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, 1, RhpGcStressHijackScalar +RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, 1, RhpGcStressHijackObject +RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, 1, RhpGcStressHijackByref +endif + +;; +;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; RAX: scratch +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack is as if we are just about to returned from the call +;; +;; Register state on exit: +;; RAX: reference to the exception object +;; RCX: scratch +;; RDX: thread pointer +;; +EHJumpProbeProlog_extraStack = 1*8 +EHJumpProbeProlog macro + push_nonvol_reg rdx ; save the handler address so we can jump to it later + mov rax, rcx ; move the ex object reference into rax so we can report it + + ;; rdx <- GetThread(), TRASHES rcx + INLINE_GETTHREAD rdx, rcx + + ;; Fix the stack by patching the original return address + mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [rsp + EHJumpProbeProlog_extraStack], rcx + + ClearHijackState + + ; TRASHES r10 + PUSH_PROBE_FRAME rdx, r10, EHJumpProbeProlog_extraStack, PROBE_SAVE_FLAGS_RAX_IS_GCREF + + END_PROLOGUE +endm + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the +;; final jump to the handler for EH jump probe funcs. +;; +;; Register state on entry: +;; RAX: reference to the exception object +;; RCX: scratch +;; RDX: scratch +;; +;; Register state on exit: +;; RSP: correct for return to the caller +;; RCX: reference to the exception object +;; RDX: trashed +;; +EHJumpProbeEpilog macro + POP_PROBE_FRAME EHJumpProbeProlog_extraStack + mov rcx, rax ; Put the EX obj ref back into rcx for the handler. + + pop rax ; Recover the handler address. + mov [rsp], rax ; Update the return address + ret +endm + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. +;; +;; Register state on entry: +;; RAX: scratch +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (rsp points to return address). 
+;; +;; Register state on exit: +;; RSP: correct for return to the caller +;; RBP: previous ebp frame +;; RCX: reference to the exception object +;; +NESTED_ENTRY RhpGCProbeForEHJump, _TEXT + EHJumpProbeProlog + +ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @F + + call RhDebugBreak +@@: +endif ;; _DEBUG + + mov rbx, rdx + WaitForGCCompletion + + EHJumpProbeEpilog + +NESTED_END RhpGCProbeForEHJump, _TEXT + +ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; RAX: scratch +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (rsp points to return address). +;; +;; Register state on exit: +;; RSP: correct for return to the caller +;; RBP: previous ebp frame +;; RCX: reference to the exception object +;; +NESTED_ENTRY RhpGCStressProbeForEHJump, _TEXT + EHJumpProbeProlog + + call REDHAWKGCINTERFACE__STRESSGC + + EHJumpProbeEpilog + +NESTED_END RhpGCStressProbeForEHJump, _TEXT + +g_pTheRuntimeInstance equ ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA +EXTERN g_pTheRuntimeInstance : QWORD +RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@RuntimeInstance@@QEAA_N_K@Z +EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC + +endif ;; FEATURE_GC_STRESS + +EXTERN g_fGcStressStarted : DWORD +EXTERN g_fHasFastFxsave : BYTE + +FXSAVE_SIZE equ 512 + +;; Trap to GC. +;; Set up the P/Invoke transition frame with the return address as the safe point. +;; All registers, both volatile and non-volatile, are preserved. 
+;; The function should be called not jumped because it's expecting the return address +NESTED_ENTRY RhpTrapToGC, _TEXT + + sizeof_OutgoingScratchSpace equ 20h + sizeof_PInvokeFrame equ OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + 15*8 + sizeof_XmmAlignPad equ 8 + sizeof_XmmSave equ FXSAVE_SIZE + sizeof_MachineFrame equ 6*8 + sizeof_InitialPushedArgs equ 2*8 ;; eflags, return value + sizeof_FixedFrame equ sizeof_OutgoingScratchSpace + sizeof_PInvokeFrame + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + + ;; On the stack on entry: + ;; [rsp ] -> Return address + + ;; save eflags before we trash them + pushfq + + ;; What we want to get to: + ;; + ;; [rsp ] -> outgoing scratch area + ;; + ;; [rsp + 20] -> m_RIP -------| + ;; [rsp + 28] -> m_FramePointer | + ;; [rsp + 30] -> m_pThread | + ;; [rsp + 38] -> m_Flags / m_dwAlignPad2 | + ;; [rsp + 40] -> rbx save | + ;; [rsp + 48] -> rsi save | + ;; [rsp + 50] -> rdi save | + ;; [rsp + 58] -> r12 save | + ;; [rsp + 60] -> r13 save | + ;; [rsp + 68] -> r14 save | PInvokeTransitionFrame + ;; [rsp + 70] -> r15 save | + ;; [rsp + 78] -> rsp save | + ;; [rsp + 80] -> rax save | + ;; [rsp + 88] -> rcx save | + ;; [rsp + 90] -> rdx save | + ;; [rsp + 98] -> r8 save | + ;; [rsp + a0] -> r9 save | + ;; [rsp + a8] -> r10 save | + ;; [rsp + b0] -> r11 save -------| + ;; + ;; [rsp + b8] -> [XmmAlignPad] + ;; + ;; [rsp + c0] -> FXSAVE area + ;; + ;; [rsp +2c0] | RIP | + ;; [rsp +2c8] | CS | + ;; [rsp +2d0] | EFLAGS | <-- 'machine frame' + ;; [rsp +2d8] | RSP | + ;; [rsp +2e0] | SS | + ;; [rsp +2e8] | padding | + ;; + ;; [rsp +2f0] [PSP] + ;; [rsp +2f8] [optional stack alignment] + ;; + ;; [PSP - 10] -> eflags save + ;; [PSP - 8] -> Return address + ;; [PSP] -> caller's frame + + test rsp, 0Fh + jz AlreadyAligned + + sub rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 ; +8 to save PSP, + push r11 ; save incoming R11 into save location + lea r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 + sizeof_InitialPushedArgs] + jmp PspCalculated + + AlreadyAligned: + + sub rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 16 ; +8 to save RSP, +8 to re-align PSP, + push r11 ; save incoming R11 into save location + lea r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 16 + sizeof_InitialPushedArgs] + + PspCalculated: + push r10 ; save incoming R10 into save location + xor r10d, r10d + + ;; + ;; Populate the 'machine frame' in the diagram above. We have only pushed up to the 'r10 save', so we have not + ;; yet pushed 0xA8 bytes of that diagram. 
+ ;; + ;; [rsp + {offset-in-target-frame-layout-diagram} - {as-yet-unpushed-stack-size}] + mov [rsp + 2c0h - 0a8h], r10 ; init RIP to zero + mov [rsp + 2c8h - 0a8h], r10 ; init CS to zero + mov [rsp + 2d0h - 0a8h], r10 ; init EFLAGS to zero + mov [rsp + 2d8h - 0a8h], r11 ; save PSP in the 'machine frame' + mov [rsp + 2e0h - 0a8h], r10 ; init SS to zero + mov [rsp + 2f0h - 0a8h], r11 ; save PSP + + .pushframe + .allocstack sizeof_XmmAlignPad + sizeof_XmmSave + 2*8 ;; only 2 of the regs from the PInvokeTransitionFrame are on the stack + + push_vol_reg r9 + push_vol_reg r8 + push_vol_reg rdx + push_vol_reg rcx + push_vol_reg rax + push_vol_reg r11 ; PSP gets saved into the PInvokeTransitionFrame + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rdi + push_nonvol_reg rsi + push_nonvol_reg rbx + push_vol_reg PROBE_SAVE_FLAGS_EVERYTHING ; m_Flags / m_dwAlignPad2 + + ;; rdx <- GetThread(), TRASHES rcx + INLINE_GETTHREAD rdx, rcx + + push_vol_reg rdx ; m_pThread + push_nonvol_reg rbp ; m_FramePointer + push_vol_reg r10 ; m_RIP + + alloc_stack sizeof_OutgoingScratchSpace + END_PROLOGUE + + mov rbx, r11 ; put PSP into RBX + mov rsi, rdx ; put Thread* into RSI + + ; RBX is PSP + ; RSI is Thread* + + fxsave [rsp + 0c0h] + + cmp [g_fHasFastFxsave], 0 ; fast fxsave won't save the xmm registers, so we must do it + jz DontSaveXmmAgain + + ;; 0C0h -> offset of FXSAVE area + ;; 0A0h -> offset of xmm0 save area within the FXSAVE area + movdqa [rsp + 0c0h + 0a0h + 0*10h], xmm0 + movdqa [rsp + 0c0h + 0a0h + 1*10h], xmm1 + movdqa [rsp + 0c0h + 0a0h + 2*10h], xmm2 + movdqa [rsp + 0c0h + 0a0h + 3*10h], xmm3 + movdqa [rsp + 0c0h + 0a0h + 4*10h], xmm4 + movdqa [rsp + 0c0h + 0a0h + 5*10h], xmm5 + movdqa [rsp + 0c0h + 0a0h + 6*10h], xmm6 + movdqa [rsp + 0c0h + 0a0h + 7*10h], xmm7 + movdqa [rsp + 0c0h + 0a0h + 8*10h], xmm8 + movdqa [rsp + 0c0h + 0a0h + 9*10h], xmm9 + movdqa [rsp + 0c0h + 0a0h + 10*10h], xmm10 + movdqa [rsp + 0c0h + 0a0h + 11*10h], xmm11 + movdqa [rsp + 0c0h + 0a0h + 12*10h], xmm12 + movdqa [rsp + 0c0h + 0a0h + 13*10h], xmm13 + movdqa [rsp + 0c0h + 0a0h + 14*10h], xmm14 + movdqa [rsp + 0c0h + 0a0h + 15*10h], xmm15 + +DontSaveXmmAgain: + mov rax, [rbx - 8] + mov [rsp + 2c0h], rax ; save return address into 'machine frame' + mov [rsp + 20h], rax ; save return address into PInvokeTransitionFrame + + ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to + ; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this + ; condition implies we're running restricted callouts during a GC itself and we could end up + ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect + ; call). + test dword ptr [rsi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc + jnz DoneWaitingForGc + + ; link the frame into the Thread + lea rcx, [rsp + sizeof_OutgoingScratchSpace] ; rcx <- PInvokeTransitionFrame* + mov [rsi + OFFSETOF__Thread__m_pHackPInvokeTunnel], rcx + + ;; + ;; Unhijack this thread, if necessary. 
+ ;; + INLINE_THREAD_UNHIJACK rsi, rax, rcx ;; trashes RAX, RCX + +ifdef FEATURE_GC_STRESS + xor eax, eax + cmp [g_fGcStressStarted], eax + jz @F + + mov rdx, [rsp + 2c0h] + mov rcx, [g_pTheRuntimeInstance] + call RuntimeInstance__ShouldHijackLoopForGcStress + cmp al, 0 + je @F + + call REDHAWKGCINTERFACE__STRESSGC +@@: +endif ;; FEATURE_GC_STRESS + + lea rcx, [rsp + sizeof_OutgoingScratchSpace] ; calculate PInvokeTransitionFrame pointer + call RhpWaitForGCNoAbort + + DoneWaitingForGc: + + fxrstor [rsp + 0c0h] + + cmp [g_fHasFastFxsave], 0 + jz DontRestoreXmmAgain + + movdqa xmm0 , [rsp + 0c0h + 0a0h + 0*10h] + movdqa xmm1 , [rsp + 0c0h + 0a0h + 1*10h] + movdqa xmm2 , [rsp + 0c0h + 0a0h + 2*10h] + movdqa xmm3 , [rsp + 0c0h + 0a0h + 3*10h] + movdqa xmm4 , [rsp + 0c0h + 0a0h + 4*10h] + movdqa xmm5 , [rsp + 0c0h + 0a0h + 5*10h] + movdqa xmm6 , [rsp + 0c0h + 0a0h + 6*10h] + movdqa xmm7 , [rsp + 0c0h + 0a0h + 7*10h] + movdqa xmm8 , [rsp + 0c0h + 0a0h + 8*10h] + movdqa xmm9 , [rsp + 0c0h + 0a0h + 9*10h] + movdqa xmm10, [rsp + 0c0h + 0a0h + 10*10h] + movdqa xmm11, [rsp + 0c0h + 0a0h + 11*10h] + movdqa xmm12, [rsp + 0c0h + 0a0h + 12*10h] + movdqa xmm13, [rsp + 0c0h + 0a0h + 13*10h] + movdqa xmm14, [rsp + 0c0h + 0a0h + 14*10h] + movdqa xmm15, [rsp + 0c0h + 0a0h + 15*10h] + +DontRestoreXmmAgain: + add rsp, sizeof_OutgoingScratchSpace + mov eax, [rsp + OFFSETOF__PInvokeTransitionFrame__m_Flags] + test eax, PTFF_THREAD_ABORT + pop rax ; m_RIP + pop rbp ; m_FramePointer + pop rax ; m_pThread + pop rax ; m_Flags / m_dwAlign2 + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + pop rax ; RSP + pop rax ; RAX save + pop rcx + pop rdx + pop r8 + pop r9 + pop r10 + pop r11 + + ;; restore PSP + ;; 2F0h -> offset of the PSP area + ;; 0B8h -> offset of the end of the integer register area which is already popped + mov rsp, [rsp + 2f0h - 0b8h] + + ;; RSP is PSP at this point and the stack looks like this: + ;; [PSP - 10] -> eflags save + ;; [PSP - 8] -> return address + ;; [PSP] -> caller's frame + ;; + ;; The final step is to restore eflags and return + + lea rsp, [rsp - 10h] + jz @f ;; result of the test instruction before the pops above + popfq ;; restore flags + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rdx ;; return address as exception RIP + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + +@@: + popfq ;; restore flags + ret + +NESTED_END RhpTrapToGC, _TEXT + +ifdef FEATURE_GC_STRESS +;; +;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. +;; +LEAF_ENTRY RhpSuppressGcStress, _TEXT + + INLINE_GETTHREAD rax, r10 + lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + ret + +LEAF_END RhpSuppressGcStress, _TEXT +endif ;; FEATURE_GC_STRESS + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/amd64/GetThread.asm new file mode 100644 index 0000000000000..409ba3dafd99d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/GetThread.asm @@ -0,0 +1,27 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
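Editor's note: conceptually, the RhpGetThread helper defined in this file just hands back the current thread's Thread* from thread-local storage; it is written in assembly because of its unusual contract (argument registers must be preserved and only R10 may be trashed), which compiled C++ cannot guarantee. A hedged sketch of the observable behavior, with an illustrative thread-local name:

#include <cstdint>

class Thread;                                   // runtime-internal type

extern thread_local Thread* t_pCurrentThread;   // illustrative; the real storage is runtime-defined

// What a caller observes from RhpGetThread: the current Thread* returned in RAX.
extern "C" Thread* GetThreadSketch()
{
    return t_pCurrentThread;
}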
+ +include asmmacros.inc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: +;; +;; OUTPUT: RAX: Thread pointer +;; +;; TRASHES: R10 +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpGetThread, _TEXT + ;; rax = GetThread(), TRASHES r10 + INLINE_GETTHREAD rax, r10 + ret +LEAF_END RhpGetThread, _TEXT + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.S new file mode 100644 index 0000000000000..bfb577365276e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.S @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + mov rax, rdx +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation + lock cmpxchg [rdi], esi + ret +LEAF_END RhpLockCmpXchg32, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg64, _TEXT + mov rax, rdx +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation + lock cmpxchg [rdi], rsi + ret +LEAF_END RhpLockCmpXchg64, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.asm b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.asm new file mode 100644 index 0000000000000..f7b9bd1be7772 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.asm @@ -0,0 +1,26 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
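Editor's note: the Unix helper above and the Windows helper below implement the same interlocked compare-exchange contract. Judging from the register usage, the argument order appears to be (destination, value, comparand), with the original contents of the destination returned in RAX. A portable C++ sketch of that contract (function names are illustrative):

#include <cstdint>

// Atomically: if (*dest == comparand) *dest = value; always return the prior
// value of *dest, matching the RAX result of lock cmpxchg.
extern "C" int32_t LockCmpXchg32Sketch(int32_t* dest, int32_t value, int32_t comparand)
{
    return __sync_val_compare_and_swap(dest, comparand, value);
}

extern "C" int64_t LockCmpXchg64Sketch(int64_t* dest, int64_t value, int64_t comparand)
{
    return __sync_val_compare_and_swap(dest, comparand, value);
}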
+ +include AsmMacros.inc + +;; WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + mov rax, r8 +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation + lock cmpxchg [rcx], edx + ret +LEAF_END RhpLockCmpXchg32, _TEXT + +;; WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg64, _TEXT + mov rax, r8 +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation + lock cmpxchg [rcx], rdx + ret +LEAF_END RhpLockCmpXchg64, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.S new file mode 100644 index 0000000000000..66454d5466fa2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.S @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include +#define POINTER_SIZE 8 + +LEAF_ENTRY RhCommonStub, _TEXT + + PUSH_ARGUMENT_REGISTERS + push_register r10 + + alloc_stack SIZEOF_FP_REGS + SAVE_FLOAT_ARGUMENT_REGISTERS 0 + + INLINE_GET_TLS_VAR tls_thunkData + + RESTORE_FLOAT_ARGUMENT_REGISTERS 0 + free_stack SIZEOF_FP_REGS + + pop_register r10 + POP_ARGUMENT_REGISTERS + + mov r11, [r10] + mov qword ptr [rax], r11 + + mov rax, [r10 + POINTER_SIZE] + jmp rax +LEAF_END RhCommonStub, _TEXT + + +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + lea rax, [rip + C_FUNC(RhCommonStub)] + ret +LEAF_END RhGetCommonStubAddress, _TEXT + + +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + + INLINE_GET_TLS_VAR tls_thunkData + + mov rax, qword ptr [rax] + ret +LEAF_END RhGetCurrentThunkContext, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..e1107717d4c70 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.asm @@ -0,0 +1,97 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
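Editor's note: the Unix stub above and the Windows stub below share one scheme: R10 points at a two-pointer data block owned by the thunk, the first pointer (the context) is stashed in a per-thread slot, and control tail-jumps to the second pointer (the target), which can later fetch the context via RhGetCurrentThunkContext. A C++ sketch of that data flow; the struct and variable names are assumptions, and the real stub must stay in assembly because it has to leave every argument register untouched.

#include <cstdint>

struct ThunkDataSketch        // hypothetical layout of the thunk's data block
{
    void* context;            // e.g. delegate or open-static-delegate cell
    void* target;             // code the stub tail-calls
};

thread_local void* t_thunkData;     // stands in for ThunkParamSlot / tls_thunkData

using ThunkTarget = void (*)();

void CommonStubSketch(ThunkDataSketch* data)        // data plays the role of R10
{
    t_thunkData = data->context;                    // publish context for the target
    reinterpret_cast<ThunkTarget>(data->target)();  // "jmp" to the real entry point
}

void* GetCurrentThunkContextSketch()
{
    return t_thunkData;                             // what RhGetCurrentThunkContext returns
}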
+ + +;; ----------------------------------------------------------------------------------------------------------- +;;#include "asmmacros.inc" +;; ----------------------------------------------------------------------------------------------------------- + +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + align 16 + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction +; sequence which is recognized by the unwinder as a valid epilogue terminator +TAILJMP_RAX TEXTEQU + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +_tls_array equ 58h ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 08h + +;; TLS variables +_TLS SEGMENT ALIAS(".tls$") + ThunkParamSlot DQ 0000000000000000H +_TLS ENDS + +EXTRN _tls_index:DWORD + + +;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; RhCommonStub +;; +LEAF_ENTRY RhCommonStub, _TEXT + ;; There are arbitrary callers passing arguments with arbitrary signatures. + ;; Custom calling convention: + ;; r10: pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + mov [rsp + 8], rcx ;; Save rcx in a home scratch location. Pushing the + ;; register on the stack will break callstack unwind + mov ecx, [_tls_index] + mov r11, gs:[_tls_array] + mov rax, [r11 + rcx * POINTER_SIZE] + + ;; rax = base address of TLS data + ;; r10 = address of context cell in thunk's data + ;; r11 = trashed + + ;; store thunk address in thread static + mov r11, [r10] + mov ecx, SECTIONREL ThunkParamSlot + mov [rax + rcx], r11 ;; ThunkParamSlot <- context slot data + + mov rcx, [rsp + 8] ;; Restore rcx + + ;; jump to the target + mov rax, [r10 + POINTER_SIZE] + TAILJMP_RAX +LEAF_END RhCommonStub, _TEXT + + +;; +;; IntPtr RhGetCommonStubAddress() +;; +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + lea rax, [RhCommonStub] + ret +LEAF_END RhGetCommonStubAddress, _TEXT + + +;; +;; IntPtr RhGetCurrentThunkContext() +;; +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + mov r10d, [_tls_index] + mov r11, gs:[_tls_array] + mov r10, [r11 + r10 * POINTER_SIZE] + mov r8d, SECTIONREL ThunkParamSlot + mov rax, [r10 + r8] ;; rax <- ThunkParamSlot + ret +LEAF_END RhGetCurrentThunkContext, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/MemClrForGC.asm b/src/coreclr/src/nativeaot/Runtime/amd64/MemClrForGC.asm new file mode 100644 index 0000000000000..047467aa7ed4a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/MemClrForGC.asm @@ -0,0 +1,99 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
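Editor's note: the memclr_for_gc routine defined in this file clears roughly the first 8 KB with ordinary rep stos stores and everything past that with non-temporal movnti stores, so a very large clear does not flush the rest of the cache. A rough C++ rendering of that strategy using intrinsics follows; the 8 KB threshold comes from the assembly, while the function name and the trailing sfence are additions of this sketch.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <emmintrin.h>      // _mm_stream_si64 (movnti), _mm_sfence

void MemClrForGcSketch(void* dest, size_t size)
{
    uint8_t* p = static_cast<uint8_t*>(dest);
    const size_t temporalLimit = 8 * 1024;

    // Bytes beyond the first 8 KB, rounded down to whole 64-byte blocks, are
    // cleared non-temporally; everything else goes through normal stores.
    size_t ntBytes = size > temporalLimit
                         ? ((size - temporalLimit) & ~static_cast<size_t>(63))
                         : 0;
    size_t temporalBytes = size - ntBytes;

    std::memset(p, 0, temporalBytes);               // "rep stosq / rep stosb" portion

    for (size_t i = 0; i < ntBytes; i += 8)         // "movnti" loop
        _mm_stream_si64(reinterpret_cast<long long*>(p + temporalBytes + i), 0);

    _mm_sfence();   // not in the assembly; added so the sketch's streaming stores are ordered
}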
+ +include AsmMacros.inc + + +LEAF_ENTRY memclr_for_gc, _TEXT + +; x64 version + +; we get the following parameters +; rcx = destination address +; rdx = size to clear + + ; save rdi - this should be faster than a push + mov r11,rdi + + xor eax, eax + + ; check alignment of destination + test cl,7 + jnz alignDest +alignDone: + ; now destination is qword aligned + ; move it to rdi for rep stos + mov rdi,rcx + + ; compute number of bytes to clear non-temporally + ; we wish to clear the first 8k or so with rep stos, + ; anything above that non-temporally + + xor r8,r8 + cmp rdx,8*1024 + jbe noNonTempClear + + ; compute the number of bytes above 8k + ; and round down to a multiple of 64 + mov r8,rdx + sub r8,8*1024 + and r8,not 63 + + ; compute remaining size to clear temporally + sub rdx,r8 + +noNonTempClear: + + ; do the temporal clear + mov rcx,rdx + shr rcx,3 + rep stosq + + ; do the non-temporal clear + test r8,r8 + jne nonTempClearLoop + +nonTempClearDone: + + ; clear any remaining bytes + mov rcx,rdx + and rcx,7 + rep stosb + + ; restore rdi + mov rdi,r11 + + ret + + ; this is the infrequent case, hence out of line +nonTempClearLoop: + movnti [rdi+ 0],rax + movnti [rdi+ 8],rax + movnti [rdi+16],rax + movnti [rdi+24],rax + + movnti [rdi+32],rax + movnti [rdi+40],rax + movnti [rdi+48],rax + movnti [rdi+56],rax + + add rdi,64 + sub r8,64 + ja nonTempClearLoop + jmp nonTempClearDone + +alignDest: + test rdx,rdx + je alignDone +alignLoop: + mov [rcx],al + add rcx,1 + sub rdx,1 + jz alignDone + test cl,7 + jnz alignLoop + jmp alignDone + +LEAF_END memclr_for_gc, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.S new file mode 100644 index 0000000000000..a335b997459d3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.S @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// The following helper will access ("probe") a word on each page of the stack +// starting with the page right beneath rsp down to the one pointed to by r11. +// The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +// The call to the helper will be emitted by JIT in the function/funclet prolog when large (larger than 0x3000 bytes) stack frame is required. +// +// NOTE: On Linux we must advance the stack pointer as we probe - it is not allowed to access 65535 bytes below rsp. +// Since this helper will modify a value of rsp - it must establish the frame pointer. +// +// See also https://github.com/dotnet/runtime/issues/9899 for more information. + +#define PAGE_SIZE 0x1000 + +NESTED_ENTRY RhpStackProbe, _TEXT, NoHandler + // On entry: + // r11 - points to the lowest address on the stack frame being allocated (i.e. [InitialSp - FrameSize]) + // rsp - points to some byte on the last probed page + // On exit: + // r11 - is preserved + // + // NOTE: this helper will probe at least one page below the one pointed by rsp. + + push_nonvol_reg rbp + mov rbp, rsp + set_cfa_register rbp, 16 + + END_PROLOGUE + + and rsp, -PAGE_SIZE // rsp points to the **lowest address** on the last probed page + // This is done to make the following loop end condition simpler. 
+ +LOCAL_LABEL(ProbeLoop): + sub rsp, PAGE_SIZE // rsp points to the lowest address of the **next page** to probe + test dword ptr [rsp], eax // rsp points to the lowest address on the **last probed** page + cmp rsp, r11 + jg LOCAL_LABEL(ProbeLoop) // if (rsp > r11), then we need to probe at least one more page. + + RESET_FRAME_WITH_RBP + ret + +NESTED_END RhpStackProbe, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.asm new file mode 100644 index 0000000000000..16e00fb53f944 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.asm @@ -0,0 +1,276 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +EXTERN GetClasslibCCtorCheck : PROC +EXTERN memcpy : PROC +EXTERN memcpyGCRefs : PROC +EXTERN memcpyGCRefsWithWriteBarrier : PROC +EXTERN memcpyAnyWithWriteBarrier : PROC + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; rax : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; +LEAF_ENTRY RhpCheckCctor, _TEXT + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [rax + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor__SlowPath + ret +RhpCheckCctor__SlowPath: + mov rdx, rax + jmp RhpCheckCctor2 ; Tail-call the check cctor helper that can actually call the cctor +LEAF_END RhpCheckCctor, _TEXT + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; rax : Value that must be preserved in this register across the cctor check. +;; rdx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than rax may be trashed and the condition codes may also be trashed. +;; +LEAF_ENTRY RhpCheckCctor2, _TEXT + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [rdx + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor2__SlowPath + ret + +LEAF_END RhpCheckCctor2, _TEXT + +;; +;; Slow path helper for RhpCheckCctor2. +;; +;; Input: +;; rax : Value that must be preserved in this register across the cctor check. +;; rdx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than rax may be trashed and the condition codes may also be trashed. 
+;; +NESTED_ENTRY RhpCheckCctor2__SlowPath, _TEXT + +RhpCheckCctor2__SlowPath_FrameSize equ 20h + 10h + 8h ;; Scratch space + storage to save off rax/rdx value + align stack + + alloc_stack RhpCheckCctor2__SlowPath_FrameSize + save_reg_postrsp rdx, 20h + save_reg_postrsp rax, 28h + + END_PROLOGUE + + ;; Call a C++ helper to retrieve the address of the classlib callback. + + ;; The caller's return address is passed as the argument to the helper; it's an address in the module + ;; and is used by the helper to locate the classlib. + mov rcx, [rsp + RhpCheckCctor2__SlowPath_FrameSize] + + call GetClasslibCCtorCheck + + ;; Rax now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in stashed on the stack. Clean up and tail call to the classlib + ;; callback so we're not on the stack should a GC occur (so we don't need to worry about transition + ;; frames). + mov rdx, [rsp + 20h] + mov rcx, [rsp + 28h] + add rsp, RhpCheckCctor2__SlowPath_FrameSize + ;; Tail-call the classlib cctor check function. Note that the incoming rax value is moved to rcx + ;; and the classlib cctor check function is required to return that value, so that rax is preserved + ;; across a RhpCheckCctor call. + TAILJMP_RAX + +NESTED_END RhpCheckCctor2__SlowPath, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +LEAF_ENTRY RhpCopyMultibyteNoGCRefs, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to plain-old-memcpy + jmp memcpy + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyMultibyteNoGCRefs, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +LEAF_ENTRY RhpCopyMultibyte, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
+ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to the GC-safe memcpy implementation + jmp memcpyGCRefs + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyMultibyte, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; +LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to the GC-safe memcpy implementation + jmp memcpyGCRefsWithWriteBarrier + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyMultibyteWithWriteBarrier, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if the copy may contain GC pointers +;; +LEAF_ENTRY RhpCopyAnyWithWriteBarrier, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to the GC-safe memcpy implementation + jmp memcpyAnyWithWriteBarrier + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyAnyWithWriteBarrier, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; The following helper will access ("probe") a word on each page of the stack +; starting with the page right beneath rsp down to the one pointed to by r11. +; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +; The call to the helper will be emitted by JIT in the function/funclet prolog when large (larger than 0x3000 bytes) stack frame is required. 
+; +; NOTE: this helper will NOT modify a value of rsp and can be defined as a leaf function. + +PAGE_SIZE equ 1000h + +LEAF_ENTRY RhpStackProbe, _TEXT + ; On entry: + ; r11 - points to the lowest address on the stack frame being allocated (i.e. [InitialSp - FrameSize]) + ; rsp - points to some byte on the last probed page + ; On exit: + ; rax - is not preserved + ; r11 - is preserved + ; + ; NOTE: this helper will probe at least one page below the one pointed by rsp. + + mov rax, rsp ; rax points to some byte on the last probed page + and rax, -PAGE_SIZE ; rax points to the **lowest address** on the last probed page + ; This is done to make the following loop end condition simpler. + +ProbeLoop: + sub rax, PAGE_SIZE ; rax points to the lowest address of the **next page** to probe + test dword ptr [rax], eax ; rax points to the lowest address on the **last probed** page + cmp rax, r11 + jg ProbeLoop ; If (rax > r11), then we need to probe at least one more page. + + ret + +LEAF_END RhpStackProbe, _TEXT + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.S new file mode 100644 index 0000000000000..0048119214508 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.S @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// +// RhpPInvoke +// +// IN: RDI: address of pinvoke frame +// +// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +// Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
+// +NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler + push_nonvol_reg rbx + mov rbx, rdi + + // RAX = GetThread() + INLINE_GETTHREAD + + mov r11, [rsp + 0x8] // r11 <- return address + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_pThread], rax + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_FramePointer], rbp + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_RIP], r11 + + lea r11, [rsp + 0x10] // r11 <- caller SP + mov dword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_SAVE_RSP + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs], r11 + + mov qword ptr [rax + OFFSETOF__Thread__m_pTransitionFrame], rbx + + test dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_TrapThreads + pop_nonvol_reg rbx + jnz 0f // forward branch - predicted not taken + ret +0: + jmp C_FUNC(RhpWaitForSuspend2) +NESTED_END RhpPInvoke, _TEXT + + +// +// RhpPInvokeReturn +// +// IN: RDI: address of pinvoke frame +// +LEAF_ENTRY RhpPInvokeReturn, _TEXT + mov rsi, [rdi + OFFSETOF__PInvokeTransitionFrame__m_pThread] + mov qword ptr [rsi + OFFSETOF__Thread__m_pTransitionFrame], 0 + cmp dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_None + jne 0f // forward branch - predicted not taken + ret +0: + // passing transition frame pointer in rdi + jmp C_FUNC(RhpWaitForGC2) +LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.asm new file mode 100644 index 0000000000000..c9f93df834a20 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.asm @@ -0,0 +1,329 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include asmmacros.inc + +extern RhpReversePInvokeBadTransition : proc + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpWaitForSuspend, _TEXT + push_vol_reg rax + alloc_stack 60h + + ; save the arg regs in the caller's scratch space + save_reg_postrsp rcx, 70h + save_reg_postrsp rdx, 78h + save_reg_postrsp r8, 80h + save_reg_postrsp r9, 88h + + ; save the FP arg regs in our stack frame + save_xmm128_postrsp xmm0, (20h + 0*10h) + save_xmm128_postrsp xmm1, (20h + 1*10h) + save_xmm128_postrsp xmm2, (20h + 2*10h) + save_xmm128_postrsp xmm3, (20h + 3*10h) + + END_PROLOGUE + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jz NoWait + + call RhpWaitForSuspend2 + +NoWait: + movdqa xmm0, [rsp + 20h + 0*10h] + movdqa xmm1, [rsp + 20h + 1*10h] + movdqa xmm2, [rsp + 20h + 2*10h] + movdqa xmm3, [rsp + 20h + 3*10h] + + mov rcx, [rsp + 70h] + mov rdx, [rsp + 78h] + mov r8, [rsp + 80h] + mov r9, [rsp + 88h] + + add rsp, 60h + pop rax + ret + +NESTED_END RhpWaitForSuspend, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort -- rare path for RhpPInvokeReturn +;; +;; +;; INPUT: RCX: transition frame +;; +;; TRASHES: RCX, RDX, R8, R9, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT + push_vol_reg rax ; don't trash the integer return value + 
alloc_stack 30h + movdqa [rsp + 20h], xmm0 ; don't trash the FP return value + END_PROLOGUE + + mov rdx, [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread] + + test dword ptr [rdx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jnz Done + + ; passing transition frame pointer in rcx + call RhpWaitForGC2 + +Done: + movdqa xmm0, [rsp + 20h] + add rsp, 30h + pop rax + ret + +NESTED_END RhpWaitForGCNoAbort, _TEXT + +EXTERN RhpThrowHwEx : PROC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC -- rare path for RhpPInvokeReturn +;; +;; +;; INPUT: RCX: transition frame +;; +;; TRASHES: RCX, RDX, R8, R9, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpWaitForGC, _TEXT + push_nonvol_reg rbx + END_PROLOGUE + + mov rbx, rcx + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jz NoWait + + call RhpWaitForGCNoAbort +NoWait: + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz Done + test dword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jz Done + + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rbx + pop rdx ; return address as exception RIP + jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception + +Done: + pop rbx + ret + +NESTED_END RhpWaitForGC, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; +;; INCOMING: RAX -- address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 8: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;; PRESERVES: RCX, RDX, R8, R9 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: RAX, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpReversePInvoke, _TEXT + ;; R10 = GetThread(), TRASHES R11 + INLINE_GETTHREAD r10, r11 + mov [rax + 8], r10 ; save thread pointer for RhpReversePInvokeReturn + + test dword ptr [r10 + OFFSETOF__Thread__m_ThreadStateFlags], TSF_Attached + jz AttachThread + + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + cmp qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], 0 + je CheckBadTransition + + ; rax: reverse pinvoke frame + ; r10: thread + + ; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ; whenever we might attempt to hijack this thread. + mov r11, [r10 + OFFSETOF__Thread__m_pTransitionFrame] + mov [rax], r11 + + mov qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], 0 + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz TrapThread + + ret + +CheckBadTransition: + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
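Taken together, the attach, trap, and bad-transition checks in RhpReversePInvoke boil down to a small decision tree. A hedged C++ condensation follows; the enum and parameter names are invented for illustration, while TSF_Attached, TSF_DoNotTriggerGc, the saved transition frame, and the trap flag are the real inputs used by the assembly.

// Condensed decision logic of the reverse P/Invoke entry helper (names are illustrative).
enum class ReverseEntrySketch { Proceed, AttachThread, TrapThread, BadTransition };

inline ReverseEntrySketch ReversePInvokeEnterSketch(bool tsfAttached, bool tsfDoNotTriggerGc,
                                                    void* currentTransitionFrame, bool trapThreadsSet)
{
    if (!tsfAttached)
        return ReverseEntrySketch::AttachThread;        // first managed entry on this thread

    if (currentTransitionFrame == nullptr)
    {
        // Thread is already in cooperative mode. Only legal under TSF_DoNotTriggerGc
        // (restricted GC callouts, CCW vtable methods); anything else is a bad transition.
        return tsfDoNotTriggerGc ? ReverseEntrySketch::Proceed
                                 : ReverseEntrySketch::BadTransition;
    }

    // Normal path: the helper saves currentTransitionFrame into the reverse-pinvoke frame and
    // clears the thread's transition frame; a pending suspension diverts to the trap helper.
    return trapThreadsSet ? ReverseEntrySketch::TrapThread : ReverseEntrySketch::Proceed;
}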
+ test dword ptr [r10 + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jz BadTransition + + ;; RhpTrapThreads will always be set in this case, so we must skip that check. We must be sure to + ;; zero-out our 'previous transition frame' state first, however. + mov qword ptr [rax], 0 + ret + +TrapThread: + ;; put the previous frame back (sets us back to preemptive mode) + mov qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], r11 + +AttachThread: + ; passing address of reverse pinvoke frame in rax + jmp RhpReversePInvokeAttachOrTrapThread + +BadTransition: + mov rcx, qword ptr [rsp] ; arg <- return address + jmp RhpReversePInvokeBadTransition + +LEAF_END RhpReversePInvoke, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeAttachOrTrapThread +;; +;; +;; INCOMING: RAX -- address of reverse pinvoke frame +;; +;; PRESERVES: RCX, RDX, R8, R9 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: RAX, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT + alloc_stack 88h ; alloc scratch area and frame + + ; save the integer arg regs + save_reg_postrsp rcx, (20h + 0*8) + save_reg_postrsp rdx, (20h + 1*8) + save_reg_postrsp r8, (20h + 2*8) + save_reg_postrsp r9, (20h + 3*8) + + ; save the FP arg regs + save_xmm128_postrsp xmm0, (20h + 4*8 + 0*10h) + save_xmm128_postrsp xmm1, (20h + 4*8 + 1*10h) + save_xmm128_postrsp xmm2, (20h + 4*8 + 2*10h) + save_xmm128_postrsp xmm3, (20h + 4*8 + 3*10h) + + END_PROLOGUE + + mov rcx, rax ; rcx <- reverse pinvoke frame + call RhpReversePInvokeAttachOrTrapThread2 + + movdqa xmm0, [rsp + (20h + 4*8 + 0*10h)] + movdqa xmm1, [rsp + (20h + 4*8 + 1*10h)] + movdqa xmm2, [rsp + (20h + 4*8 + 2*10h)] + movdqa xmm3, [rsp + (20h + 4*8 + 3*10h)] + + mov rcx, [rsp + (20h + 0*8)] + mov rdx, [rsp + (20h + 1*8)] + mov r8, [rsp + (20h + 2*8)] + mov r9, [rsp + (20h + 3*8)] + + ;; epilog + add rsp, 88h + ret + +NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: RCX: address of reverse pinvoke frame +;; +;; TRASHES: RCX, RDX, R10, R11 +;; +;; PRESERVES: RAX (return value) +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpReversePInvokeReturn, _TEXT + mov rdx, [rcx + 8] ; get Thread pointer + mov rcx, [rcx + 0] ; get previous M->U transition frame + + mov [rdx + OFFSETOF__Thread__m_pTransitionFrame], rcx + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne RhpWaitForSuspend + ret +LEAF_END RhpReversePInvokeReturn, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpPInvoke +;; +;; IN: RCX: address of pinvoke frame +;; +;; TRASHES: R10, R11 +;; +;; This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +;; The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +;; Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
+;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpPInvoke, _TEXT + ;; R10 = GetThread(), TRASHES R11 + INLINE_GETTHREAD r10, r11 + + mov r11, [rsp] ; r11 <- return address + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread], r10 + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_FramePointer], rbp + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_RIP], r11 + + lea r11, [rsp + 8] ; r11 <- caller SP + mov dword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_SAVE_RSP + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs], r11 + + mov qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], rcx + + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne @F ; forward branch - predicted not taken + ret +@@: + jmp RhpWaitForSuspend +LEAF_END RhpPInvoke, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpPInvokeReturn +;; +;; IN: RCX: address of pinvoke frame +;; +;; TRASHES: RCX, RDX, R8, R9, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpPInvokeReturn, _TEXT + mov rdx, [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread] + mov qword ptr [rdx + OFFSETOF__Thread__m_pTransitionFrame], 0 + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne @F ; forward branch - predicted not taken + ret +@@: + ; passing transition frame pointer in rcx + jmp RhpWaitForGC +LEAF_END RhpPInvokeReturn, _TEXT + + +END diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.S new file mode 100644 index 0000000000000..8fa74c29616dd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.S @@ -0,0 +1,82 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// trick to avoid PLT relocation at runtime which corrupts registers +#define REL_C_FUNC(name) C_FUNC(name)@gotpcrel + + +// Macro that generates a stub consuming a cache with the given number of entries. +.macro DEFINE_INTERFACE_DISPATCH_STUB entries + +LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT + + // r10 currently contains the indirection cell address. + // load r11 to point to the cache block. + mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in rdi. + mov rax, [rdi] + + CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries + + // For each entry in the cache, see if its EEType type matches the EEType in rax. + // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. + .rept \entries + cmp rax, [r11 + CurrentOffset] + jne 0f + jmp [r11 + CurrentOffset + 8] + 0: + CurrentOffset = CurrentOffset + 16 + .endr + + // r10 still contains the the indirection cell address. + + jmp C_FUNC(RhpInterfaceDispatchSlow) +LEAF_END RhpInterfaceDispatch\entries, _TEXT + +.endm // DEFINE_INTERFACE_DISPATCH_STUB + + + +// Define all the stub routines we currently need. +// +// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. 
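Each generated stub is a fixed-size linear probe over the dispatch cell's cache. A rough C++ rendering of what an N-entry stub does is shown below; the struct shapes are illustrative stand-ins (the real InterfaceDispatchCell and cache layouts come from AsmOffsets.cpp), and the return value models the tail jump the stub performs.

#include <cstddef>

struct CacheEntrySketch { void* methodTable; void* target; };      // one 16-byte cache entry, as in the stubs
struct DispatchCellSketch
{
    CacheEntrySketch* cache;        // stands in for m_pCache
    size_t            entryCount;
};

using ResolveFnSketch = void* (*)(DispatchCellSketch* cell, void* obj);  // cache-miss resolution path

// Read the object's MethodTable, probe the cache linearly, take the cached target on a hit,
// otherwise fall into the slow path (the RhpInterfaceDispatchSlow / RhpCidResolve route).
inline void* InterfaceDispatchSketch(DispatchCellSketch* cell, void* obj, ResolveFnSketch slowPath)
{
    void* methodTable = *static_cast<void**>(obj);   // first pointer-sized field of the object
    for (size_t i = 0; i < cell->entryCount; i++)
    {
        if (cell->cache[i].methodTable == methodTable)
            return cell->cache[i].target;            // the stub tail-jumps here instead of returning
    }
    return slowPath(cell, obj);
}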
+// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo +// +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// Initial dispatch on an interface when we don't have a cache yet. +LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT +ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + + // Just tail call to the cache miss helper. + jmp C_FUNC(RhpInterfaceDispatchSlow) + +LEAF_END RhpInitialInterfaceDispatch, _TEXT + +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r10 contains indirection cell address, move to r11 where it will be passed by + // the universal transition thunk as an argument to RhpCidResolve + mov r11, r10 + mov r10, [rip + REL_C_FUNC(RhpCidResolve)] + jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] + +LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.asm new file mode 100644 index 0000000000000..11912a958716f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.asm @@ -0,0 +1,108 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + +;; Macro that generates code to check a single cache entry. +CHECK_CACHE_ENTRY macro entry +NextLabel textequ @CatStr( Attempt, %entry+1 ) + cmp rax, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)] + jne NextLabel + jmp qword ptr [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8] +NextLabel: +endm + + +;; Macro that generates a stub consuming a cache with the given number of entries. +DEFINE_INTERFACE_DISPATCH_STUB macro entries + +StubName textequ @CatStr( RhpInterfaceDispatch, entries ) + +LEAF_ENTRY StubName, _TEXT + +;EXTERN CID_g_cInterfaceDispatches : DWORD + ;inc [CID_g_cInterfaceDispatches] + + ;; r10 currently contains the indirection cell address. + ;; load r11 to point to the cache block. + mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in rcx. + mov rax, [rcx] + +CurrentEntry = 0 + while CurrentEntry lt entries + CHECK_CACHE_ENTRY %CurrentEntry +CurrentEntry = CurrentEntry + 1 + endm + + ;; r10 still contains the the indirection cell address. + + jmp RhpInterfaceDispatchSlow + +LEAF_END StubName, _TEXT + + endm ;; DEFINE_INTERFACE_DISPATCH_STUB + + +;; Define all the stub routines we currently need. +;; +;; The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. 
+;; If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo +;; +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r10 currently contains the indirection cell address. + ;; load rax to point to the vtable offset (which is stored in the m_pCache field). + mov rax, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + add rax, [rcx] + + ;; Load the target address of the vtable into rax + mov rax, [rax] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + + +;; Initial dispatch on an interface when we don't have a cache yet. +LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT +ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + + ;; Just tail call to the cache miss helper. + jmp RhpInterfaceDispatchSlow + +LEAF_END RhpInitialInterfaceDispatch, _TEXT + +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ;; r10 contains indirection cell address, move to r11 where it will be passed by + ;; the universal transition thunk as an argument to RhpCidResolve + mov r11, r10 + lea r10, RhpCidResolve + jmp RhpUniversalTransition_DebugStepTailCall + +LEAF_END RhpInterfaceDispatchSlow, _TEXT + + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/amd64/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..e2700d7fda390 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/ThunkPoolThunks.asm @@ -0,0 +1,291 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +;; ----------------------------------------------------------------------------------------------------------- +;;#include "asmmacros.inc" +;; ----------------------------------------------------------------------------------------------------------- + +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + align 16 + public Name + Name proc +endm + +NAMED_LEAF_ENTRY macro Name, Section, SectionAlias + Section segment para alias(SectionAlias) 'CODE' + align 16 + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +NAMED_READONLY_DATA_SECTION macro Section, SectionAlias + Section segment alias(SectionAlias) read 'DATA' + align 16 + DQ 0 + Section ends +endm + +NAMED_READWRITE_DATA_SECTION macro Section, SectionAlias + Section segment alias(SectionAlias) read write 'DATA' + align 16 + DQ 0 + Section ends +endm + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 10h ;; 7-byte lea, 6-byte jmp, 3 bytes of nops +THUNK_DATASIZE equ 010h ;; 2 qwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0FAh ;; 250 thunks per page + +PAGE_SIZE equ 01000h ;; 4K +POINTER_SIZE equ 08h + + +LOAD_DATA_ADDRESS macro groupIndex, index, thunkPool + ALIGN 10h ;; make sure we align to 16-byte boundary for CFG table + + ;; set r10 to begining of data page : r10 <- [thunkPool] + PAGE_SIZE + ;; fix offset of the data : r10 <- r10 + (THUNK_DATASIZE * current thunk's index) + lea r10, [thunkPool + PAGE_SIZE + (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index)] +endm + +JUMP_TO_COMMON macro groupIndex, index, thunkPool + ;; jump to the location pointed at by the last qword in the data page + jmp qword ptr[thunkPool + PAGE_SIZE + PAGE_SIZE - POINTER_SIZE] +endm + +TenThunks macro groupIndex, thunkPool + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (first qword + ;; in the thunks data section, hence the +8's below) depending on the 'kind' of thunks needed (interop, + ;; fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two qword values: + ;; - Context: some value given to the thunk as context (passed in r10). Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
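Put differently, thunk i's data lives at a fixed offset in the page that immediately follows the code page, and every thunk funnels through a common stub published in a designated slot of the data page (the slot JUMP_TO_COMMON reads, the last qword of that page in this layout). A small C++ sketch of the addressing, with constants mirroring the equates above and invented struct and function names:

#include <cstdint>

constexpr uintptr_t kPoolPageSize  = 0x1000;   // PAGE_SIZE above
constexpr uintptr_t kThunkDataSize = 0x10;     // THUNK_DATASIZE: two qwords per thunk

// Each thunk's data block, as described above: a context value and a jump target.
struct ThunkDataSketch { void* context; void* target; };

// Address of thunk i's data block: the data page immediately follows the code page.
inline ThunkDataSketch* GetThunkDataSketch(uintptr_t thunkPoolBase, unsigned thunkIndex)
{
    uintptr_t dataPage = thunkPoolBase + kPoolPageSize;
    return reinterpret_cast<ThunkDataSketch*>(dataPage + thunkIndex * kThunkDataSize);
}

// The common stub that every thunk in the block jumps through, read from the data page.
inline void* GetCommonStubSketch(uintptr_t thunkPoolBase)
{
    uintptr_t dataPage = thunkPoolBase + kPoolPageSize;
    return *reinterpret_cast<void**>(dataPage + kPoolPageSize - sizeof(void*));
}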
+ + LOAD_DATA_ADDRESS groupIndex,0,thunkPool + JUMP_TO_COMMON groupIndex,0,thunkPool + + LOAD_DATA_ADDRESS groupIndex,1,thunkPool + JUMP_TO_COMMON groupIndex,1,thunkPool + + LOAD_DATA_ADDRESS groupIndex,2,thunkPool + JUMP_TO_COMMON groupIndex,2,thunkPool + + LOAD_DATA_ADDRESS groupIndex,3,thunkPool + JUMP_TO_COMMON groupIndex,3,thunkPool + + LOAD_DATA_ADDRESS groupIndex,4,thunkPool + JUMP_TO_COMMON groupIndex,4,thunkPool + + LOAD_DATA_ADDRESS groupIndex,5,thunkPool + JUMP_TO_COMMON groupIndex,5,thunkPool + + LOAD_DATA_ADDRESS groupIndex,6,thunkPool + JUMP_TO_COMMON groupIndex,6,thunkPool + + LOAD_DATA_ADDRESS groupIndex,7,thunkPool + JUMP_TO_COMMON groupIndex,7,thunkPool + + LOAD_DATA_ADDRESS groupIndex,8,thunkPool + JUMP_TO_COMMON groupIndex,8,thunkPool + + LOAD_DATA_ADDRESS groupIndex,9,thunkPool + JUMP_TO_COMMON groupIndex,9,thunkPool +endm + +THUNKS_PAGE_BLOCK macro thunkPool + TenThunks 0,thunkPool + TenThunks 1,thunkPool + TenThunks 2,thunkPool + TenThunks 3,thunkPool + TenThunks 4,thunkPool + TenThunks 5,thunkPool + TenThunks 6,thunkPool + TenThunks 7,thunkPool + TenThunks 8,thunkPool + TenThunks 9,thunkPool + TenThunks 10,thunkPool + TenThunks 11,thunkPool + TenThunks 12,thunkPool + TenThunks 13,thunkPool + TenThunks 14,thunkPool + TenThunks 15,thunkPool + TenThunks 16,thunkPool + TenThunks 17,thunkPool + TenThunks 18,thunkPool + TenThunks 19,thunkPool + TenThunks 20,thunkPool + TenThunks 21,thunkPool + TenThunks 22,thunkPool + TenThunks 23,thunkPool + TenThunks 24,thunkPool +endm + +;; +;; The first thunks section should be 64K aligned because it can get +;; mapped multiple times in memory, and mapping works on allocation +;; granularity boundaries (we don't want to map more than what we need) +;; +;; The easiest way to do so is by having the thunks section at the +;; first 64K aligned virtual address in the binary. We provide a section +;; layout file to the linker to tell it how to layout the thunks sections +;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) +;; +;; The PE spec says images cannot have gaps between sections (other +;; than what is required by the section alignment value in the header), +;; therefore we need a couple of padding data sections (otherwise the +;; OS will not load the image). 
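Because each block is exactly one code page followed by one data page, converting between a thunk stub address and its data block, as the RhpGetThunkDataBlockAddress and RhpGetThunkStubsBlockAddress helpers near the end of this file do, is plain page arithmetic. A hedged C++ restatement of that arithmetic (names invented, page size taken from the equate above):

#include <cstdint>

constexpr uintptr_t kThunkPageSize = 0x1000;

inline uintptr_t ThunkDataBlockFromStubSketch(uintptr_t stubAddress)
{
    // Round down to the start of the code page, then step to the page after it.
    return (stubAddress & ~(kThunkPageSize - 1)) + kThunkPageSize;
}

inline uintptr_t ThunkStubsBlockFromDataSketch(uintptr_t dataAddress)
{
    // Round down to the start of the data page, then step to the page before it.
    return (dataAddress & ~(kThunkPageSize - 1)) - kThunkPageSize;
}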
+;; + +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, ".pad0" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, ".pad1" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, ".pad2" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, ".pad3" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, ".pad4" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, ".pad5" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, ".pad6" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, ".pad7" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, ".pad8" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, ".pad9" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, ".pad10" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, ".pad11" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, ".pad12" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, ".pad13" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, ".pad14" + +;; +;; Thunk Stubs +;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: +;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs +;; - ndp\rh\src\tools\rhbind\zapimage.h +;; +NAMED_LEAF_ENTRY ThunkPool, TKS0, ".tks0" + THUNKS_PAGE_BLOCK ThunkPool +LEAF_END ThunkPool, TKS0 + +NAMED_READWRITE_DATA_SECTION ThunkData0, ".tkd0" + +NAMED_LEAF_ENTRY ThunkPool1, TKS1, ".tks1" + THUNKS_PAGE_BLOCK ThunkPool1 +LEAF_END ThunkPool1, TKS1 + +NAMED_READWRITE_DATA_SECTION ThunkData1, ".tkd1" + +NAMED_LEAF_ENTRY ThunkPool2, TKS2, ".tks2" + THUNKS_PAGE_BLOCK ThunkPool2 +LEAF_END ThunkPool2, TKS2 + +NAMED_READWRITE_DATA_SECTION ThunkData2, ".tkd2" + +NAMED_LEAF_ENTRY ThunkPool3, TKS3, ".tks3" + THUNKS_PAGE_BLOCK ThunkPool3 +LEAF_END ThunkPool3, TKS3 + +NAMED_READWRITE_DATA_SECTION ThunkData3, ".tkd3" + +NAMED_LEAF_ENTRY ThunkPool4, TKS4, ".tks4" + THUNKS_PAGE_BLOCK ThunkPool4 +LEAF_END ThunkPool4, TKS4 + +NAMED_READWRITE_DATA_SECTION ThunkData4, ".tkd4" + +NAMED_LEAF_ENTRY ThunkPool5, TKS5, ".tks5" + THUNKS_PAGE_BLOCK ThunkPool5 +LEAF_END ThunkPool5, TKS5 + +NAMED_READWRITE_DATA_SECTION ThunkData5, ".tkd5" + +NAMED_LEAF_ENTRY ThunkPool6, TKS6, ".tks6" + THUNKS_PAGE_BLOCK ThunkPool6 +LEAF_END ThunkPool6, TKS6 + +NAMED_READWRITE_DATA_SECTION ThunkData6, ".tkd6" + +NAMED_LEAF_ENTRY ThunkPool7, TKS7, ".tks7" + THUNKS_PAGE_BLOCK ThunkPool7 +LEAF_END ThunkPool7, TKS7 + +NAMED_READWRITE_DATA_SECTION ThunkData7, ".tkd7" + +;; +;; IntPtr RhpGetThunksBase() +;; +LEAF_ENTRY RhpGetThunksBase, _TEXT + ;; Return the address of the first thunk pool to the caller (this is really the base address) + lea rax, [ThunkPool] + ret +LEAF_END RhpGetThunksBase, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; int RhpGetNumThunksPerBlock() +;; +LEAF_ENTRY RhpGetNumThunksPerBlock, _TEXT + mov rax, THUNK_POOL_NUM_THUNKS_PER_PAGE + ret +LEAF_END RhpGetNumThunksPerBlock, _TEXT + +;; +;; int RhpGetThunkSize() +;; +LEAF_ENTRY RhpGetThunkSize, _TEXT + mov rax, THUNK_CODESIZE + ret +LEAF_END RhpGetThunkSize, _TEXT + +;; +;; int RhpGetNumThunkBlocksPerMapping() +;; +LEAF_ENTRY RhpGetNumThunkBlocksPerMapping, _TEXT + mov rax, 8 + ret +LEAF_END RhpGetNumThunkBlocksPerMapping, _TEXT + +;; +;; int RhpGetThunkBlockSize +;; +LEAF_ENTRY RhpGetThunkBlockSize, _TEXT + mov rax, PAGE_SIZE * 2 + ret +LEAF_END RhpGetThunkBlockSize, _TEXT + +;; +;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) +;; +LEAF_ENTRY RhpGetThunkDataBlockAddress, _TEXT + mov rax, rcx + mov rcx, PAGE_SIZE - 
1 + not rcx + and rax, rcx + add rax, PAGE_SIZE + ret +LEAF_END RhpGetThunkDataBlockAddress, _TEXT + +;; +;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) +;; +LEAF_ENTRY RhpGetThunkStubsBlockAddress, _TEXT + mov rax, rcx + mov rcx, PAGE_SIZE - 1 + not rcx + and rax, rcx + sub rax, PAGE_SIZE + ret +LEAF_END RhpGetThunkStubsBlockAddress, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.S new file mode 100644 index 0000000000000..9ad56f8965438 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.S @@ -0,0 +1,162 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +#ifdef FEATURE_DYNAMIC_CODE + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS 1 +#endif + +#define SIZEOF_RETADDR 8 + +#define SIZEOF_RETURN_BLOCK 0x10 // for 16 bytes of conservatively reported space that the callee can + // use to manage the return value that the call eventually generates + +#define SIZEOF_ARGUMENT_REGISTERS 0x30 // Callee register spill + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// SIZEOF_RETADDR +// SIZEOF_ARGUMENT_REGISTERS +// SIZEOF_RETURN_BLOCK +// SIZEOF_FP_REGS +// + +#define DISTANCE_FROM_CHILDSP_TO_FP_REGS 0 + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK SIZEOF_FP_REGS + +#define DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK) + +#define DISTANCE_FROM_CHILDSP_TO_RETADDR (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK + SIZEOF_ARGUMENT_REGISTERS + 8) + +// +// Defines an assembly thunk used to make a transition from managed code to a callee, +// then (based on the return value from the callee), either returning or jumping to +// a new location while preserving the input arguments. The usage of this thunk also +// ensures arguments passed are properly reported. +// +// TODO: This code currently only tailcalls, and does not return. +// +// Inputs: +// rdi, esi, rcx, rdx, r8, r9, stack space: arguments as normal +// r10: The location of the target code the UniversalTransition thunk will call +// r11: The only parameter to the target function (passed in rdx to callee) +// + +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+0D0 CallerSP+000 +// {CallerRetaddr} ChildSP+0C8 CallerSP-008 +// {AlignmentPad (0x8 bytes)} ChildSP+0C0 CallerSP-010 +// {IntArgRegs (0x30 bytes)} ChildSP+090 CallerSP-040 +// {ReturnBlock (0x10 bytes)} ChildSP+080 CallerSP-050 +// {FpArgRegs (xmm0-xmm7) (0x80 bytes)} ChildSP+000 CallerSP-0D0 +// {CalleeRetaddr} ChildSP-008 CallerSP-0D8 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
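In C++ terms, the contract this thunk implements is roughly the sketch below. The function shapes are invented for illustration, but the flow matches the description above: the argument registers are spilled to a conservatively reported frame, the target receives a pointer to the ReturnBlock plus the single r11 argument, and the thunk then tail-jumps to whatever address the target returns.

// The target on this path (for example the cache-miss resolver) returns the address the
// thunk should transfer control to, rather than the thunk returning to its caller.
struct SavedArgsSketch
{
    void*  intArgs[6];     // rdi, rsi, rcx, rdx, r8, r9 on this ABI
    double fpArgs[8];      // xmm0-xmm7 (low halves shown for brevity)
};

using TransitionTargetFnSketch = void* (*)(void* returnBlock, void* argument);

inline void* UniversalTransitionSketch(TransitionTargetFnSketch target, void* argument,
                                       SavedArgsSketch* savedArgs, void* returnBlock)
{
    (void)savedArgs;                       // spilled to the frame so the GC can see and update them
    void* continuation = target(returnBlock, argument);
    return continuation;                   // the real thunk restores the registers and jumps here
}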
+// + +.macro UNIVERSAL_TRANSITION FunctionName + +NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + + alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + // save integer argument registers + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00], rdi + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08], rsi + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10], rcx + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18], rdx + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20], r8 + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28], r9 + + // save fp argument registers + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00], xmm0 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10], xmm1 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20], xmm2 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30], xmm3 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40], xmm4 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50], xmm5 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60], xmm6 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70], xmm7 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + // Before calling out, trash all of the argument registers except the ones (rdi, rsi) that + // hold outgoing arguments. All of these registers have been saved to the transition + // frame, and the code at the call target is required to use only the transition frame + // copies when dispatching this call to the eventual callee. + + movsd xmm0, [C_VAR(RhpFpTrashValues) + 0x0] + movsd xmm1, [C_VAR(RhpFpTrashValues) + 0x8] + movsd xmm2, [C_VAR(RhpFpTrashValues) + 0x10] + movsd xmm3, [C_VAR(RhpFpTrashValues) + 0x18] + movsd xmm4, [C_VAR(RhpFpTrashValues) + 0x20] + movsd xmm5, [C_VAR(RhpFpTrashValues) + 0x28] + movsd xmm6, [C_VAR(RhpFpTrashValues) + 0x30] + movsd xmm7, [C_VAR(RhpFpTrashValues) + 0x38] + + mov rcx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x10] + mov rdx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x18] + mov r8, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x20] + mov r9, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x28] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + // + // Call out to the target, while storing and reporting arguments to the GC. + // + mov rsi, r11 + lea rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK] + call r10 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom\FunctionName + + // restore fp argument registers + movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00] + movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10] + movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20] + movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30] + movdqa xmm4, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40] + movdqa xmm5, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50] + movdqa xmm6, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60] + movdqa xmm7, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70] + + // restore integer argument registers + mov rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00] + mov rsi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08] + mov rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10] + mov rdx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18] + mov r8, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20] + mov r9, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28] + + // Pop the space that was allocated between the ChildSP and the caller return address. 
+ free_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + jmp rax + +NESTED_END Rhp\FunctionName, _TEXT + +.endm // UNIVERSAL_TRANSITION + + // To enable proper step-in behavior in the debugger, we need to have two instances + // of the thunk. For the first one, the debugger steps into the call in the function, + // for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +#endif // FEATURE_DYNAMIC_CODE diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.asm new file mode 100644 index 0000000000000..398c8bd487028 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.asm @@ -0,0 +1,167 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +ifdef FEATURE_DYNAMIC_CODE + +ifdef _DEBUG +TRASH_SAVED_ARGUMENT_REGISTERS equ 1 +else +TRASH_SAVED_ARGUMENT_REGISTERS equ 0 +endif + +if TRASH_SAVED_ARGUMENT_REGISTERS ne 0 +EXTERN RhpIntegerTrashValues : QWORD +EXTERN RhpFpTrashValues : QWORD +endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + +SIZEOF_RETADDR equ 8h + +SIZEOF_ALIGNMENT_PADDING equ 8h + +SIZEOF_RETURN_BLOCK equ 10h ; for 16 bytes of conservatively reported space that the callee can + ; use to manage the return value that the call eventually generates + +SIZEOF_FP_REGS equ 40h ; xmm0-3 + +SIZEOF_OUT_REG_HOMES equ 20h ; Callee register spill + +; +; From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +; +; SIZEOF_RETADDR +; SIZEOF_ALIGNMENT_PADDING +; SIZEOF_RETURN_BLOCK +; SIZEOF_FP_REGS +; SIZEOF_OUT_REG_HOMES +; + +DISTANCE_FROM_CHILDSP_TO_FP_REGS equ SIZEOF_OUT_REG_HOMES + +DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK equ DISTANCE_FROM_CHILDSP_TO_FP_REGS + SIZEOF_FP_REGS + +DISTANCE_FROM_CHILDSP_TO_RETADDR equ DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK + SIZEOF_RETURN_BLOCK + SIZEOF_ALIGNMENT_PADDING + +DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_RETADDR + SIZEOF_RETADDR + +.errnz DISTANCE_FROM_CHILDSP_TO_CALLERSP mod 16 + +;; +;; Defines an assembly thunk used to make a transition from managed code to a callee, +;; then (based on the return value from the callee), either returning or jumping to +;; a new location while preserving the input arguments. The usage of this thunk also +;; ensures arguments passed are properly reported. +;; +;; TODO: This code currently only tailcalls, and does not return. +;; +;; Inputs: +;; rcx, rdx, r8, r9, stack space: arguments as normal +;; r10: The location of the target code the UniversalTransition thunk will call +;; r11: The only parameter to the target function (passed in rdx to callee) +;; + +; +; Frame layout is: +; +; {StackPassedArgs} ChildSP+0a0 CallerSP+020 +; {IntArgRegs (rcx,rdx,r8,r9) (0x20 bytes)} ChildSP+080 CallerSP+000 +; {CallerRetaddr} ChildSP+078 CallerSP-008 +; {AlignmentPad (0x8 bytes)} ChildSP+070 CallerSP-010 +; {ReturnBlock (0x10 bytes)} ChildSP+060 CallerSP-020 +; {FpArgRegs (xmm0-xmm3) (0x40 bytes)} ChildSP+020 CallerSP-060 +; {CalleeArgumentHomes (0x20 bytes)} ChildSP+000 CallerSP-080 +; {CalleeRetaddr} ChildSP-008 CallerSP-088 +; +; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +; must be updated as well. 
+; +; NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +; FpArgRegs. +; +; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +; everything between the base of the ReturnBlock and the top of the StackPassedArgs. +; + +UNIVERSAL_TRANSITION macro FunctionName + +NESTED_ENTRY Rhp&FunctionName, _TEXT + + alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + save_reg_postrsp rcx, 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp rdx, 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp r8, 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp r9, 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + + save_xmm128_postrsp xmm0, DISTANCE_FROM_CHILDSP_TO_FP_REGS + save_xmm128_postrsp xmm1, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h + save_xmm128_postrsp xmm2, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h + save_xmm128_postrsp xmm3, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h + + END_PROLOGUE + +if TRASH_SAVED_ARGUMENT_REGISTERS ne 0 + + ; Before calling out, trash all of the argument registers except the ones (rcx, rdx) that + ; hold outgoing arguments. All of these registers have been saved to the transition + ; frame, and the code at the call target is required to use only the transition frame + ; copies when dispatching this call to the eventual callee. + + movsd xmm0, mmword ptr [RhpFpTrashValues + 0h] + movsd xmm1, mmword ptr [RhpFpTrashValues + 8h] + movsd xmm2, mmword ptr [RhpFpTrashValues + 10h] + movsd xmm3, mmword ptr [RhpFpTrashValues + 18h] + + mov r8, qword ptr [RhpIntegerTrashValues + 10h] + mov r9, qword ptr [RhpIntegerTrashValues + 18h] + +endif ; TRASH_SAVED_ARGUMENT_REGISTERS + + ; + ; Call out to the target, while storing and reporting arguments to the GC. + ; + mov rdx, r11 + lea rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK] + call r10 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom&FunctionName + + ; We cannot make the label public as that tricks DIA stackwalker into thinking + ; it's the beginning of a method. For this reason we export the address + ; by means of an auxiliary variable. + + ; restore fp argument registers + movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS ] + movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h] + movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h] + movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h] + + ; restore integer argument registers + mov rcx, [rsp + 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov rdx, [rsp + 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov r8, [rsp + 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov r9, [rsp + 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + + ; epilog + nop + + ; Pop the space that was allocated between the ChildSP and the caller return address. + add rsp, DISTANCE_FROM_CHILDSP_TO_RETADDR + + TAILJMP_RAX + +NESTED_END Rhp&FunctionName, _TEXT + + endm + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. 
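The debug-only trashing above is a defensive pattern: after the argument registers have been spilled to the transition frame, the live registers are overwritten with recognizable garbage so any callee that wrongly reads them, instead of the frame copies, fails loudly in debug builds. A minimal C++ analogue with invented names (the real trash sources are RhpIntegerTrashValues and RhpFpTrashValues):

#include <cstdint>

#ifndef NDEBUG
inline void TrashSavedArgumentsSketch(uint64_t* liveIntArgs, double* liveFpArgs,
                                      const uint64_t* intTrash, const double* fpTrash,
                                      int intCount, int fpCount)
{
    for (int i = 0; i < intCount; i++) liveIntArgs[i] = intTrash[i];  // like RhpIntegerTrashValues
    for (int i = 0; i < fpCount; i++)  liveFpArgs[i]  = fpTrash[i];   // like RhpFpTrashValues
}
#endif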
+ UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +endif + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.S new file mode 100644 index 0000000000000..2927acd0241fc --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.S @@ -0,0 +1,287 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +#ifdef WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG + + // If g_GCShadow is 0, don't perform the check. + cmp qword ptr [C_VAR(g_GCShadow)], 0 + je LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + + // Save DESTREG since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + // the prolog inside a method without a frame. But given that this is only debug code and generally we + // shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + // variants to set up frames. The compiler knows exactly which registers are trashed in the simple write + // barrier case, so we don't have any more scratch registers to play with (and doing so would only make + // things harder if at a later stage we want to allow multiple barrier versions based on the input + // registers). + push \DESTREG + + // Transform DESTREG into the equivalent address in the shadow heap. + sub \DESTREG, [C_VAR(g_lowest_address)] + jb LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + add \DESTREG, [C_VAR(g_GCShadow)] + cmp \DESTREG, [C_VAR(g_GCShadowEnd)] + ja LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + + // Update the shadow heap. + mov [\DESTREG], \REFREG + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. This + // read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to + // recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock + // prefix). + xchg [rsp], \DESTREG + cmp [\DESTREG], \REFREG + jne LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG) + + // The original DESTREG value is now restored but the stack has a value (the shadow version of the + // location) pushed. Need to discard this push before we are done. + add rsp, 8 + jmp LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG): + // Someone went and updated the real heap. We need to invalidate the shadow location since we can't + // guarantee whose shadow update won. + + // Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + // variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit + // immediate and therefore must be moved into a register before it can be written to the shadow + // location. + xchg [rsp], \DESTREG + push \REFREG + movabs \REFREG, INVALIDGCVALUE + mov qword ptr [\DESTREG], \REFREG + pop \REFREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG): + // Restore original DESTREG value from the stack. 
+ pop \DESTREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): +.endm + +#else // WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG +.endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it's used in the definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW \BASENAME, \REFREG, rdi + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). + cmp \REFREG, [C_VAR(g_ephemeral_low)] + jb LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + cmp \REFREG, [C_VAR(g_ephemeral_high)] + jae LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rdi, 11 + add rdi, [C_VAR(g_card_table)] + cmp byte ptr [rdi], 0x0FF + jne LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG) + +LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): + ret + +// We get here if it's necessary to update the card table. +LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG): + mov byte ptr [rdi], 0x0FF + ret + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +// location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT + + // Export the canonical write barrier under unqualified name as well + .ifc \REFREG, RSI + ALTERNATE_ENTRY RhpAssignRef + ALTERNATE_ENTRY RhpAssignRefAVLocation + .endif + + // Write the reference into the location. Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. 
+ mov qword ptr [rdi], \REFREG + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG + +LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. +DEFINE_UNCHECKED_WRITE_BARRIER RSI, ESI + +// +// Define the helpers used to implement the write barrier required when writing an object reference into a +// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +// collection. +// + +.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // The location being updated might not even lie in the GC heap (a handle or stack location for instance), + // in which case no write barrier is required. + cmp rdi, [C_VAR(g_lowest_address)] + jb LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + cmp rdi, [C_VAR(g_highest_address)] + jae LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is always in RDI. The object reference that will be assigned into +// that location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT + + // Export the canonical write barrier under unqualified name as well + .ifc \REFREG, RSI + ALTERNATE_ENTRY RhpCheckedAssignRef + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + .endif + + // Write the reference into the location. Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. + mov qword ptr [rdi], \REFREG + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG + +LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. 
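Summarizing the two macros above in C++: the checked barrier is the unchecked barrier guarded by a heap-range test on the destination, and in WRITE_BARRIER_CHECK builds UPDATE_GC_SHADOW additionally mirrors the store into the shadow heap before the ephemeral test. The global names below are stand-ins for the GC exports referenced above; the RSI instantiation of the macro follows.

#include <cstdint>

extern uint8_t*  g_card_table_sketch;
extern uintptr_t g_lowest_address_sketch, g_highest_address_sketch;
extern uintptr_t g_ephemeral_low_sketch,  g_ephemeral_high_sketch;

// Conceptual equivalent of the checked assign-ref barrier: store the reference, then mark the
// card covering the destination only if a heap location now points at an ephemeral object.
inline void CheckedAssignRefSketch(void** dst, void* ref)
{
    *dst = ref;                                             // the store itself is unconditional

    uintptr_t loc = reinterpret_cast<uintptr_t>(dst);
    if (loc < g_lowest_address_sketch || loc >= g_highest_address_sketch)
        return;                                             // not a GC heap location (stack, handle, ...)

    uintptr_t obj = reinterpret_cast<uintptr_t>(ref);
    if (obj < g_ephemeral_low_sketch || obj >= g_ephemeral_high_sketch)
        return;                                             // target will not move in an ephemeral GC

    uint8_t* card = g_card_table_sketch + (loc >> 11);      // one card byte covers a 2KB heap range
    if (*card != 0xFF)                                      // skip redundant writes; they hurt scaling
        *card = 0xFF;
}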
+DEFINE_CHECKED_WRITE_BARRIER RSI, ESI + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT + mov rax, rdx +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + lock cmpxchg [rdi], rsi + jne LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_RSI) + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RSI + +LEAF_END RhpCheckedLockCmpXchg, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedXchg, _TEXT + + // Setup rax with the new object for the exchange, that way it will automatically hold the correct result + // afterwards and we can leave rdx unaltered ready for the GC write barrier below. + mov rax, rsi +ALTERNATE_ENTRY RhpCheckedXchgAVLocation + xchg [rdi], rax + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RSI + +LEAF_END RhpCheckedXchg, _TEXT + +// +// RhpByRefAssignRef simulates movs instruction for object references. +// +// On entry: +// rdi: address of ref-field (assigned to) +// rsi: address of the data (source) +// rcx: be trashed +// +// On exit: +// rdi, rsi are incremented by 8, +// rcx: trashed +// +LEAF_ENTRY RhpByRefAssignRef, _TEXT + mov rcx, [rsi] + mov [rdi], rcx + + // Check whether the writes were even into the heap. If not there's no card update required. + cmp rdi, [C_VAR(g_lowest_address)] + jb LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + cmp rdi, [C_VAR(g_highest_address)] + jae LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, rcx, rdi + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). + cmp rcx, [C_VAR(g_ephemeral_low)] + jb LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + cmp rcx, [C_VAR(g_ephemeral_high)] + jae LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 0x8 + add rdi, 0x8 + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rcx, 11 + add rcx, [C_VAR(g_card_table)] + cmp byte ptr [rcx], 0x0FF + jne LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable) + ret + +// We get here if it's necessary to update the card table. 
+LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable): + mov byte ptr [rcx], 0x0FF + ret + +LOCAL_LABEL(RhpByRefAssignRef_NotInHeap): + // Increment the pointers before leaving + add rdi, 0x8 + add rsi, 0x8 + ret +LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.asm new file mode 100644 index 0000000000000..67ecf7dc01728 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.asm @@ -0,0 +1,305 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +ifdef WRITE_BARRIER_CHECK + +g_GCShadow TEXTEQU +g_GCShadowEnd TEXTEQU +INVALIDGCVALUE EQU 0CCCCCCCDh + +EXTERN g_GCShadow : QWORD +EXTERN g_GCShadowEnd : QWORD + +UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG + + ;; If g_GCShadow is 0, don't perform the check. + cmp g_GCShadow, 0 + je &BASENAME&_UpdateShadowHeap_Done_&REFREG& + + ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + ;; the prolog inside a method without a frame. But given that this is only debug code and generally we + ;; shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + ;; variants to set up frames. Unlike RhpBulkWriteBarrier below which is treated as a helper call using the + ;; usual calling convention, the compiler knows exactly which registers are trashed in the simple write + ;; barrier case, so we don't have any more scratch registers to play with (and doing so would only make + ;; things harder if at a later stage we want to allow multiple barrier versions based on the input + ;; registers). + push DESTREG + + ;; Transform DESTREG into the equivalent address in the shadow heap. + sub DESTREG, g_lowest_address + jb &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG& + add DESTREG, [g_GCShadow] + cmp DESTREG, [g_GCShadowEnd] + ja &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG& + + ;; Update the shadow heap. + mov [DESTREG], REFREG + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. This + ;; read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to + ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock + ;; prefix). 
+ xchg [rsp], DESTREG + cmp [DESTREG], REFREG + jne &BASENAME&_UpdateShadowHeap_Invalidate_&REFREG& + + ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the + ;; location) pushed. Need to discard this push before we are done. + add rsp, 8 + jmp &BASENAME&_UpdateShadowHeap_Done_&REFREG& + +&BASENAME&_UpdateShadowHeap_Invalidate_&REFREG&: + ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + + ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + ;; variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit + ;; immediate and therefore must be moved into a register before it can be written to the shadow + ;; location. + xchg [rsp], DESTREG + push REFREG + mov REFREG, INVALIDGCVALUE + mov qword ptr [DESTREG], REFREG + pop REFREG + +&BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG&: + ;; Restore original DESTREG value from the stack. + pop DESTREG + +&BASENAME&_UpdateShadowHeap_Done_&REFREG&: +endm + +else ; WRITE_BARRIER_CHECK + +UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG +endm + +endif ; WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). +DEFINE_UNCHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, REFREG, rcx + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp REFREG, [g_ephemeral_low] + jb &BASENAME&_NoBarrierRequired_&REFREG& + cmp REFREG, [g_ephemeral_high] + jae &BASENAME&_NoBarrierRequired_&REFREG& + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rcx, 11 + add rcx, [g_card_table] + cmp byte ptr [rcx], 0FFh + jne &BASENAME&_UpdateCardTable_&REFREG& + +&BASENAME&_NoBarrierRequired_&REFREG&: + ret + +;; We get here if it's necessary to update the card table. +&BASENAME&_UpdateCardTable_&REFREG&: + mov byte ptr [rcx], 0FFh + ret + +endm + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. One argument is taken, the +;; name of the register that will hold the object reference (this should be in upper case as it's used in the +;; definition of the name of the helper). 
+DEFINE_UNCHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME + +;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +;; location is in one of the other general registers determined by the value of REFREG. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpAssignRef&EXPORT_REG_NAME&, _TEXT + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + ALTERNATE_ENTRY RhpAssignRef + ALTERNATE_ENTRY RhpAssignRefAVLocation + endif + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov qword ptr [rcx], REFREG + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, REFREG + +LEAF_END RhpAssignRef&EXPORT_REG_NAME&, _TEXT +endm + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is RDX. +DEFINE_UNCHECKED_WRITE_BARRIER RDX, EDX + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + +DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG + + ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance), + ;; in which case no write barrier is required. + cmp rcx, [g_lowest_address] + jb &BASENAME&_NoBarrierRequired_&REFREG& + cmp rcx, [g_highest_address] + jae &BASENAME&_NoBarrierRequired_&REFREG& + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + +endm + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. One argument is taken, the +;; name of the register that will hold the object reference (this should be in upper case as it's used in the +;; definition of the name of the helper). +DEFINE_CHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME + +;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is always in RCX. The object reference that will be assigned into +;; that location is in one of the other general registers determined by the value of REFREG. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + ALTERNATE_ENTRY RhpCheckedAssignRef + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + endif + + ;; Write the reference into the location. 
Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov qword ptr [rcx], REFREG + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, REFREG + +LEAF_END RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT +endm + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is RDX. +DEFINE_CHECKED_WRITE_BARRIER RDX, EDX + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT + mov rax, r8 +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + lock cmpxchg [rcx], rdx + jne RhpCheckedLockCmpXchg_NoBarrierRequired_RDX + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RDX + +LEAF_END RhpCheckedLockCmpXchg, _TEXT + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedXchg, _TEXT + + ;; Setup rax with the new object for the exchange, that way it will automatically hold the correct result + ;; afterwards and we can leave rdx unaltered ready for the GC write barrier below. + mov rax, rdx +ALTERNATE_ENTRY RhpCheckedXchgAVLocation + xchg [rcx], rax + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RDX + +LEAF_END RhpCheckedXchg, _TEXT + +;; +;; RhpByRefAssignRef simulates movs instruction for object references. +;; +;; On entry: +;; rdi: address of ref-field (assigned to) +;; rsi: address of the data (source) +;; rcx: be trashed +;; +;; On exit: +;; rdi, rsi are incremented by 8, +;; rcx: trashed +;; +LEAF_ENTRY RhpByRefAssignRef, _TEXT + mov rcx, [rsi] + mov [rdi], rcx + + ;; Check whether the writes were even into the heap. If not there's no card update required. + cmp rdi, [g_lowest_address] + jb RhpByRefAssignRef_NotInHeap + cmp rdi, [g_highest_address] + jae RhpByRefAssignRef_NotInHeap + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, rcx, rdi + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp rcx, [g_ephemeral_low] + jb RhpByRefAssignRef_NotInHeap + cmp rcx, [g_ephemeral_high] + jae RhpByRefAssignRef_NotInHeap + + ;; move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 8h + add rdi, 8h + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. 
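+    ;; (Each card table byte covers 2^11 == 2KB of heap addresses, hence the shift by 11 below.)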
+ shr rcx, 11 + add rcx, [g_card_table] + cmp byte ptr [rcx], 0FFh + jne RhpByRefAssignRef_UpdateCardTable + ret + +;; We get here if it's necessary to update the card table. +RhpByRefAssignRef_UpdateCardTable: + mov byte ptr [rcx], 0FFh + ret + +RhpByRefAssignRef_NotInHeap: + ; Increment the pointers before leaving + add rdi, 8h + add rsi, 8h + ret +LEAF_END RhpByRefAssignRef, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.S new file mode 100644 index 0000000000000..e72a3ef105b66 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.S @@ -0,0 +1,555 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// r0 == EEType +LEAF_ENTRY RhpNewFast, _TEXT + PROLOG_PUSH "{r4,lr}" + mov r4, r0 // save EEType + + // r0 = GetThread() + INLINE_GETTHREAD + + // r4 contains EEType pointer + ldr r2, [r4, #OFFSETOF__EEType__m_uBaseSize] + + // r0: Thread pointer + // r4: EEType pointer + // r2: base size + + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + add r2, r3 + ldr r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r1 + bhi LOCAL_LABEL(RhpNewFast_RarePath) + + // set the new alloc pointer + str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer + str r4, [r3, #OFFSETOF__Object__m_pEEType] + + mov r0, r3 + + EPILOG_POP "{r4,pc}" + +LOCAL_LABEL(RhpNewFast_RarePath): + mov r0, r4 // restore EEType + mov r1, #0 + EPILOG_POP "{r4,lr}" + b C_FUNC(RhpNewObject) + +LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// r0 == EEType +// +LEAF_ENTRY RhpNewFinalizable, _TEXT + mov r1, #GC_ALLOC_FINALIZE + b C_FUNC(RhpNewObject) +LEAF_END RhpNewFinalizable, _TEXT + + +// Allocate non-array object. +// r0 == EEType +// r1 == alloc flags +NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME r3 + + // r0: EEType + // r1: alloc flags + // r3: transition frame + + // Preserve the EEType in r5. + mov r5, r0 + + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] // cbSize + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx C_FUNC(RhpGcAlloc) + + // Set the new object's EEType pointer on success. + cbz r0, LOCAL_LABEL(NewOutOfMemory) + str r5, [r0, #OFFSETOF__Object__m_pEEType] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr r1, [r5, #OFFSETOF__EEType__m_uBaseSize] + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r1, r2 + blo LOCAL_LABEL(New_SkipPublish) + + // r0: already contains object + // r1: already contains object size + + bl C_FUNC(RhpPublishObject) + // r0: function returned the passed-in object + +LOCAL_LABEL(New_SkipPublish): + + POP_COOP_PINVOKE_FRAME + bx lr + +LOCAL_LABEL(NewOutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov r0, r5 // EEType pointer + mov r1, #0 // Indicate that we should throw OOM. 
+ + POP_COOP_PINVOKE_FRAME + + b C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewObject, _TEXT + + +// Allocate a string. +// r0 == EEType +// r1 == element/character count +LEAF_ENTRY RhNewString, _TEXT + PROLOG_PUSH "{r4-r6,lr}" + // Make sure computing the overall allocation size won't overflow + MOV32 r12, MAX_STRING_LENGTH + cmp r1, r12 + bhi LOCAL_LABEL(StringSizeOverflow) + + // Compute overall allocation size (align(base size + (element size * elements), 4)). + mov r2, #(STRING_BASE_SIZE + 3) +#if STRING_COMPONENT_SIZE == 2 + add r2, r2, r1, lsl #1 // r2 += characters * 2 +#else + NotImplementedComponentSize +#endif + bic r2, r2, #3 + + mov r4, r0 // Save EEType + mov r5, r1 // Save element count + mov r6, r2 // Save string size + // r0 = GetThread() + INLINE_GETTHREAD + // r4 == EEType + // r5 == element count + // r6 == string size + // r0 == Thread* + + // Load potential new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r6, r12 + bcs LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB + + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r6, r12 + bhi LOCAL_LABEL(RhNewString_RarePath) + + // Reload new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer and element count. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + str r5, [r12, #OFFSETOF__String__m_Length] + + // Return the object allocated in r0. + mov r0, r12 + EPILOG_POP "{r4-r6,pc}" + +LOCAL_LABEL(StringSizeOverflow): + // We get here if the size of the final string object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // EEType is in r0 already + mov r1, 0 // Indicate that we should throw OOM + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(RhNewString_RarePath): + mov r3, r0 + mov r0, r4 + mov r1, r5 + mov r2, r6 + // r0 == EEType + // r1 == element count + // r2 == string size + Thread::m_alloc_context::alloc_ptr + // r3 == Thread + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhpNewArrayRare) + +LEAF_END RhNewString, _TEXT + + +// Allocate one dimensional, zero based array (SZARRAY). +// r0 == EEType +// r1 == element count +LEAF_ENTRY RhpNewArray, _TEXT + PROLOG_PUSH "{r4-r6,lr}" + + // Compute overall allocation size (align(base size + (element size * elements), 4)). + // if the element count is <= 0x10000, no overflow is possible because the component + // size is <= 0xffff (it's an unsigned 16-bit value) and thus the product is <= 0xffff0000 + // and the base size for the worst case (32 dimensional MdArray) is less than 0xffff. 
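+    // In C terms the size computed below is roughly (illustrative sketch):
+    //   size = (pEEType->m_uBaseSize + count * pEEType->m_usComponentSize + 3) & ~3;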
+ ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + cmp r1, #0x10000 + bhi LOCAL_LABEL(ArraySizeBig) + umull r2, r3, r2, r1 + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + adds r2, #3 +LOCAL_LABEL(ArrayAlignSize): + bic r2, r2, #3 + + mov r4, r0 // Save EEType + mov r5, r1 // Save element count + mov r6, r2 // Save array size + // r0 = GetThread() + INLINE_GETTHREAD + // r4 == EEType + // r5 == element count + // r6 == array size + // r0 == Thread* + + // Load potential new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r6, r12 + bcs LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB + + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r6, r12 + bhi LOCAL_LABEL(RhpNewArray_RarePath) + + // Reload new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer and element count. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + str r5, [r12, #OFFSETOF__Array__m_Length] + + // Return the object allocated in r0. + mov r0, r12 + EPILOG_POP "{r4-r6,pc}" + +LOCAL_LABEL(ArraySizeBig): + // if the element count is negative, it's an overflow error + cmp r1, #0 + blt LOCAL_LABEL(ArraySizeOverflow) + + // now we know the element count is in the signed int range [0..0x7fffffff] + // overflow in computing the total size of the array size gives an out of memory exception, + // NOT an overflow exception + // we already have the component size in r2 + umull r2, r3, r2, r1 + cbnz r3, LOCAL_LABEL(ArrayOutOfMemoryFinal) + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + bcs LOCAL_LABEL(ArrayOutOfMemoryFinal) + adds r2, #3 + bcs LOCAL_LABEL(ArrayOutOfMemoryFinal) + b LOCAL_LABEL(ArrayAlignSize) + +LOCAL_LABEL(ArrayOutOfMemoryFinal): + + // EEType is in r0 already + mov r1, #0 // Indicate that we should throw OOM. + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(ArraySizeOverflow): + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // EEType is in r0 already + mov r1, #1 // Indicate that we should throw OverflowException + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(RhpNewArray_RarePath): + mov r3, r0 + mov r0, r4 + mov r1, r5 + mov r2, r6 + // r0 == EEType + // r1 == element count + // r2 == array size + Thread::m_alloc_context::alloc_ptr + // r3 == Thread + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhpNewArrayRare) + +LEAF_END RhpNewArray, _TEXT + + +// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +// r0 == EEType +// r1 == element count +// r2 == array size + Thread::m_alloc_context::alloc_ptr +// r3 == Thread +NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from r2. 
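+    // (On entry r2 == array size + alloc_ptr, so subtracting alloc_ptr yields the cbSize passed to RhpGcAlloc.)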
+ ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + sub r2, r12 + + PUSH_COOP_PINVOKE_FRAME r3 + + // Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + + mov r7, r2 // Save array size in r7 + + mov r1, #0 // uFlags + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx C_FUNC(RhpGcAlloc) + + // Test for failure (NULL return). + cbz r0, LOCAL_LABEL(ArrayOutOfMemory) + + // Success, set the array's type and element count in the new object. + str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo LOCAL_LABEL(NewArray_SkipPublish) + + // r0: already contains object + mov r1, r7 // r1: object size + bl C_FUNC(RhpPublishObject) + // r0: function returned the passed-in object + +LOCAL_LABEL(NewArray_SkipPublish): + + POP_COOP_PINVOKE_FRAME + bx lr + +LOCAL_LABEL(ArrayOutOfMemory): + + mov r0, r5 // EEType + mov r1, #0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + b C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArrayRare, _TEXT + +// Allocate simple object (not finalizable, array or value type) on an 8 byte boundary. +// r0 == EEType +LEAF_ENTRY RhpNewFastAlign8, _TEXT + PROLOG_PUSH "{r4,lr}" + + mov r4, r0 // save EEType + + // r0 = GetThread() + INLINE_GETTHREAD + + // Fetch object size into r2. + ldr r2, [r4, #OFFSETOF__EEType__m_uBaseSize] + + // r4: EEType pointer + // r0: Thread pointer + // r2: base size + + // Load potential new object address into r3. Cache this result in r12 as well for the common case + // where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + // Check whether the current allocation context is already aligned for us. + tst r3, #0x7 + bne LOCAL_LABEL(Alloc8Failed) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi LOCAL_LABEL(Alloc8Failed) + + // Update the alloc pointer to account for the allocation. + str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + + // Return the object allocated in r0. + mov r0, r12 + + EPILOG_POP "{r4,pc}" + +LOCAL_LABEL(Alloc8Failed): + // Fast allocation failed. Call slow helper with flags set to indicate an 8-byte alignment and no + // finalization. + mov r0, r4 // restore EEType + mov r1, #GC_ALLOC_ALIGN8 + EPILOG_POP "{r4,lr}" + b C_FUNC(RhpNewObject) + +LEAF_END RhpNewFastAlign8, _TEXT + +// Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary. +// r0 == EEType +LEAF_ENTRY RhpNewFinalizableAlign8, _TEXT + mov r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) + b C_FUNC(RhpNewObject) +LEAF_END RhpNewFinalizableAlign8, _TEXT + +// Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload +// itself is 8 byte aligned). 
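+// Layout sketch (assuming the usual 4-byte EEType* object header on 32-bit ARM):
+// an object starting at an address that is 4 mod 8 places the value type
+// payload that follows the EEType* on an 8-byte boundary.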
+// r0 == EEType +LEAF_ENTRY RhpNewFastMisalign, _TEXT + PROLOG_PUSH "{r4,lr}" + + mov r4, r0 // save EEType + + // r0 = GetThread() + INLINE_GETTHREAD + + // Fetch object size into r2. + ldr r2, [r4, #OFFSETOF__EEType__m_uBaseSize] + + // r4: EEType pointer + // r0: Thread pointer + // r2: base size + + // Load potential new object address into r3. Cache this result in r12 as well for the common case + // where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + // Check whether the current allocation context is already aligned for us (for boxing that means the + // address % 8 == 4, so the value type payload following the EEType* is actually 8-byte aligned). + tst r3, #0x7 + beq LOCAL_LABEL(BoxAlloc8Failed) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi LOCAL_LABEL(BoxAlloc8Failed) + + // Update the alloc pointer to account for the allocation. + str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + + // Return the object allocated in r0. + mov r0, r12 + + EPILOG_POP "{r4,pc}" + +LOCAL_LABEL(BoxAlloc8Failed): + // Fast allocation failed. Call slow helper with flags set to indicate an 8+4 byte alignment and no + // finalization. + mov r0, r4 // restore EEType + mov r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS) + EPILOG_POP "{r4,lr}" + b C_FUNC(RhpNewObject) + +LEAF_END RhpNewFastMisalign, _TEXT + +// Allocate an array on an 8 byte boundary. +// r0 == EEType +// r1 == element count +NESTED_ENTRY RhpNewArrayAlign8, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME r3 + + // Compute overall allocation size (base size + align((element size * elements), 4)). + ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + umull r2, r4, r2, r1 + cbnz r4, LOCAL_LABEL(Array8SizeOverflow) + adds r2, #3 + bcs LOCAL_LABEL(Array8SizeOverflow) + bic r2, r2, #3 + ldr r4, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r4 + bcs LOCAL_LABEL(Array8SizeOverflow) + + // Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + mov r7, r2 // Save array size in r7 + + mov r1, #GC_ALLOC_ALIGN8 // uFlags + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx C_FUNC(RhpGcAlloc) + + // Test for failure (NULL return). + cbz r0, LOCAL_LABEL(Array8OutOfMemory) + + // Success, set the array's type and element count in the new object. + str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo LOCAL_LABEL(NewArray8_SkipPublish) + + // r0: already contains object + mov r1, r7 // r1: object size + bl C_FUNC(RhpPublishObject) + // r0: function returned the passed-in object +LOCAL_LABEL(NewArray8_SkipPublish): + + POP_COOP_PINVOKE_FRAME + + bx lr + +LOCAL_LABEL(Array8SizeOverflow): + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. 
We're going to tail-call to a managed helper that will throw + // an OOM or overflow exception that the caller of this allocator understands. + + // if the element count is non-negative, it's an OOM error + cmp r1, #0 + bge LOCAL_LABEL(Array8OutOfMemory1) + + // r0 holds EEType pointer already + mov r1, #1 // Indicate that we should throw OverflowException + + POP_COOP_PINVOKE_FRAME + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(Array8OutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov r0, r5 // EEType pointer + +LOCAL_LABEL(Array8OutOfMemory1): + + mov r1, #0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + b C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArrayAlign8, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.asm new file mode 100644 index 0000000000000..d459ef8fa6d8d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.asm @@ -0,0 +1,578 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. +;; r0 == EEType + LEAF_ENTRY RhpNewFast + + ;; r1 = GetThread(), TRASHES r2 + INLINE_GETTHREAD r1, r2 + + ;; Fetch object size into r2. + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; r0: EEType pointer + ;; r1: Thread pointer + ;; r2: base size + ;; + + ;; Load potential new object address into r3. Cache this result in r12 as well for the common case + ;; where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi AllocFailed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +AllocFailed + ;; Fast allocation failed. Call slow helper with flags set to zero (this isn't a finalizable object). + mov r1, #0 + b RhpNewObject + + LEAF_END RhpNewFast + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate non-array object with finalizer. +;; r0 == EEType + LEAF_ENTRY RhpNewFinalizable + mov r1, #GC_ALLOC_FINALIZE + b RhpNewObject + LEAF_END RhpNewFinalizable + +;; Allocate non-array object. +;; r0 == EEType +;; r1 == alloc flags + NESTED_ENTRY RhpNewObject + + PUSH_COOP_PINVOKE_FRAME r3 + + ; r0: EEType + ; r1: alloc flags + ; r3: transition frame + + ;; Preserve the EEType in r5. + mov r5, r0 + + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] ; cbSize + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx RhpGcAlloc + + ;; Set the new object's EEType pointer on success. 
+ cbz r0, NewOutOfMemory + str r5, [r0, #OFFSETOF__Object__m_pEEType] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr r1, [r5, #OFFSETOF__EEType__m_uBaseSize] + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r1, r2 + blo New_SkipPublish + ;; r0: already contains object + ;; r1: already contains object size + bl RhpPublishObject + ;; r0: function returned the passed-in object +New_SkipPublish + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NewOutOfMemory + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov r0, r5 ; EEType pointer + mov r1, #0 ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewObject + + +;; Allocate a string. +;; r0 == EEType +;; r1 == element/character count + LEAF_ENTRY RhNewString + + ; Make sure computing the overall allocation size won't overflow + MOV32 r2, MAX_STRING_LENGTH + cmp r1, r2 + bhs StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + mov r2, #(STRING_BASE_SIZE + 3) +#if STRING_COMPONENT_SIZE == 2 + add r2, r2, r1, lsl #1 ; r2 += characters * 2 +#else + NotImplementedComponentSize +#endif + bic r2, r2, #3 + + ; r0 == EEType + ; r1 == element count + ; r2 == string size + + INLINE_GETTHREAD r3, r12 + + ;; Load potential new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r2, r12 + bcs RhpNewArrayRare ; if we get a carry here, the array is too large to fit below 4 GB + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r12 + bhi RhpNewArrayRare + + ;; Reload new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str r2, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + str r1, [r12, #OFFSETOF__String__m_Length] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +StringSizeOverflow + ; We get here if the size of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; r0 holds EEType pointer already + mov r1, #0 ; Indicate that we should throw OOM. + b RhExceptionHandling_FailedAllocation + + LEAF_END RhNewString + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate one dimensional, zero based array (SZARRAY). +;; r0 == EEType +;; r1 == element count + LEAF_ENTRY RhpNewArray + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is <= 0x10000, no overflow is possible because the component + ; size is <= 0xffff (it's an unsigned 16-bit value) and thus the product is <= 0xffff0000 + ; and the base size for the worst case (32 dimensional MdArray) is less than 0xffff. 
+ ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + cmp r1, #0x10000 + bhi ArraySizeBig + umull r2, r3, r2, r1 + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + adds r2, #3 +ArrayAlignSize + bic r2, r2, #3 + + ; r0 == EEType + ; r1 == element count + ; r2 == array size + + INLINE_GETTHREAD r3, r12 + + ;; Load potential new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r2, r12 + bcs RhpNewArrayRare ; if we get a carry here, the array is too large to fit below 4 GB + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r12 + bhi RhpNewArrayRare + + ;; Reload new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str r2, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + str r1, [r12, #OFFSETOF__Array__m_Length] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +ArraySizeOverflow + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; r0 holds EEType pointer already + mov r1, #1 ; Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + +ArraySizeBig + ; if the element count is negative, it's an overflow error + cmp r1, #0 + blt ArraySizeOverflow + ; now we know the element count is in the signed int range [0..0x7fffffff] + ; overflow in computing the total size of the array size gives an out of memory exception, + ; NOT an overflow exception + ; we already have the component size in r2 + umull r2, r3, r2, r1 + cbnz r3, ArrayOutOfMemoryFinal + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + bcs ArrayOutOfMemoryFinal + adds r2, #3 + bcs ArrayOutOfMemoryFinal + b ArrayAlignSize + +ArrayOutOfMemoryFinal + ; r0 holds EEType pointer already + mov r1, #0 ; Indicate that we should throw OOM. + b RhExceptionHandling_FailedAllocation + + LEAF_END RhpNewArray + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +;; r0 == EEType +;; r1 == element count +;; r2 == array size + Thread::m_alloc_context::alloc_ptr +;; r3 == Thread + NESTED_ENTRY RhpNewArrayRare + + ; Recover array size by subtracting the alloc_ptr from r2. + PROLOG_NOP ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + PROLOG_NOP sub r2, r12 + + PUSH_COOP_PINVOKE_FRAME r3 + + ; Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + + mov r7, r2 ; Save array size in r7 + + mov r1, #0 ; uFlags + + ; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx RhpGcAlloc + + ; Test for failure (NULL return). + cbz r0, ArrayOutOfMemory + + ; Success, set the array's type and element count in the new object. 
+ str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo NewArray_SkipPublish + ;; r0: already contains object + mov r1, r7 ;; r1: object size + bl RhpPublishObject + ;; r0: function returned the passed-in object +NewArray_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +ArrayOutOfMemory + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov r0, r5 ;; EEType pointer + mov r1, #0 ;; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayRare + +;; Allocate simple object (not finalizable, array or value type) on an 8 byte boundary. +;; r0 == EEType + LEAF_ENTRY RhpNewFastAlign8 + + ;; r1 = GetThread(), TRASHES r2 + INLINE_GETTHREAD r1, r2 + + ;; Fetch object size into r2. + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; r0: EEType pointer + ;; r1: Thread pointer + ;; r2: base size + ;; + + ;; Load potential new object address into r3. Cache this result in r12 as well for the common case + ;; where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + ;; Check whether the current allocation context is already aligned for us. + tst r3, #0x7 + bne ContextMisaligned + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi Alloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +ContextMisaligned + ;; Allocation context is currently misaligned. We attempt to fix this by allocating a minimum sized + ;; free object (which is sized such that it "flips" the alignment to a good value). + + ;; Determine whether the end of both objects would lie outside of the current allocation context. If + ;; so, we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + add r2, #SIZEOF__MinObject + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi Alloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the free object's EEType pointer (it's the only field we need to set, a component count of zero + ;; is what we want). + ldr r2, =$G_FREE_OBJECT_EETYPE + ldr r2, [r2] + str r2, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Set the new object's EEType pointer. + str r0, [r12, #(SIZEOF__MinObject + OFFSETOF__Object__m_pEEType)] + + ;; Return the object allocated in r0. + add r0, r12, #SIZEOF__MinObject + + bx lr + +Alloc8Failed + ;; Fast allocation failed. Call slow helper with flags set to indicate an 8-byte alignment and no + ;; finalization. 
+ mov r1, #GC_ALLOC_ALIGN8 + b RhpNewObject + + LEAF_END RhpNewFastAlign8 + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary. +;; r0 == EEType + LEAF_ENTRY RhpNewFinalizableAlign8 + + mov r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) + b RhpNewObject + + LEAF_END RhpNewFinalizableAlign8 + +;; Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload +;; itself is 8 byte aligned). +;; r0 == EEType + LEAF_ENTRY RhpNewFastMisalign + + ;; r1 = GetThread(), TRASHES r2 + INLINE_GETTHREAD r1, r2 + + ;; Fetch object size into r2. + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; r0: EEType pointer + ;; r1: Thread pointer + ;; r2: base size + ;; + + ;; Load potential new object address into r3. Cache this result in r12 as well for the common case + ;; where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + ;; Check whether the current allocation context is already aligned for us (for boxing that means the + ;; address % 8 == 4, so the value type payload following the EEType* is actually 8-byte aligned). + tst r3, #0x7 + beq BoxContextMisaligned + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi BoxAlloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +BoxContextMisaligned + ;; Allocation context is currently misaligned. We attempt to fix this by allocating a minimum sized + ;; free object (which is sized such that it "flips" the alignment to a good value). + + ;; Determine whether the end of both objects would lie outside of the current allocation context. If + ;; so, we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + add r2, #SIZEOF__MinObject + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi BoxAlloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the free object's EEType pointer (it's the only field we need to set, a component count of zero + ;; is what we want). + ldr r2, =$G_FREE_OBJECT_EETYPE + ldr r2, [r2] + str r2, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Set the new object's EEType pointer. + str r0, [r12, #(SIZEOF__MinObject + OFFSETOF__Object__m_pEEType)] + + ;; Return the object allocated in r0. + add r0, r12, #SIZEOF__MinObject + + bx lr + +BoxAlloc8Failed + ;; Fast allocation failed. Call slow helper with flags set to indicate an 8+4 byte alignment and no + ;; finalization. + mov r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS) + b RhpNewObject + + LEAF_END RhpNewFastMisalign + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate an array on an 8 byte boundary. +;; r0 == EEType +;; r1 == element count + NESTED_ENTRY RhpNewArrayAlign8 + + PUSH_COOP_PINVOKE_FRAME r3 + + ; Compute overall allocation size (base size + align((element size * elements), 4)). 
+ ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + umull r2, r4, r2, r1 + cbnz r4, Array8SizeOverflow + adds r2, #3 + bcs Array8SizeOverflow + bic r2, r2, #3 + ldr r4, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r4 + bcs Array8SizeOverflow + + ; Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + mov r7, r2 ; Save array size in r7 + + mov r1, #GC_ALLOC_ALIGN8 ; uFlags + + ; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx RhpGcAlloc + + ; Test for failure (NULL return). + cbz r0, Array8OutOfMemory + + ; Success, set the array's type and element count in the new object. + str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo NewArray8_SkipPublish + ;; r0: already contains object + mov r1, r7 ;; r1: object size + bl RhpPublishObject + ;; r0: function returned the passed-in object +NewArray8_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +Array8SizeOverflow + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM or overflow exception that the caller of this allocator understands. + + ; if the element count is non-negative, it's an OOM error + cmp r1, #0 + bge Array8OutOfMemory1 + + ; r0 holds EEType pointer already + mov r1, #1 ;; Indicate that we should throw OverflowException + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + +Array8OutOfMemory + ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ; an out of memory exception that the caller of this allocator understands. + + mov r0, r5 ;; EEType pointer +Array8OutOfMemory1 + mov r1, #0 ;; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayAlign8 + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AsmMacros.h b/src/coreclr/src/nativeaot/Runtime/arm/AsmMacros.h new file mode 100644 index 0000000000000..8c61fb571194c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AsmMacros.h @@ -0,0 +1,288 @@ +;; Licensed to the.NET Foundation under one or more agreements. +;; The.NET Foundation licenses this file to you under the MIT license. + +;; OS provided macros +#include +;; generated by the build from AsmOffsets.cpp +#include "AsmOffsets.inc" + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 0x01 +TSF_SuppressGcStress equ 0x08 +TSF_DoNotTriggerGc equ 0x10 +TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC equ 0x18 + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 +GC_ALLOC_ALIGN8_BIAS equ 4 +GC_ALLOC_ALIGN8 equ 8 + +;; GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). +SIZEOF__MinObject equ 12 + ASSERT (SIZEOF__MinObject :MOD: 8) == 4 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_R4 equ 0x00000001 +PTFF_SAVE_R5 equ 0x00000002 +PTFF_SAVE_R6 equ 0x00000004 +PTFF_SAVE_R7 equ 0x00000008 +PTFF_SAVE_R8 equ 0x00000010 +PTFF_SAVE_R9 equ 0x00000020 +PTFF_SAVE_R10 equ 0x00000040 +PTFF_SAVE_ALL_PRESERVED equ 0x00000077 ;; NOTE: FP is not included in this set! 
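+;; (0x00000077 == PTFF_SAVE_R4 | R5 | R6 | R8 | R9 | R10, i.e. everything except R7, the frame pointer)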
+PTFF_SAVE_SP equ 0x00000100 +PTFF_SAVE_R0 equ 0x00000200 ;; R0 is saved if it contains a GC ref and we're in hijack handler +PTFF_SAVE_ALL_SCRATCH equ 0x00003e00 ;; R0-R3,LR (R12 is trashed by the helpers anyway, but LR is relevant for loop hijacking +PTFF_R0_IS_GCREF equ 0x00004000 ;; iff PTFF_SAVE_R0: set -> r0 is Object, clear -> r0 is scalar +PTFF_R0_IS_BYREF equ 0x00008000 ;; iff PTFF_SAVE_R0: set -> r0 is ByRef, clear -> r0 is Object or scalar +PTFF_THREAD_ABORT equ 0x00010000 ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 0x43 + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + + +__tls_array equ 0x2C ;; offsetof(TEB, ThreadLocalStoragePointer) + +;; +;; MACROS +;; + + GBLS __SECTIONREL_tls_CurrentThread +__SECTIONREL_tls_CurrentThread SETS "SECTIONREL_tls_CurrentThread" + + MACRO + INLINE_GETTHREAD $destReg, $trashReg + EXTERN _tls_index + + ldr $destReg, =_tls_index + ldr $destReg, [$destReg] + mrc p15, 0, $trashReg, c13, c0, 2 + ldr $trashReg, [$trashReg, #__tls_array] + ldr $destReg, [$trashReg, $destReg, lsl #2] + ldr $trashReg, $__SECTIONREL_tls_CurrentThread + add $destReg, $trashReg + MEND + + ;; INLINE_GETTHREAD_CONSTANT_POOL macro has to be used after the last function in the .asm file that used + ;; INLINE_GETTHREAD. Optionally, it can be also used after any function that used INLINE_GETTHREAD + ;; to improve density, or to reduce distance betweeen the constant pool and its use. + MACRO + INLINE_GETTHREAD_CONSTANT_POOL + EXTERN tls_CurrentThread + +$__SECTIONREL_tls_CurrentThread + DCD tls_CurrentThread + RELOC 15 ;; SECREL + +__SECTIONREL_tls_CurrentThread SETS "$__SECTIONREL_tls_CurrentThread":CC:"_" + + MEND + + MACRO + INLINE_THREAD_UNHIJACK $threadReg, $trashReg1, $trashReg2 + ;; + ;; Thread::Unhijack() + ;; + ldr $trashReg1, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz $trashReg1, %ft0 + + ldr $trashReg2, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str $trashReg1, [$trashReg2] + mov $trashReg1, #0 + str $trashReg1, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str $trashReg1, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] +0 + MEND + +DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. 
In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; INVARIANTS +;; - The macro assumes it defines the method prolog, it should typically be the first code in a method and +;; certainly appear before any attempt to alter the stack pointer. +;; - This macro uses trashReg (after its initial value has been saved in the frame) and upon exit trashReg +;; will contain the address of transition frame. +;; + MACRO + PUSH_COOP_PINVOKE_FRAME $trashReg + + PROLOG_STACK_ALLOC 4 ; Save space for caller's SP + PROLOG_PUSH {r4-r6,r8-r10} ; Save preserved registers + PROLOG_STACK_ALLOC 8 ; Save space for flags and Thread* + PROLOG_PUSH {r7} ; Save caller's FP + PROLOG_PUSH {r11,lr} ; Save caller's frame-chain pointer and PC + + ; Compute SP value at entry to this method and save it in the last slot of the frame (slot #11). + add $trashReg, sp, #(12 * 4) + str $trashReg, [sp, #(11 * 4)] + + ; Record the bitmask of saved registers in the frame (slot #4). + mov $trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str $trashReg, [sp, #(4 * 4)] + + mov $trashReg, sp + MEND + +;; Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME + MACRO + POP_COOP_PINVOKE_FRAME + EPILOG_POP {r11,lr} ; Restore caller's frame-chain pointer and PC (return address) + EPILOG_POP {r7} ; Restore caller's FP + EPILOG_STACK_FREE 8 ; Discard flags and Thread* + EPILOG_POP {r4-r6,r8-r10} ; Restore preserved registers + EPILOG_STACK_FREE 4 ; Discard caller's SP + MEND + + +; Macro used to assign an alternate name to a symbol containing characters normally disallowed in a symbol +; name (e.g. C++ decorated names). + MACRO + SETALIAS $name, $symbol + GBLS $name +$name SETS "|$symbol|" + MEND + + + ; + ; Helper macro: create a global label for the given name, + ; decorate it, and export it for external consumption. + ; + + MACRO + __ExportLabelName $FuncName + + LCLS Name +Name SETS "|$FuncName|" + EXPORT $Name +$Name + MEND + + ; + ; Macro for indicating an alternate entry point into a function. + ; + + MACRO + LABELED_RETURN_ADDRESS $ReturnAddressName + + ; export the return address name, but do not perturb the code by forcing alignment + __ExportLabelName $ReturnAddressName + + ; flush any pending literal pool stuff + ROUT + + MEND + + MACRO + EXPORT_POINTER_TO_ADDRESS $Name + +1 + + AREA |.rdata|, ALIGN=4, DATA, READONLY + +$Name + + DCD %BT1 + + EXPORT $Name + + TEXTAREA + + ROUT + + MEND + +;----------------------------------------------------------------------------- +; Macro used to check (in debug builds only) whether the stack is 64-bit aligned (a requirement before calling +; out into C++/OS code). Invoke this directly after your prolog (if the stack frame size is fixed) or directly +; before a call (if you have a frame pointer and a dynamic stack). A breakpoint will be invoked if the stack +; is misaligned. 
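+; Roughly: push a scratch register, recompute the SP as it was at the point of
+; the check (sp + 4), and hit a breakpoint if that value is not 8-byte aligned.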
+; + MACRO + CHECK_STACK_ALIGNMENT + +#ifdef _DEBUG + push {r0} + add r0, sp, #4 + tst r0, #7 + pop {r0} + beq %F0 + EMIT_BREAKPOINT +0 +#endif + MEND + +;; Loads a 32bit constant into destination register + MACRO + MOV32 $destReg, $constant + + movw $destReg, #(($constant) & 0xFFFF) + movt $destReg, #(($constant) >> 16) + MEND + +;; +;; CONSTANTS -- SYMBOLS +;; + + SETALIAS G_LOWEST_ADDRESS, g_lowest_address + SETALIAS G_HIGHEST_ADDRESS, g_highest_address + SETALIAS G_EPHEMERAL_LOW, g_ephemeral_low + SETALIAS G_EPHEMERAL_HIGH, g_ephemeral_high + SETALIAS G_CARD_TABLE, g_card_table + SETALIAS G_FREE_OBJECT_EETYPE, ?g_pFreeObjectEEType@@3PAVEEType@@A +#ifdef FEATURE_GC_STRESS + SETALIAS THREAD__HIJACKFORGCSTRESS, ?HijackForGcStress@Thread@@SAXPAUPAL_LIMITED_CONTEXT@@@Z + SETALIAS REDHAWKGCINTERFACE__STRESSGC, ?StressGc@RedhawkGCInterface@@SAXXZ +#endif ;; FEATURE_GC_STRESS +;; +;; IMPORTS +;; + EXTERN RhpGcAlloc + EXTERN RhDebugBreak + EXTERN RhpWaitForSuspend2 + EXTERN RhpWaitForGC2 + EXTERN RhpReversePInvokeAttachOrTrapThread2 + EXTERN RhExceptionHandling_FailedAllocation + EXTERN RhpPublishObject + EXTERN RhpCalculateStackTraceWorker + + + EXTERN $G_LOWEST_ADDRESS + EXTERN $G_HIGHEST_ADDRESS + EXTERN $G_EPHEMERAL_LOW + EXTERN $G_EPHEMERAL_HIGH + EXTERN $G_CARD_TABLE + EXTERN RhpTrapThreads + EXTERN $G_FREE_OBJECT_EETYPE + + EXTERN RhThrowHwEx + EXTERN RhThrowEx + EXTERN RhRethrow + +#ifdef FEATURE_GC_STRESS + EXTERN $REDHAWKGCINTERFACE__STRESSGC + EXTERN $THREAD__HIJACKFORGCSTRESS +#endif ;; FEATURE_GC_STRESS diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/arm/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..a8b3b9465a9f7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AsmOffsetsCpu.h @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
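+// For example, PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_RIP) below asserts
+// that offsetof(PInvokeTransitionFrame, m_RIP) is 0x4 on this target.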
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(138, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(4, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(8, ExInfo, m_exception) +PLAT_ASM_OFFSET(0c, ExInfo, m_kind) +PLAT_ASM_OFFSET(0d, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(18, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(130, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(0c, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(14, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(118, StackFrameIterator) +PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(0c, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(114, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(70, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(24, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, R0) +PLAT_ASM_OFFSET(4, PAL_LIMITED_CONTEXT, R4) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, R5) +PLAT_ASM_OFFSET(0c, PAL_LIMITED_CONTEXT, R6) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R7) +PLAT_ASM_OFFSET(14, PAL_LIMITED_CONTEXT, R8) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R9) +PLAT_ASM_OFFSET(1c, PAL_LIMITED_CONTEXT, R10) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, R11) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(2c, PAL_LIMITED_CONTEXT, LR) + +PLAT_ASM_SIZEOF(88, REGDISPLAY) +PLAT_ASM_OFFSET(38, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(10, REGDISPLAY, pR4) +PLAT_ASM_OFFSET(14, REGDISPLAY, pR5) +PLAT_ASM_OFFSET(18, REGDISPLAY, pR6) +PLAT_ASM_OFFSET(1c, REGDISPLAY, pR7) +PLAT_ASM_OFFSET(20, REGDISPLAY, pR8) +PLAT_ASM_OFFSET(24, REGDISPLAY, pR9) +PLAT_ASM_OFFSET(28, REGDISPLAY, pR10) +PLAT_ASM_OFFSET(2c, REGDISPLAY, pR11) +PLAT_ASM_OFFSET(48, REGDISPLAY, D) diff --git a/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.S new file mode 100644 index 0000000000000..53184d9b28b2a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.S @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// TODO: Implement Arm support + +NESTED_ENTRY RhCallDescrWorker, _TEXT, NoHandler +LOCAL_LABEL(ReturnFromCallDescrThunk): + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +NESTED_END RhCallDescrWorker, _TEXT + diff --git a/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.asm new file mode 100644 index 0000000000000..55a1be033bbc4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.asm @@ -0,0 +1,128 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;----------------------------------------------------------------------------- +;; This helper routine enregisters the appropriate arguments and makes the +;; actual call. 
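+;; The CallDescrData fields referenced below, shown as an assumed C view derived
+;; from the OFFSETOF__CallDescrData__* symbols (illustrative only, field order
+;; not implied):
+;;
+;;   struct CallDescrData {
+;;       void*     pSrc;                     // stack arguments to copy
+;;       uint32_t  numStackSlots;
+;;       void*     pFloatArgumentRegisters;  // image of s0-s15, may be NULL
+;;       void*     pArgumentRegisters;       // image of r0-r3
+;;       void*     pTarget;                  // code address to call
+;;       uint32_t  fpReturnSize;             // 0, 4, 8, 16 or 32
+;;       void*     pReturnBuffer;            // where the return value is stored
+;;   };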
+;;----------------------------------------------------------------------------- +;;void RhCallDescrWorker(CallDescrData * pCallDescrData); + NESTED_ENTRY RhCallDescrWorker + PROLOG_PUSH {r4,r5,r7,lr} + PROLOG_STACK_SAVE r7 + + mov r5,r0 ; save pCallDescrData in r5 + + ldr r1, [r5,#OFFSETOF__CallDescrData__numStackSlots] + cbz r1, Ldonestack + + ;; Add frame padding to ensure frame size is a multiple of 8 (a requirement of the OS ABI). + ;; We push four registers (above) and numStackSlots arguments (below). If this comes to an odd number + ;; of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set, + ;; extend the stack another four bytes". + lsls r2, r1, #2 + and r3, r2, #4 + sub sp, sp, r3 + + ;; This loop copies numStackSlots words + ;; from [pSrcEnd-4,pSrcEnd-8,...] to [sp-4,sp-8,...] + ldr r0, [r5,#OFFSETOF__CallDescrData__pSrc] + add r0,r0,r2 +Lstackloop + ldr r2, [r0,#-4]! + str r2, [sp,#-4]! + subs r1, r1, #1 + bne Lstackloop +Ldonestack + + ;; If FP arguments are supplied in registers (r3 != NULL) then initialize all of them from the pointer + ;; given in r3. Do not use "it" since it faults in floating point even when the instruction is not executed. + ldr r3, [r5,#OFFSETOF__CallDescrData__pFloatArgumentRegisters] + cbz r3, LNoFloatingPoint + vldm r3, {s0-s15} +LNoFloatingPoint + + ;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 12] + ;; into r0, ..., r3 + + ldr r4, [r5,#OFFSETOF__CallDescrData__pArgumentRegisters] + ldm r4, {r0-r3} + + CHECK_STACK_ALIGNMENT + + ;; call pTarget + ;; Note that remoting expect target in r4. + ldr r4, [r5,#OFFSETOF__CallDescrData__pTarget] + blx r4 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + ;; Symbol used to identify thunk call to managed function so the special + ;; case unwinder can unwind through this function. Sadly we cannot directly + ;; export this symbol right now because it confuses DIA unwinder to believe + ;; it's the beginning of a new method, therefore we export the address + ;; of an auxiliary variable holding the address instead. + + ldr r3, [r5,#OFFSETOF__CallDescrData__fpReturnSize] + + ;; Save FP return value if appropriate + cbz r3, LFloatingPointReturnDone + + ;; Float return case + ;; Do not use "it" since it faults in floating point even when the instruction is not executed. + cmp r3, #4 + bne LNoFloatReturn + vmov r0, s0 + b LFloatingPointReturnDone +LNoFloatReturn + + ;; Double return case + ;; Do not use "it" since it faults in floating point even when the instruction is not executed. 
+ cmp r3, #8 + bne LNoDoubleReturn + vmov r0, r1, s0, s1 + b LFloatingPointReturnDone +LNoDoubleReturn +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + ldr r2, [r5, #OFFSETOF__CallDescrData__pReturnBuffer] + + cmp r3, #16 + bne LNoFloatHFAReturn + vstm r2, {s0-s3} + b LReturnDone +LNoFloatHFAReturn + + cmp r3, #32 + bne LNoDoubleHFAReturn + vstm r2, {d0-d3} + b LReturnDone +LNoDoubleHFAReturn + + EMIT_BREAKPOINT ; Unreachable + +LFloatingPointReturnDone + +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + ldr r5, [r5, #OFFSETOF__CallDescrData__pReturnBuffer] + + ;; Save return value into retbuf + str r0, [r5, #(0)] + str r1, [r5, #(4)] + +LReturnDone + +#ifdef _DEBUG + ;; trash the floating point registers to ensure that the HFA return values + ;; won't survive by accident + vldm sp, {d0-d3} +#endif + + EPILOG_STACK_RESTORE r7 + EPILOG_POP {r4,r5,r7,pc} + + NESTED_END RhCallDescrWorker + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..3e216602b0c6b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.S @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement Arm support +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// +// void CallingConventionConverter_ReturnVoidReturnThunk() +// +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + bx lr +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +// +// int CallingConventionConverter_ReturnIntegerReturnThunk(int) +// +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +// +// __jmpstub__CallingConventionConverter_CommonCallingStub +// +// struct CallingConventionConverter_CommonCallingStub_PointerData +// { +// void *ManagedCallConverterThunk; +// void *UniversalThunk; +// } +// +// struct CommonCallingStubInputData +// { +// ULONG_PTR CallingConventionId; +// CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used +// // However, it is specified just like other platforms, so the behavior of the common +// // calling stub is easier to debug +// } +// +// sp-4 - Points at CommonCallingStubInputData +// +// +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub + +// +// void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) +// +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END CallingConventionConverter_GetStubs, _TEXT + +// +// void CallingConventionConverter_SpecifyCommonStubData(CallingConventionConverter_CommonCallingStub_PointerData *commonData); +// +LEAF_ENTRY CallingConventionConverter_SpecifyCommonStubData + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END CallingConventionConverter_SpecifyCommonStubData diff --git 
a/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..0383182caf567 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.asm @@ -0,0 +1,88 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "kxarm.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + DATAAREA +UniversalThunkPointer % 4 + TEXTAREA + +OFFSETOF_CallingConventionId EQU 0 +OFFSETOF_commonData EQU 4 +OFFSETOF_ManagedCallConverterThunk EQU 0 +OFFSETOF_UniversalThunk EQU 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionCoverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. +;; + + ;; + ;; void CallingConventionConverter_ReturnThunk() + ;; + LEAF_ENTRY CallingConventionConverter_ReturnThunk + bx lr + LEAF_END CallingConventionConverter_ReturnThunk + + ;; + ;; __jmpstub__CallingConventionConverter_CommonCallingStub + ;; + ;; struct CallingConventionConverter_CommonCallingStub_PointerData + ;; { + ;; void *ManagedCallConverterThunk; + ;; void *UniversalThunk; + ;; } + ;; + ;; struct CommonCallingStubInputData + ;; { + ;; ULONG_PTR CallingConventionId; + ;; CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used + ;; // However, it is specified just like other platforms, so the behavior of the common + ;; // calling stub is easier to debug + ;; } + ;; + ;; sp-4 - Points at CommonCallingStubInputData + ;; + ;; + LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub + ldr r12, [sp, #-4] + ldr r12, [r12, #OFFSETOF_CallingConventionId] ; Get CallingConventionId into r12 + str r12, [sp, #-8] ; Put calling convention id into red zone + ldr r12, [sp, #-4] + ldr r12, [r12, #OFFSETOF_commonData] ; Get pointer to common data + ldr r12, [r12, #OFFSETOF_ManagedCallConverterThunk] ; Get pointer to managed call converter thunk + str r12, [sp, #-4] ; Put managed calling convention thunk pointer into red zone (overwrites pointer to CommonCallingStubInputData) + ldr r12, =UniversalThunkPointer + ldr r12, [r12] + bx r12 + LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub + + ;; + ;; void CallingConventionConverter_SpecifyCommonStubData(CallingConventionConverter_CommonCallingStub_PointerData *commonData); + ;; + LEAF_ENTRY CallingConventionConverter_SpecifyCommonStubData + ldr r1, [r0, #OFFSETOF_ManagedCallConverterThunk] ; Load ManagedCallConverterThunk into r1 {r1 = (CallingConventionConverter_CommonCallingStub_PointerData*)r0->ManagedCallConverterThunk } + ldr r2, [r0, #OFFSETOF_UniversalThunk] ; Load UniversalThunk into r2 {r2 = (CallingConventionConverter_CommonCallingStub_PointerData*)r0->UniversalThunk } + ldr r12, =UniversalThunkPointer + str r2, [r12] + bx lr + LEAF_END CallingConventionConverter_SpecifyCommonStubData + + ;; + ;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) + ;; + LEAF_ENTRY CallingConventionConverter_GetStubs + ldr r12, =CallingConventionConverter_ReturnThunk + str r12, [r0] ;; ARM doesn't need different return thunks. 
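Editor's note: for reference, the two structures described in the stub comments above, written out as C++. This mirrors the comment block rather than any header in this change, so treat it as illustrative.

#include <cstdint>

struct CallingConventionConverter_CommonCallingStub_PointerData
{
    void* ManagedCallConverterThunk;
    void* UniversalThunk;
};

struct CommonCallingStubInputData
{
    uintptr_t CallingConventionId;
    CallingConventionConverter_CommonCallingStub_PointerData* commonData;
};

// The common calling stub receives a pointer to CommonCallingStubInputData at sp-4,
// copies CallingConventionId to sp-8, overwrites sp-4 with ManagedCallConverterThunk,
// and then tail-calls the registered universal thunk.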
+ str r12, [r1] + ldr r12, =__jmpstub__CallingConventionConverter_CommonCallingStub + str r12, [r2] + bx lr + LEAF_END CallingConventionConverter_GetStubs + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/Dummies.asm b/src/coreclr/src/nativeaot/Runtime/arm/Dummies.asm new file mode 100644 index 0000000000000..ea6c21fc810d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/Dummies.asm @@ -0,0 +1,18 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + LEAF_ENTRY RhpLMod + DCW 0xdefe + bx lr + LEAF_END RhpLMod + + LEAF_ENTRY RhpLMul + DCW 0xdefe + bx lr + LEAF_END RhpLMul + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.S new file mode 100644 index 0000000000000..82a1d89d8df17 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.S @@ -0,0 +1,500 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 7)&(~7)) + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowHwEx +// +// INPUT: R0: exception code of fault +// R1: faulting RIP +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + mov r2, r0 // save exception code into r2 + mov r0, sp // get SP of fault site + + mov lr, r1 // set IP of fault site + + // Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH "{r0,lr}" // push {sp, pc} of fault site + PROLOG_PUSH "{r0,r4-r11,lr}" + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + // r0: SP of fault site + // r1: IP of fault site + // r2: exception code of fault + // lr: IP of fault site (as a 'return address') + mov r4, r2 // save exception code of fault + + // r0 = GetThread() + INLINE_GETTHREAD + + // r1 <- ExInfo* + add r1, sp, #rsp_offsetof_ExInfo + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov r3, #2 + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + ldr r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + mov r0, r4 // restore the exception code + // r0 contains the exception code + // r1 contains the address of the ExInfo + bl C_FUNC(RhThrowHwEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + // no return + EMIT_BREAKPOINT + +NESTED_END RhpThrowHwEx + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowEx +// +// INPUT: R0: exception object +// +// OUTPUT: +// 
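Editor's note: the ExInfo initialization and chain push performed by RhpThrowHwEx above (and repeated, with a different kind, by the throw/rethrow helpers that follow) can be read as the C++ below. Struct shapes are simplified stand-ins; the ExKind values are taken from the asm comments.

#include <cstdint>

struct ExInfoSketch
{
    ExInfoSketch* m_pPrevExInfo;
    void*         m_pExContext;
    void*         m_exception;
    uint8_t       m_kind;
    uint8_t       m_passNumber;
    uint32_t      m_idxCurClause;
};

struct ThreadSketch
{
    ExInfoSketch* m_pExInfoStackHead;
};

void PushExInfoForHardwareFault(ThreadSketch* pThread, ExInfoSketch* pExInfo)
{
    pExInfo->m_exception    = nullptr;      // no managed exception object yet
    pExInfo->m_passNumber   = 1;            // first pass
    pExInfo->m_idxCurClause = 0xFFFFFFFF;   // MaxTryRegionIdx
    pExInfo->m_kind         = 2;            // ExKind.HardwareFault (per the asm comment)

    // link the new ExInfo at the head of the thread's chain
    pExInfo->m_pPrevExInfo      = pThread->m_pExInfoStackHead;
    pThread->m_pExInfoStackHead = pExInfo;
}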
+////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + // Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH "{r0,lr}" // Reserve space for SP and store LR + PROLOG_PUSH "{r0,r4-r11,lr}" + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + // Calculate SP at callsite and save into the PAL_LIMITED_CONTEXT + add r4, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r4, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + mov r4, r0 // Save exception object + // r0 = GetThread() + INLINE_GETTHREAD + + add r2, sp, #(rsp_offsetof_Context + SIZEOF__PAL_LIMITED_CONTEXT + 0x8) // r2 <- addr of return address + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + // where the tail-calling thread had saved LR, which may not match where we have saved LR. + + ldr r1, [r0, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz r1, LOCAL_LABEL(NotHiJacked) + + ldr r3, [r0, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + // r4: exception object + // r1: hijacked return address + // r0: pThread + // r3: hijacked return address location + + add r12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite + cmp r3, r12 // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo LOCAL_LABEL(TailCallWasHijacked) + + // normal case where a valid return address location is hijacked + str r1, [r3] + b LOCAL_LABEL(ClearThreadState) + +LOCAL_LABEL(TailCallWasHijacked): + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + // + + // stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. 
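Editor's note: the hijack handling explained in the comment above reduces to a pointer comparison. Restated as C++ for readability (names are simplified stand-ins; callsiteSP is the SP value the asm recomputes).

#include <cstdint>

struct ThreadHijackSketch
{
    void** m_ppvHijackedReturnAddressLocation;
    void*  m_pvHijackedReturnAddress;
};

// Returns the IP/LR to record in the exception context (sketch).
void* UndoHijackForThrow(ThreadHijackSketch* pThread, void* callsiteSP, void* currentReturnAddress)
{
    void* hijacked = pThread->m_pvHijackedReturnAddress;
    if (hijacked == nullptr)
        return currentReturnAddress;                 // not hijacked, nothing to do

    void** location = pThread->m_ppvHijackedReturnAddressLocation;
    void*  resultIP = currentReturnAddress;

    if (location >= static_cast<void**>(callsiteSP))
        *location = hijacked;   // normal case: the slot is still live, put the address back
    else
        resultIP = hijacked;    // tail call: the slot is gone; the hijacked address is our real caller

    // clear the hijack state either way
    pThread->m_ppvHijackedReturnAddressLocation = nullptr;
    pThread->m_pvHijackedReturnAddress          = nullptr;
    return resultIP;
}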
+ mov lr, r1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +LOCAL_LABEL(ClearThreadState): + + // clear the Thread's hijack state + mov r3, #0 + str r3, [r0, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r3, [r0, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +LOCAL_LABEL(NotHiJacked): + + add r1, sp, #rsp_offsetof_ExInfo // r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] // init the exception object to null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] // init to the first pass + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] // ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + ldr r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add r3, sp, #rsp_offsetof_Context // r3 <- PAL_LIMITED_CONTEXT* + str r3, [r1, #OFFSETOF__ExInfo__m_pExContext] // init ExInfo.m_pExContext + + mov r0, r4 // Restore exception object + // r0 contains the exception object + // r1 contains the address of the new ExInfo + bl C_FUNC(RhThrowEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + // no return + EMIT_BREAKPOINT + +NESTED_END RhpThrowEx, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + // Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH "{r0,lr}" // Reserve space for SP and store LR + PROLOG_PUSH "{r0,r4-r11,lr}" + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + // Compute and save SP at callsite. 
+ add r1, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r1, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + // r0 = GetThread(); + INLINE_GETTHREAD + + // r1 <- ExInfo* + add r1, sp, #rsp_offsetof_ExInfo + + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] // init the exception object to null + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] // init to a deterministic value (ExKind.None) + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] + + // link the ExInfo into the thread's ExInfo chain + ldr r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // r3 <- currently active ExInfo + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context // r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] // init ExInfo.m_pExContext + + mov r0, r3 + // r0 contains the currently active ExInfo + // r1 contains the address of the new ExInfo + blx C_FUNC(RhRethrow) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + // no return + EMIT_BREAKPOINT + +NESTED_END RhpRethrow, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, +// void* pHandlerIP, +// REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: R0: exception object +// R1: handler funclet address +// R2: REGDISPLAY* +// R3: ExInfo* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + PROLOG_PUSH "{r0,r2-r11,lr}" // r0, r2 & r3 are saved so we have the exception object, + // REGDISPLAY and ExInfo later + PROLOG_VPUSH {d8-d15} + +#define rsp_offset_r2 (8 * 8) + 4 +#define rsp_offset_r3 (8 * 8) + 8 + + mov r4, r0 // Save exception object + mov r5, r1 // Save handler funclet address + mov r6, r2 // Save REGDISPLAY* + + // Clear the DoNotTriggerGc state before calling out to our managed catch funclet, + // trashes r0-r2. 
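Editor's note: the ClearRetry/ClearSuccess ldrex/strex loop that follows is an atomic read-modify-write clearing one bit of m_ThreadStateFlags; in C++ terms it is a fetch_and. The flag value below is an assumption made only so the sketch is self-contained.

#include <atomic>
#include <cstdint>

constexpr uint32_t TSF_DoNotTriggerGc = 0x10;   // assumed value, for illustration only

// Equivalent of the ldrex / bic / strex retry loop (sketch).
void ClearDoNotTriggerGc(std::atomic<uint32_t>& threadStateFlags)
{
    threadStateFlags.fetch_and(~TSF_DoNotTriggerGc);
}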
+ // r0 = GetThread() + INLINE_GETTHREAD + +LOCAL_LABEL(ClearRetry_Catch): + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + bics r1, #TSF_DoNotTriggerGc + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, LOCAL_LABEL(ClearSuccess_Catch) + b LOCAL_LABEL(ClearRetry_Catch) +LOCAL_LABEL(ClearSuccess_Catch): + + mov r0, r4 // Reload exception object + mov r3, r5 // Reload handler funclet address + mov r2, r6 // Reload REGDISPLAY pointer + + // + // set preserved regs to the values expected by the funclet + // + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + + // + // load vfp preserved regs + // + add r12, r2, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + // r0 <- exception object + blx r3 // call handler funclet + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + mov r4, r0 // Save the result + + INLINE_GETTHREAD // r0 <- Thread* + // We must unhijack the thread at this point because the section of stack where the + // hijack is applied may go dead. If it does, then the next time we try to unhijack + // the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK r0, r3, r12 // Thread in r0, trashes r3 and r1 + ldr r2, [sp, #rsp_offset_r2] // r2 <- REGDISPLAY* + ldr r3, [sp, #rsp_offset_r3] // r3 <- current ExInfo* + ldr r2, [r2, #OFFSETOF__REGDISPLAY__SP] // r2 <- resume SP value + +LOCAL_LABEL(PopExInfoLoop): + ldr r3, [r3, #OFFSETOF__ExInfo__m_pPrevExInfo] // r3 <- next ExInfo + cbz r3, LOCAL_LABEL(DonePopping) // if (pExInfo == null) { we're done } + cmp r3, r2 + blt LOCAL_LABEL(PopExInfoLoop) // if (pExInfo < resume SP} { keep going } +LOCAL_LABEL(DonePopping): + + str r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // store the new head on the Thread + + // reset RSP and jump to the continuation address + mov sp, r2 + bx r4 + +NESTED_END RhpCallCatchFunclet, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +// +// INPUT: R0: handler funclet address +// R1: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler + + PROLOG_PUSH "{r1,r4-r11,lr}" // r1 is saved so we have the REGDISPLAY later + PROLOG_VPUSH {d8-d15} +#define rsp_offset_r1 8 * 8 + + // + // We want to suppress hijacking between invocations of subsequent finallys. We do + // this because we cannot tolerate a GC after one finally has run (and possibly + // side-effected the GC state of the method) and then been popped off the stack, + // leaving behind no trace of its effect. + // + // So we clear the state before and set it after invocation of the handler. 
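Editor's note: in outline, the suppression described above takes the following shape, sketched in C++ with deliberately simplified types (the flag value is an assumption; the funclet call and REGDISPLAY write-back are reduced to callbacks).

#include <atomic>
#include <cstdint>
#include <functional>

constexpr uint32_t TSF_DoNotTriggerGc = 0x10;   // assumed value, for illustration only

// Shape of RhpCallFinallyFunclet (sketch): GC triggering is re-enabled only for the
// duration of the finally body, and the funclet's view of the preserved registers is
// written back into the REGDISPLAY afterwards.
void CallFinallyFuncletSketch(std::atomic<uint32_t>& threadStateFlags,
                              const std::function<void()>& runFinally,
                              const std::function<void()>& writeBackPreservedRegs)
{
    threadStateFlags.fetch_and(~TSF_DoNotTriggerGc);   // allow GC while managed code runs
    runFinally();                                      // invoke the finally funclet
    writeBackPreservedRegs();                          // publish new r4-r11 / d8-d15
    threadStateFlags.fetch_or(TSF_DoNotTriggerGc);     // back to no-trigger for the dispatcher
}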
+ // + + mov r4, r0 // Save handler funclet address + mov r5, r1 // Save REGDISPLAY* + // + // clear the DoNotTriggerGc flag, trashes r0-r2 + // + INLINE_GETTHREAD // r0 <- Thread* + +LOCAL_LABEL(ClearRetry): + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + bics r1, #TSF_DoNotTriggerGc + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, LOCAL_LABEL(ClearSuccess) + b LOCAL_LABEL(ClearRetry) +LOCAL_LABEL(ClearSuccess): + + mov r2, r4 // reload handler funclet address + mov r1, r5 // reload REGDISPLAY pointer + + // + // set preserved regs to the values expected by the funclet + // + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + + // + // load vfp preserved regs + // + add r12, r1, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + blx r2 // handler funclet address + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr r1, [sp, #rsp_offset_r1] // reload REGDISPLAY pointer + + // + // save new values of preserved regs into REGDISPLAY + // + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + str r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + str r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + str r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + str r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + str r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + str r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + str r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + str r11, [r12] + + // + // store vfp preserved regs + // + add r12, r1, #OFFSETOF__REGDISPLAY__D + vstm r12!, {d8-d15} + + // + // set the DoNotTriggerGc flag, trashes r0-r2 + // + INLINE_GETTHREAD // r0 <- Thread* +LOCAL_LABEL(SetRetry): + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + orrs r1, #TSF_DoNotTriggerGc + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, LOCAL_LABEL(SetSuccess) + b LOCAL_LABEL(SetRetry) +LOCAL_LABEL(SetSuccess): + + EPILOG_VPOP {d8-d15} + EPILOG_POP "{r1,r4-r11,pc}" + +NESTED_END RhpCallFinallyFunclet, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: R0: exception object +// R1: filter funclet address +// R2: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + + PROLOG_PUSH "{r2,r4-r11,lr}" + PROLOG_VPUSH {d8-d15} + + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + + mov r12, r1 // r12 <- handler funclet address + // r0 still contains the exception object + ldr r1, [r2, #OFFSETOF__REGDISPLAY__SP] // r1 <- establisher frame + + // + // call the funclet + // r0 = exception object + // r1 = establisher frame + blx r12 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + // R0 contains the result of the filter execution + + EPILOG_VPOP {d8-d15} + EPILOG_POP "{r2,r4-r11,pc}" + +NESTED_END RhpCallFilterFunclet, 
_TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.asm new file mode 100644 index 0000000000000..41b7a66bda2c3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.asm @@ -0,0 +1,555 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: R0: exception code of fault +;; R1: faulting IP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowHwEx + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 7)&(~7)) + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + + PROLOG_NOP mov r2, r0 ;; save exception code into r2 + PROLOG_NOP mov r0, sp ;; get SP of fault site + + PROLOG_NOP mov lr, r1 ;; set IP of fault site + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_NOP vpush {d8-d15} + PROLOG_NOP push {r0,lr} ;; push {sp, pc} of fault site + PROLOG_PUSH_MACHINE_FRAME ;; unwind code only + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + ; r0: SP of fault site + ; r1: IP of fault site + ; r2: exception code of fault + ; lr: IP of fault site (as a 'return address') + + mov r0, r2 ;; r0 <- exception code of fault + + ;; r2 = GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + add r1, sp, #rsp_offsetof_ExInfo ;; r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov r3, #2 + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.HardwareFault + + + ;; link the ExInfo into the thread's ExInfo chain + ldr r3, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context ;; r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; r0: exception code + ;; r1: ExInfo* + bl RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + ;; no return + __debugbreak + + NESTED_END RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: R0: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowEx + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH {r0,lr} ;; Reserve space for SP and store LR + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + ;; Compute and save SP at callsite. 
+ add r1, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r1, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + ;; r2 = GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. + + ;; NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + ;; where the tail-calling thread had saved LR, which may not match where we have saved LR. + + ldr r1, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz r1, NotHijacked + + ldr r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + ;; r0: exception object + ;; r1: hijacked return address + ;; r2: pThread + ;; r3: hijacked return address location + + add r12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) ;; re-compute SP at callsite + cmp r3, r12 ;; if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo TailCallWasHijacked + + ;; normal case where a valid return address location is hijacked + str r1, [r3] + b ClearThreadState + +TailCallWasHijacked + + ;; Abnormal case where the return address location is now invalid because we ended up here via a tail + ;; call. In this case, our hijacked return address should be the correct caller of this method. + ;; + + ;; stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. + mov lr, r1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +ClearThreadState + + ;; clear the Thread's hijack state + mov r3, #0 + str r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r3, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +NotHijacked + + add r1, sp, #rsp_offsetof_ExInfo ;; r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + ldr r3, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context ;; r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; r0: exception object + ;; r1: ExInfo* + bl RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + ;; no return + __debugbreak + + NESTED_END RhpThrowEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpRethrow + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH {r0,lr} ;; Reserve space for SP and store LR + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + ;; Compute and save SP at callsite. + add r1, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r1, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + ;; r2 = GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + add r1, sp, #rsp_offsetof_ExInfo ;; r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] ;; init to a deterministic value (ExKind.None) + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + + ;; link the ExInfo into the thread's ExInfo chain + ldr r3, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] + mov r0, r3 ;; r0 <- current ExInfo + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context ;; r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; r0 contains the currently active ExInfo + ;; r1 contains the address of the new ExInfo + bl RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + ;; no return + __debugbreak + + NESTED_END RhpRethrow + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: R0: exception object +;; R1: handler funclet address +;; R2: REGDISPLAY* +;; R3: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallCatchFunclet + + PROLOG_PUSH {r0,r2-r11,lr} ;; r0, r2 & r3 are saved so we have the exception object, + ;; REGDISPLAY and ExInfo later + PROLOG_VPUSH {d8-d15} + +#define rsp_offset_is_not_handling_thread_abort (8 * 8) + 0 +#define rsp_offset_r2 (8 * 8) + 4 +#define rsp_offset_r3 (8 * 8) + 8 + + ;; + ;; clear the DoNotTriggerGc flag, trashes r4-r6 + ;; + INLINE_GETTHREAD r5, r6 ;; r5 <- Thread*, r6 <- trashed + + ldr r4, [r5, #OFFSETOF__Thread__m_threadAbortException] + sub r4, r0 + str r4, [sp, #rsp_offset_is_not_handling_thread_abort] ;; Non-zero if the exception is not ThreadAbortException + +ClearRetry_Catch + ldrex r4, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + bic r4, #TSF_DoNotTriggerGc + strex r6, r4, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r6, ClearSuccess_Catch + b ClearRetry_Catch +ClearSuccess_Catch + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r2, 
#OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + +#if 0 // def _DEBUG ;; @TODO: temporarily removed because trashing the frame pointer breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + movw r3, #0xdeed + movt r3, #0xbaad + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR4] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR5] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR6] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR8] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR9] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR10] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + str r3, [r12] +#endif // _DEBUG + + ;; + ;; load vfp preserved regs + ;; + add r12, r2, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + ;; + ;; call the funclet + ;; + ;; r0 still contains the exception object + blx r1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + ;; r0 contains resume IP + + ldr r2, [sp, #rsp_offset_r2] ;; r2 <- REGDISPLAY* + +;; @TODO: add debug-only validation code for ExInfo pop + + INLINE_GETTHREAD r1, r3 ;; r1 <- Thread*, r3 <- trashed + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK r1, r3, r12 ;; Thread in r1, trashes r3 and r12 + + ldr r3, [sp, #rsp_offset_r3] ;; r3 <- current ExInfo* + ldr r2, [r2, #OFFSETOF__REGDISPLAY__SP] ;; r2 <- resume SP value + +PopExInfoLoop + ldr r3, [r3, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; r3 <- next ExInfo + cbz r3, DonePopping ;; if (pExInfo == null) { we're done } + cmp r3, r2 + blt PopExInfoLoop ;; if (pExInfo < resume SP} { keep going } + +DonePopping + str r3, [r1, #OFFSETOF__Thread__m_pExInfoStackHead] ;; store the new head on the Thread + + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_AbortInProgress + beq NoAbort + + ldr r3, [sp, #rsp_offset_is_not_handling_thread_abort] + cmp r3, #0 + bne NoAbort + + ;; It was the ThreadAbortException, so rethrow it + ;; reset SP + mov r1, r0 ;; r1 <- continuation address as exception PC + mov r0, #STATUS_REDHAWK_THREAD_ABORT + mov sp, r2 + b RhpThrowHwEx + +NoAbort + ;; reset SP and jump to continuation address + mov sp, r2 + bx r0 + + NESTED_END RhpCallCatchFunclet + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: R0: handler funclet address +;; R1: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFinallyFunclet + + PROLOG_PUSH {r1,r4-r11,lr} ;; r1 is saved so we have the REGDISPLAY later + PROLOG_VPUSH {d8-d15} +#define rsp_offset_r1 8 * 8 + + ;; + ;; We want to suppress hijacking between invocations of subsequent finallys. We do this because we + ;; cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + ;; method) and then been popped off the stack, leaving behind no trace of its effect. + ;; + ;; So we clear the state before and set it after invocation of the handler. 
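Editor's note: the PopExInfoLoop in the catch-funclet epilog above unlinks every ExInfo that lives below the stack location being resumed to. A C++ sketch with simplified types:

#include <cstdint>

struct ExInfoNodeSketch
{
    ExInfoNodeSketch* m_pPrevExInfo;
    // remaining fields omitted
};

// Starting from the ExInfo passed to the funclet, skip every entry that sits below
// the resume SP; the caller stores the result back into Thread::m_pExInfoStackHead.
ExInfoNodeSketch* PopExInfosBelowSketch(ExInfoNodeSketch* pCurrentExInfo, uintptr_t resumeSP)
{
    ExInfoNodeSketch* p = pCurrentExInfo;
    do
    {
        p = p->m_pPrevExInfo;
    }
    while (p != nullptr && reinterpret_cast<uintptr_t>(p) < resumeSP);
    return p;
}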
+ ;; + + ;; + ;; clear the DoNotTriggerGc flag, trashes r1-r3 + ;; + INLINE_GETTHREAD r2, r3 ;; r2 <- Thread*, r3 <- trashed +ClearRetry + ldrex r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + bic r1, #TSF_DoNotTriggerGc + strex r3, r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r3, ClearSuccess + b ClearRetry +ClearSuccess + + ldr r1, [sp, #rsp_offset_r1] ;; reload REGDISPLAY pointer + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + +#if 0 // def _DEBUG ;; @TODO: temporarily removed because trashing the frame pointer breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + movw r3, #0xdeed + movt r3, #0xbaad + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + str r3, [r12] +#endif // _DEBUG + + ;; + ;; load vfp preserved regs + ;; + add r12, r1, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + ;; + ;; call the funclet + ;; + blx r0 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr r1, [sp, #rsp_offset_r1] ;; reload REGDISPLAY pointer + + ;; + ;; save new values of preserved regs into REGDISPLAY + ;; + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + str r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + str r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + str r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + str r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + str r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + str r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + str r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + str r11, [r12] + + ;; + ;; store vfp preserved regs + ;; + add r12, r1, #OFFSETOF__REGDISPLAY__D + vstm r12!, {d8-d15} + + ;; + ;; set the DoNotTriggerGc flag, trashes r1-r3 + ;; + INLINE_GETTHREAD r2, r3 ;; r2 <- Thread*, r3 <- trashed +SetRetry + ldrex r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + orr r1, #TSF_DoNotTriggerGc + strex r3, r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r3, SetSuccess + b SetRetry +SetSuccess + + EPILOG_VPOP {d8-d15} + EPILOG_POP {r1,r4-r11,pc} + + NESTED_END RhpCallFinallyFunclet + + INLINE_GETTHREAD_CONSTANT_POOL + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: R0: exception object +;; R1: filter funclet address +;; R2: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFilterFunclet + + PROLOG_PUSH {r2,r4-r11,lr} + 
PROLOG_VPUSH {d8-d15} + + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + + ;; + ;; call the funclet + ;; + ;; r0 still contains the exception object + blx r1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + EPILOG_VPOP {d8-d15} + EPILOG_POP {r2,r4-r11,pc} + + NESTED_END RhpCallFilterFunclet + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/FloatingPoint.asm b/src/coreclr/src/nativeaot/Runtime/arm/FloatingPoint.asm new file mode 100644 index 0000000000000..9d872fecb6576 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/FloatingPoint.asm @@ -0,0 +1,38 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + IMPORT fmod + + NESTED_ENTRY RhpFltRemRev + + PROLOG_PUSH {r4,lr} ; Save return address (and r4 for stack alignment) + + ;; The CRT only exports the double form of fmod, so we need to convert our input registers (s0, s1) to + ;; doubles (d0, d1). Unfortunately these registers overlap (d0 == s0/s1) so we need to move our inputs + ;; elsewhere first. In this case we can move them into s4/s5, which are also volatile and don't need + ;; to be preserved. + vmov.f32 s4, s0 + vmov.f32 s5, s1 + + ;; Convert s4 and s5 into d0 and d1. + vcvt.f64.f32 d0, s4 + vcvt.f64.f32 d1, s5 + + ;; Call the CRT's fmod to calculate the remainder into d0. + ldr r12, =fmod + blx r12 + + ;; Convert double result back to single. As far as I can see it's legal to do this directly even + ;; though d0 overlaps s0. + vcvt.f32.f64 s0, d0 + + EPILOG_POP {r4,lr} + EPILOG_RETURN + + NESTED_END RhpFltRemRev + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/arm/GcProbe.asm new file mode 100644 index 0000000000000..6418b0af16441 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/GcProbe.asm @@ -0,0 +1,620 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + ;; ARM64TODO: do same fix here as on Arm64? + SETALIAS g_fGcStressStarted, ?g_GCShadow@@3PAEA + + EXTERN $g_fGcStressStarted + +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH +PROBE_SAVE_FLAGS_R0_IS_GCREF equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF + + + ;; Build a map of symbols representing offsets into a transition frame (see PInvokeTransitionFrame in + ;; rhbinder.h and keep these two in sync. + map 0 +m_ChainPointer field 4 ; r11 - OS frame chain used for quick stackwalks +m_RIP field 4 ; lr +m_FramePointer field 4 ; r7 +m_pThread field 4 +m_Flags field 4 ; bitmask of saved registers +m_PreservedRegs field (4 * 6) ; r4-r6,r8-r10 +m_CallersSP field 4 ; sp at routine entry +m_SavedR0 field 4 ; r0 +m_VolatileRegs field (4 * 4) ; r1-r3,lr +m_ReturnVfpRegs field (8 * 4) ; d0-d3, not really part of the struct +m_SavedAPSR field 4 ; saved condition codes +PROBE_FRAME_SIZE field 0 + + ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very + ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. In most cases setting up the + ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and + ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME + ;; defined below. 
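Editor's note: the map/field block above fixes the probe frame at 104 (0x68) bytes. The same layout written as a C++ struct with compile-time checks, purely as an illustrative cross-reference; every slot is modelled as uint32_t so the checks hold on any host, and the D registers become an array of eight words.

#include <cstddef>
#include <cstdint>

struct ProbeFrameSketch
{
    uint32_t m_ChainPointer;        // r11, OS frame chain used for quick stackwalks
    uint32_t m_RIP;                 // lr
    uint32_t m_FramePointer;        // r7
    uint32_t m_pThread;
    uint32_t m_Flags;               // bitmask of saved registers
    uint32_t m_PreservedRegs[6];    // r4-r6, r8-r10
    uint32_t m_CallersSP;           // sp at routine entry
    uint32_t m_SavedR0;             // r0
    uint32_t m_VolatileRegs[4];     // r1-r3, lr
    uint32_t m_ReturnVfpRegs[8];    // d0-d3
    uint32_t m_SavedAPSR;           // saved condition codes
};

static_assert(offsetof(ProbeFrameSketch, m_pThread)   == 12,  "m_pThread");
static_assert(offsetof(ProbeFrameSketch, m_Flags)     == 16,  "m_Flags");
static_assert(offsetof(ProbeFrameSketch, m_CallersSP) == 44,  "m_CallersSP");
static_assert(offsetof(ProbeFrameSketch, m_SavedAPSR) == 100, "m_SavedAPSR");
static_assert(sizeof(ProbeFrameSketch)                == 104, "PROBE_FRAME_SIZE");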
For the special cases where additional work has to be done in the prolog we also provide + ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control + ;; to be asserted. + ;; + ;; Note that we currently employ a significant simplification of frame setup: we always allocate a + ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can + ;; lead to upto five additional register saves (r0-r3,r12) or 20 bytes of stack space. I have done no + ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the + ;; additional saves will show any measurable degradation. + + ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro + ;; can only be called from within the prolog). + MACRO + ALLOC_PROBE_FRAME + + PROLOG_STACK_ALLOC 4 ; Space for saved APSR + PROLOG_VPUSH {d0-d3} ; Save floating point return registers + PROLOG_PUSH {r0-r3,lr} ; Save volatile registers + PROLOG_STACK_ALLOC 4 ; Space for caller's SP + PROLOG_PUSH {r4-r6,r8-r10} ; Save non-volatile registers + PROLOG_STACK_ALLOC 8 ; Space for flags and Thread* + PROLOG_PUSH {r7} ; Save caller's frame pointer + PROLOG_PUSH {r11,lr} ; Save frame-chain pointer and return address + MEND + + ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all + ;; registers are restored (apart for sp and pc), even volatiles. + MACRO + FREE_PROBE_FRAME + + EPILOG_POP {r11,lr} ; Restore frame-chain pointer and return address + EPILOG_POP {r7} ; Restore caller's frame pointer + EPILOG_STACK_FREE 8 ; Discard flags and Thread* + EPILOG_POP {r4-r6,r8-r10} ; Restore non-volatile registers + EPILOG_STACK_FREE 4 ; Discard caller's SP + EPILOG_POP {r0-r3,lr} ; Restore volatile registers + EPILOG_VPOP {d0-d3} ; Restore floating point return registers + EPILOG_STACK_FREE 4 ; Space for saved APSR + MEND + + ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can + ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP + ;; is invariant outside of the prolog. + ;; + ;; $threadReg : register containing the Thread* (this will be preserved) + ;; $trashReg : register that can be trashed by this macro + ;; $BITMASK : value to initialize m_Flags field with (register or #constant) + ;; $frameSize : total size of the method's stack frame (including probe frame size) + MACRO + INIT_PROBE_FRAME $threadReg, $trashReg, $BITMASK, $frameSize + + str $threadReg, [sp, #m_pThread] ; Thread * + mov $trashReg, $BITMASK ; Bitmask of preserved registers + str $trashReg, [sp, #m_Flags] + add $trashReg, sp, #$frameSize + str $trashReg, [sp, #m_CallersSP] + MEND + + ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro + ;; first in the method (no further prolog instructions can be added after this). + ;; + ;; $threadReg : register containing the Thread* (this will be preserved). If defaulted (specify |) then + ;; the current thread will be calculated inline into r2 ($trashReg must not equal r2 in + ;; this case) + ;; $trashReg : register that can be trashed by this macro + ;; $BITMASK : value to initialize m_Flags field with (register or #constant) + MACRO + PROLOG_PROBE_FRAME $threadReg, $trashReg, $BITMASK + + ; Local string tracking the name of the register in which the Thread* is kept. 
Defaults to the value + ; of $threadReg. + LCLS __PPF_ThreadReg +__PPF_ThreadReg SETS "$threadReg" + + ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + ; incoming register values into it. + ALLOC_PROBE_FRAME + + ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into r2. + ; Record that r2 holds the Thread* in our local variable. + IF "$threadReg" == "" + ASSERT "$trashReg" != "r2" +__PPF_ThreadReg SETS "r2" + INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg + ENDIF + + ; Perform the rest of the PInvokeTransitionFrame initialization. + INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $BITMASK, PROBE_FRAME_SIZE + str sp, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + + ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and + ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR. + MACRO + EPILOG_PROBE_FRAME + + FREE_PROBE_FRAME + EPILOG_RETURN + MEND + + +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. +;; +;; Register state on entry: +;; r2: thread pointer +;; +;; Register state on exit: +;; r12: trashed +;; + MACRO + ClearHijackState + + mov r12, #0 + str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + MEND + + +;; +;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +;; clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; r2: thread pointer +;; r3: trashed +;; r12: trashed +;; + MACRO + FixupHijackedCallstack + + ;; r2 <- GetThread(), TRASHES r3 + INLINE_GETTHREAD r2, r3 + + ;; + ;; Fix the stack by restoring the original return address + ;; + ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + + ClearHijackState + MEND + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; r4: thread pointer +;; +;; Register state on exit: +;; r4: thread pointer +;; All other registers trashed +;; + + EXTERN RhpWaitForGCNoAbort + + MACRO + WaitForGCCompletion + + ldr r2, [r4, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC + bne %ft0 + + ldr r2, [r4, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + bl RhpWaitForGCNoAbort +0 + MEND + + + MACRO + HijackTargetFakeProlog + + ;; This is a fake entrypoint for the method that 'tricks' the OS into calling our personality routine. + ;; The code here should never be executed, and the unwind info is bogus, but we don't mind since the + ;; stack is broken by the hijack anyway until after we fix it below. + PROLOG_PUSH {lr} + nop ; We also need a nop here to simulate the implied bl instruction. Without + ; this, an OS-applied -2 will back up into the method prolog and the unwind + ; will not be applied as desired. 
+ + MEND + + +;; +;; +;; +;; GC Probe Hijack targets +;; +;; + EXTERN RhpPInvokeExceptionGuard + + + NESTED_ENTRY RhpGcProbeHijackScalarWrapper, .text, RhpPInvokeExceptionGuard + + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijackScalar + + FixupHijackedCallstack + mov r12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcProbe + NESTED_END RhpGcProbeHijackScalarWrapper + + NESTED_ENTRY RhpGcProbeHijackObjectWrapper, .text, RhpPInvokeExceptionGuard + + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijackObject + + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) + b RhpGcProbe + NESTED_END RhpGcProbeHijackObjectWrapper + + NESTED_ENTRY RhpGcProbeHijackByrefWrapper, .text, RhpPInvokeExceptionGuard + + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijackByref + + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) + b RhpGcProbe + NESTED_END RhpGcProbeHijackByrefWrapper + +#ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack targets +;; +;; + LEAF_ENTRY RhpGcStressHijackScalar + FixupHijackedCallstack + mov r12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcStressProbe + LEAF_END RhpGcStressHijackScalar + + LEAF_ENTRY RhpGcStressHijackObject + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) + b RhpGcStressProbe + LEAF_END RhpGcStressHijackObject + + LEAF_ENTRY RhpGcStressHijackByref + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) + b RhpGcStressProbe + LEAF_END RhpGcStressHijackByref + + +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; This worker performs the GC Stress work and returns to the original return address. +;; +;; Register state on entry: +;; r0: hijacked function return value +;; r1: hijacked function return value +;; r2: thread pointer +;; r12: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for r0, have been trashed +;; All other registers restored as they were when the hijack was first reached. +;; + NESTED_ENTRY RhpGcStressProbe + PROLOG_PROBE_FRAME r2, r3, r12 + + bl $REDHAWKGCINTERFACE__STRESSGC + + EPILOG_PROBE_FRAME + NESTED_END RhpGcStressProbe +#endif ;; FEATURE_GC_STRESS + + EXTERN RhpThrowHwEx + + LEAF_ENTRY RhpGcProbe + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_TrapThreads + bne %0 + bx lr +0 + b RhpGcProbeRare + LEAF_END RhpGcProbe + + NESTED_ENTRY RhpGcProbeRare + PROLOG_PROBE_FRAME r2, r3, r12 + + mov r4, r2 + WaitForGCCompletion + + ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tst r2, #PTFF_THREAD_ABORT + bne %1 + + EPILOG_PROBE_FRAME + +1 + FREE_PROBE_FRAME + EPILOG_NOP mov r0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_NOP mov r1, lr ;; return address as exception PC + EPILOG_BRANCH RhpThrowHwEx + + NESTED_END RhpGcProbe + + LEAF_ENTRY RhpGcPoll + ; @todo: I'm assuming it's not OK to trash any register here. If that's not true we can optimize the + ; push/pops out of this fast path. + push {r0} + ldr r0, =RhpTrapThreads + ldr r0, [r0] + tst r0, #TrapThreadsFlags_TrapThreads + bne %0 + pop {r0} + bx lr +0 + pop {r0} + b RhpGcPollRare + LEAF_END RhpGcPoll + + NESTED_ENTRY RhpGcPollRare + PROLOG_PROBE_FRAME |, r3, #PROBE_SAVE_FLAGS_EVERYTHING + + ; Unhijack this thread, if necessary. 
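Editor's note: the RhpGcPoll fast path above is a single flag test; the rare path only runs when the suspension machinery has raised the trap bit. As C++ (sketch; the flag value and the atomic wrapper are assumptions made to keep the example self-contained):

#include <atomic>
#include <cstdint>

constexpr uint32_t TrapThreadsFlags_TrapThreads = 0x2;  // assumed bit value, for illustration

extern std::atomic<uint32_t> g_TrapThreads;             // stands in for RhpTrapThreads
void GcPollRareSketch();                                // stands in for RhpGcPollRare

inline void GcPollSketch()
{
    if (g_TrapThreads.load(std::memory_order_relaxed) & TrapThreadsFlags_TrapThreads)
        GcPollRareSketch();
}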
+ INLINE_THREAD_UNHIJACK r2, r0, r1 ;; trashes r0, r1 + + mov r4, r2 + WaitForGCCompletion + + EPILOG_PROBE_FRAME + NESTED_END RhpGcPollRare + + LEAF_ENTRY RhpGcPollStress + ; + ; loop hijacking is used instead + ; + __debugbreak + + LEAF_END RhpGcPollStress + + +#ifdef FEATURE_GC_STRESS + NESTED_ENTRY RhpHijackForGcStress + PROLOG_PUSH {r0,r1} ; Save return value + PROLOG_VPUSH {d0-d3} ; Save VFP return value + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need SP and + ;; LR, but this is test code, so I'm not too worried about efficiency. + ;; + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + ;; we'll need to reserve the size of the D registers in the context + ;; compute in the funny way below to include any padding between LR and D +DREG_SZ equ (SIZEOF__PAL_LIMITED_CONTEXT - (OFFSETOF__PAL_LIMITED_CONTEXT__LR + 4)) + + PROLOG_STACK_ALLOC DREG_SZ ;; Reserve space for d8-d15 + PROLOG_PUSH {r0,lr} ;; Reserve space for SP and store LR + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + ;; Compute and save SP at callsite. + add r0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20 + 8) ;; +0x20 for vpush {d0-d3}, +8 for push {r0,r1} + str r0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP] + + mov r0, sp ; Address of PAL_LIMITED_CONTEXT + bl $THREAD__HIJACKFORGCSTRESS + + ;; epilog + EPILOG_POP {r0,r4-r11,lr} + EPILOG_STACK_FREE DREG_SZ + 8 ; Discard saved SP and LR and space for d8-d15 + EPILOG_VPOP {d0-d3} ; Restore VFP return value + EPILOG_POP {r0,r1} ; Restore return value + bx lr + NESTED_END RhpHijackForGcStress +#endif ;; FEATURE_GC_STRESS + + +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; +;; Register state on entry: +;; r0: pointer to this function (i.e., trash) +;; r1: reference to the exception object. +;; r2: handler address we want to jump to. +;; Non-volatile registers are all already correct for return to the caller. +;; LR still contains the return address. 
+;; +;; Register state on exit: +;; All registers except r0 and lr unchanged +;; + MACRO + RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName + + LEAF_ENTRY $funcName + ; Currently the EH epilog won't pop the return address back into LR, + ; so we have to have a funny load from [sp-4] here to retrieve it. + + ldr r0, =$hijackFuncName + cmp r0, lr + beq RhpGCProbeForEHJump + + IF $isStress + ldr r0, =$stressFuncName + cmp r0, lr + beq RhpGCStressProbeForEHJump + ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov lr, r2 ; Update the return address + bx lr + LEAF_END $funcName + MEND + +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. + RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, {false}, 0 +#ifdef FEATURE_GC_STRESS + RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, {true}, RhpGcStressHijackScalar + RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, {true}, RhpGcStressHijackObject + RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, {true}, RhpGcStressHijackByref +#endif + +;; +;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; r0: scratch +;; r1: reference to the exception object. +;; r2: handler address we want to jump to. +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we are just about to returned from the call +;; +;; Register state on exit: +;; r0: reference to the exception object +;; r2: thread pointer +;; + MACRO + EHJumpProbeProlog + + PROLOG_PUSH {r1,r2} ; save the handler address so we can jump to it later (save r1 just for alignment) + PROLOG_NOP mov r0, r1 ; move the ex object reference into r0 so we can report it + ALLOC_PROBE_FRAME + + ;; r2 <- GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + ;; Recover the original return address and update the frame + ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + + ;; ClearHijackState expects thread in r2 (trashes r12). + ClearHijackState + + ; TRASHES r1 + INIT_PROBE_FRAME r2, r1, #PROBE_SAVE_FLAGS_R0_IS_GCREF, (PROBE_FRAME_SIZE + 8) + str sp, [r2, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the +;; final jump to the handler for EH jump probe funcs. +;; +;; Register state on entry: +;; r0: reference to the exception object +;; r1-r3: scratch +;; +;; Register state on exit: +;; sp: correct for return to the caller +;; r1: reference to the exception object +;; + MACRO + EHJumpProbeEpilog + + FREE_PROBE_FRAME ; This restores exception object back into r0 + EPILOG_NOP mov r1, r0 ; Move the Exception object back into r1 where the catch handler expects it + EPILOG_POP {r0,pc} ; Recover the handler address and jump to it + MEND + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete. +;; +;; Register state on entry: +;; r0: reference to the exception object. +;; r2: thread +;; Non-volatile registers are all already correct for return to the caller. 
+;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; r7: previous frame pointer +;; r0: reference to the exception object +;; + NESTED_ENTRY RhpGCProbeForEHJump + EHJumpProbeProlog + +#ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + ldr r1, =RhpTrapThreads + ldr r1, [r1] + tst r1, #TrapThreadsFlags_TrapThreads + bne %0 + + bl RhDebugBreak +0 +#endif ;; _DEBUG + + mov r4, r2 + WaitForGCCompletion + + EHJumpProbeEpilog + NESTED_END RhpGCProbeForEHJump + +#ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; r1: reference to the exception object. +;; r2: thread +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; r7: previous frame pointer +;; r0: reference to the exception object +;; + NESTED_ENTRY RhpGCStressProbeForEHJump + EHJumpProbeProlog + + bl $REDHAWKGCINTERFACE__STRESSGC + + EHJumpProbeEpilog + NESTED_END RhpGCStressProbeForEHJump + +;; +;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. +;; + LEAF_ENTRY RhpSuppressGcStress + + push {r0-r2} + INLINE_GETTHREAD r0, r1 + +Retry + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + orr r1, #TSF_SuppressGcStress + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, Success + b Retry + +Success + pop {r0-r2} + bx lr + + LEAF_END RhpSuppressGcStress +#endif ;; FEATURE_GC_STRESS + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/arm/GetThread.asm new file mode 100644 index 0000000000000..b78319f8f15c2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/GetThread.asm @@ -0,0 +1,32 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: none +;; +;; OUTPUT: r0: Thread pointer +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; @todo check the actual requirements here, r0 is both return and argument register +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpGetThread + + ;; r0 = GetThread(), TRASHES r12 + INLINE_GETTHREAD r0, r12 + bx lr + + LEAF_END +FASTCALL_ENDFUNC + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/arm/Interlocked.S new file mode 100644 index 0000000000000..c3aada2e9d6c5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/Interlocked.S @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
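+
+// For illustration only: the helper below is the hand-written LDREX/STREX form of a
+// sequentially consistent 32-bit compare-exchange that returns the value observed at
+// the destination. A rough C++ sketch of that contract (the builtin is GCC/Clang
+// shorthand used purely for illustration; nothing in this file depends on it):
+//
+//   int32_t LockCmpXchg32(int32_t* dest, int32_t value, int32_t comparand)
+//   {
+//       // store 'value' only if *dest equals 'comparand'; on failure the builtin
+//       // writes the observed value back into 'comparand', so returning it hands
+//       // the caller the value seen at *dest in both cases.
+//       __atomic_compare_exchange_n(dest, &comparand, value, false,
+//                                   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+//       return comparand;
+//   }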
+ +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// r1 = value +// r2 = comparand +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + dmb +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation +LOCAL_LABEL(CmpXchg32Retry): + ldrex r3, [r0] + cmp r2, r3 + bne LOCAL_LABEL(CmpXchg32Exit) + strex r12, r1, [r0] + cmp r12, #0 + bne LOCAL_LABEL(CmpXchg32Retry) +LOCAL_LABEL(CmpXchg32Exit): + mov r0, r3 + dmb + bx lr +LEAF_END RhpLockCmpXchg32, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// {r2,r3} = value +// sp[0+8] = comparand +LEAF_ENTRY RhpLockCmpXchg64, _TEXT +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation + ldr r12, [r0] // dummy read for null check + PROLOG_PUSH "{r4-r6,lr}" + dmb + ldrd r4, r5, [sp,#0x10] +LOCAL_LABEL(CmpXchg64Retry): + ldrexd r6, r1, [r0] + cmp r6, r4 + bne LOCAL_LABEL(CmpXchg64Exit) + cmp r1, r5 + bne LOCAL_LABEL(CmpXchg64Exit) + strexd r12, r2, r3, [r0] + cmp r12, #0 + bne LOCAL_LABEL(CmpXchg64Retry) +LOCAL_LABEL(CmpXchg64Exit): + mov r0, r6 + dmb + EPILOG_POP "{r4-r6,pc}" +LEAF_END RhpLockCmpXchg64, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.S new file mode 100644 index 0000000000000..d8012f088a6ec --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.S @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#define POINTER_SIZE 4 + +// +// RhCommonStub +// +NESTED_ENTRY RhCommonStub, _TEXT, NoHandler + // Custom calling convention: + // red zone has pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + // Copy red zone value into r12 so that the PROLOG_PUSH doesn't destroy it + ldr r12, [sp, #-4] + PROLOG_PUSH "{r0-r4, lr}" + PROLOG_VPUSH {d0-d7} // Capture the floating point argument registers + + mov r4, r12 + + INLINE_GET_TLS_VAR tls_thunkData + + // r0 = base address of TLS data + // r4 = address of context cell in thunk's data + + ldr r12, [r4] + str r12, [r0] + + // Now load the target address and jump to it. 
+ ldr r12, [r4, #POINTER_SIZE] + EPILOG_VPOP {d0-d7} + EPILOG_POP "{r0-r4, lr}" + bx r12 + +NESTED_END RhCommonStub, _TEXT + +// +// IntPtr RhGetCommonStubAddress() +// +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + ldr r0, =C_FUNC(RhCommonStub) + bx lr +LEAF_END RhGetCommonStubAddress, _TEXT + +// +// IntPtr RhGetCurrentThunkContext() +// +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + + PROLOG_PUSH "{r12, lr}" + + INLINE_GET_TLS_VAR tls_thunkData + + ldr r0, [r0] + EPILOG_POP "{r12, pc}" +LEAF_END RhGetCurrentThunkContext, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..3d652ecf924fd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.asm @@ -0,0 +1,83 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + +#include "kxarm.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +__tls_array equ 0x2C ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 0x04 + +;; TLS variables + AREA |.tls$|, DATA +ThunkParamSlot % 0x4 + + TEXTAREA + + EXTERN _tls_index + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; RhCommonStub + ;; + NESTED_ENTRY RhCommonStub + ;; Custom calling convention: + ;; red zone has pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + ;; Copy red zone value into r12 so that the PROLOG_PUSH doesn't destroy it + PROLOG_NOP ldr r12, [sp, #-4] + PROLOG_PUSH {r0-r3} + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + ldr r3, =_tls_index + ldr r2, [r3] + mrc p15, #0, r3, c13, c0, #2 + ldr r3, [r3, #__tls_array] + ldr r2, [r3, r2, lsl #2] ;; r2 <- our TLS base + + ;; r2 = base address of TLS data + ;; r12 = address of context cell in thunk's data + + ;; store thunk address in thread static + ldr r1, [r12] + ldr r3, =ThunkParamSlot + str r1, [r2, r3] ;; ThunkParamSlot <- context slot data + + ;; Now load the target address and jump to it. + ldr r12, [r12, #POINTER_SIZE] + EPILOG_POP {r0-r3} + bx r12 + NESTED_END RhCommonStub + + + ;; + ;; IntPtr RhGetCommonStubAddress() + ;; + LEAF_ENTRY RhGetCommonStubAddress + ldr r0, =RhCommonStub + bx lr + LEAF_END RhGetCommonStubAddress + + + ;; + ;; IntPtr RhGetCurrentThunkContext() + ;; + LEAF_ENTRY RhGetCurrentThunkContext + + ldr r3, =_tls_index + ldr r2, [r3] + mrc p15, #0, r3, c13, c0, #2 + ldr r3, [r3, #__tls_array] + ldr r2, [r3, r2, lsl #2] ;; r2 <- our TLS base + + ldr r3, =ThunkParamSlot + ldr r0, [r2, r3] ;; r0 <- ThunkParamSlot + + bx lr + LEAF_END RhGetCurrentThunkContext + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.S new file mode 100644 index 0000000000000..53616c2269615 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.S @@ -0,0 +1,2 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
diff --git a/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.asm new file mode 100644 index 0000000000000..462f31f00294d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.asm @@ -0,0 +1,243 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + EXTERN GetClasslibCCtorCheck + EXTERN memcpy + EXTERN memcpyGCRefs + EXTERN memcpyGCRefsWithWriteBarrier + EXTERN memcpyAnyWithWriteBarrier + + TEXTAREA + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; r0 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; + LEAF_ENTRY RhpCheckCctor + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr r12, [r0, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp r12, #1 + bne RhpCheckCctor__SlowPath + bx lr +RhpCheckCctor__SlowPath + mov r1, r0 + b RhpCheckCctor2 ; tail-call the check cctor helper that actually has an implementation to call + ; the cctor + LEAF_END RhpCheckCctor + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; r0 : Value that must be preserved in this register across the cctor check. +;; r1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than r0 may be trashed and the condition codes may also be trashed. +;; + LEAF_ENTRY RhpCheckCctor2 + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr r12, [r1, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp r12, #1 + bne RhpCheckCctor2__SlowPath + bx lr + + LEAF_END RhpCheckCctor2 + +;; +;; Slow path helper for RhpCheckCctor. +;; +;; Input: +;; r0 : Value that must be preserved in this register across the cctor check. +;; r1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than r0 may be trashed and the condition codes may also be trashed. +;; + NESTED_ENTRY RhpCheckCctor2__SlowPath + + ;; Need to preserve r0, r1 and lr across helper call. r2 is also pushed to keep the stack 8 byte aligned. + PROLOG_PUSH {r0-r2,lr} + + ;; Call a C++ helper to retrieve the address of the classlib callback. 
The caller's return address is + ;; passed as the argument to the helper; it's an address in the module and is used by the helper to + ;; locate the classlib. + mov r0, lr + bl GetClasslibCCtorCheck + + ;; R0 now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in stashed on the stack. Clean up and tail call to the classlib + ;; callback so we're not on the stack should a GC occur (so we don't need to worry about transition + ;; frames). + mov r12, r0 + EPILOG_POP {r0-r2,lr} + ;; tail-call the class lib cctor check function. This function is required to return its first + ;; argument, so that r0 can be preserved. + EPILOG_BRANCH_REG r12 + + NESTED_END RhpCheckCctor__SlowPath2 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyteNoGCRefs + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_NoGCRefs + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + ldrb r3, [r1] + + ; tail-call to plain-old-memcpy + b memcpy + +NothingToCopy_NoGCRefs + ; dest is already still in r0 + bx lr + + LEAF_END + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyte + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_RhpCopyMultibyte + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + ldrb r3, [r1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefs + +NothingToCopy_RhpCopyMultibyte + ; dest is already still in r0 + bx lr + + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. 
+;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; + + LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_RhpCopyMultibyteWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + ldrb r3, [r1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefsWithWriteBarrier + +NothingToCopy_RhpCopyMultibyteWithWriteBarrier + ; dest is already still in r0 + bx lr + + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers +;; + + LEAF_ENTRY RhpCopyAnyWithWriteBarrier + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_RhpCopyAnyWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + ldrb r3, [r1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyAnyWithWriteBarrier + +NothingToCopy_RhpCopyAnyWithWriteBarrier + ; dest is already still in r0 + bx lr + + LEAF_END + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.S new file mode 100644 index 0000000000000..6be1544876c7b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.S @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include + +.syntax unified +.thumb + +// +// RhpPInvoke +// +// IN: R0: address of pinvoke frame +// +// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +// Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
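+//
+// For illustration, the body below is roughly the following C++-like sketch (the
+// field names simply mirror the offsets it stores through; this is not a second
+// implementation):
+//
+//   frame->m_RIP           = return address;
+//   frame->m_FramePointer  = r11;
+//   frame->m_PreservedRegs = sp;             // first preserved-register slot
+//   frame->m_Flags         = PTFF_SAVE_SP;
+//   frame->m_pThread       = GetThread();
+//   frame->m_pThread->m_pTransitionFrame = frame;
+//   if (RhpTrapThreads != TrapThreadsFlags_None)
+//       RhpWaitForSuspend2();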
+// + +NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler + str lr, [r0, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + str r11, [r0, #OFFSETOF__PInvokeTransitionFrame__m_FramePointer] + str sp, [r0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs] + mov r3, #PTFF_SAVE_SP + str r3, [r0, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + + PROLOG_PUSH "{r5,lr}" + + mov r5, r0 + // get TLS global variable address + // r0 = GetThread() + INLINE_GETTHREAD + str r0, [r5, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + str r5, [r0, #OFFSETOF__Thread__m_pTransitionFrame] + + ldr r3, =C_FUNC(RhpTrapThreads) + ldr r3, [r3] + cbnz r3, LOCAL_LABEL(InvokeRareTrapThread) // TrapThreadsFlags_None = 0 + + EPILOG_POP "{r5,pc}" + +LOCAL_LABEL(InvokeRareTrapThread): + EPILOG_POP "{r5,lr}" + b C_FUNC(RhpWaitForSuspend2) +NESTED_END RhpPInvoke, _TEXT + + +// +// RhpPInvokeReturn +// +// IN: R0: address of pinvoke frame +// +LEAF_ENTRY RhpPInvokeReturn, _TEXT + ldr r3, [r0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + + mov r2, #0 + str r2, [r3, #OFFSETOF__Thread__m_pTransitionFrame] + + ldr r3, =C_FUNC(RhpTrapThreads) + ldr r3, [r3] + cbnz r3, LOCAL_LABEL(ReturnRareTrapThread) // TrapThreadsFlags_None = 0 + + bx lr +LOCAL_LABEL(ReturnRareTrapThread): + // passing transition frame pointer in r0 + b C_FUNC(RhpWaitForGC2) +LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.asm new file mode 100644 index 0000000000000..7ead71ac6ccc1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.asm @@ -0,0 +1,238 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + IMPORT RhpReversePInvokeBadTransition + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForSuspend + + PROLOG_PUSH {r0-r4,lr} ; Need to save argument registers r0-r3 and lr, r4 is just for alignment + PROLOG_VPUSH {d0-d7} ; Save float argument registers as well since they're volatile + + bl RhpWaitForSuspend2 + + EPILOG_VPOP {d0-d7} + EPILOG_POP {r0-r4,pc} + + NESTED_END RhpWaitForSuspend + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort +;; +;; +;; INPUT: r2: transition frame +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGCNoAbort + + PROLOG_PUSH {r0-r6,lr} ; Even number of registers to maintain 8-byte stack alignment + PROLOG_VPUSH {d0-d3} ; Save float return value registers as well + + ldr r5, [r2, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + + ldr r0, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r0, #TSF_DoNotTriggerGc + bne Done + + mov r0, r2 ; passing transition frame in r0 + bl RhpWaitForGC2 + +Done + EPILOG_VPOP {d0-d3} + EPILOG_POP {r0-r6,pc} + + NESTED_END RhpWaitForGCNoAbort + + EXTERN RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC +;; +;; +;; INPUT: r2: transition frame +;; +;; OUTPUT: +;; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGC + PROLOG_PUSH {r0,lr} + + ldr r0, =RhpTrapThreads + ldr r0, [r0] + tst r0, #TrapThreadsFlags_TrapThreads + beq NoWait + bl RhpWaitForGCNoAbort +NoWait + tst r0, #TrapThreadsFlags_AbortInProgress + beq NoAbort + ldr r0, [r2, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tst r0, #PTFF_THREAD_ABORT + beq NoAbort + EPILOG_POP {r0,r1} ; hijack target address as exception PC + EPILOG_NOP mov r0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_BRANCH RhpThrowHwEx +NoAbort + EPILOG_POP {r0,pc} + NESTED_END RhpWaitForGC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; IN: r4: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpReversePInvoke + + PROLOG_PUSH {r5-r7,lr} ; Even number of registers to maintain 8-byte stack alignment + + INLINE_GETTHREAD r5, r6 ; r5 = Thread, r6 trashed + str r5, [r4, #4] ; save Thread pointer for RhpReversePInvokeReturn + + ; r4 = prev save slot + ; r5 = thread + ; r6 = scratch + + ldr r6, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r6, #TSF_Attached + beq AttachThread + +ThreadAttached + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + ldr r6, [r5, #OFFSETOF__Thread__m_pTransitionFrame] + cbz r6, CheckBadTransition + + ;; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ;; whenever we might attempt to hijack this thread. + str r6, [r4] + + mov r6, #0 + str r6, [r5, #OFFSETOF__Thread__m_pTransitionFrame] + dmb + + ldr r6, =RhpTrapThreads + ldr r6, [r6] + tst r6, #TrapThreadsFlags_TrapThreads + bne TrapThread + +AllDone + EPILOG_POP {r5-r7,lr} + EPILOG_RETURN + + +CheckBadTransition + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
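+        ;;
+        ;; In C++-like pseudocode this block is roughly (illustration only):
+        ;;
+        ;;   if ((thread->m_ThreadStateFlags & TSF_DoNotTriggerGc) == 0)
+        ;;       goto BadTransition;
+        ;;   revFrame[0] = nullptr;   // leave the 'previous transition frame' save slot empty
+        ;;   goto AllDone;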
+ ldr r7, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r7, #TSF_DoNotTriggerGc + beq BadTransition + + ;; zero-out our 'previous transition frame' save slot + mov r7, #0 + str r7, [r4] + + ;; nothing more to do + b AllDone + +TrapThread + ;; put the previous frame back (sets us back to preemptive mode) + ldr r6, [r4] + str r6, [r5, #OFFSETOF__Thread__m_pTransitionFrame] + dmb + +AttachThread + ; passing address of reverse pinvoke frame in r4 + EPILOG_POP {r5-r7,lr} + EPILOG_BRANCH RhpReversePInvokeAttachOrTrapThread + +BadTransition + EPILOG_POP {r5-r7,lr} + EPILOG_NOP mov r0, lr ; arg <- return address + EPILOG_BRANCH RhpReversePInvokeBadTransition + + NESTED_END RhpReversePInvoke + + INLINE_GETTHREAD_CONSTANT_POOL + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke +;; +;; +;; INPUT: r4: address of reverse pinvoke frame +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread + + PROLOG_PUSH {r0-r4,lr} ; Need to save argument registers r0-r3 and lr, r4 is just for alignment + PROLOG_VPUSH {d0-d7} ; Save float argument registers as well since they're volatile + + mov r0, r4 ; passing reverse pinvoke frame pointer in r0 + bl RhpReversePInvokeAttachOrTrapThread2 + + EPILOG_VPOP {d0-d7} + EPILOG_POP {r0-r4,pc} + + NESTED_END RhpReversePInvokeTrapThread + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: r3: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpReversePInvokeReturn + + ldr r2, [r3, #4] ; get Thread pointer + ldr r3, [r3, #0] ; get previous M->U transition frame + + str r3, [r2, #OFFSETOF__Thread__m_pTransitionFrame] + dmb + + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_TrapThreads + bne RareTrapThread + + bx lr + +RareTrapThread + b RhpWaitForSuspend + + LEAF_END RhpReversePInvokeReturn + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.S new file mode 100644 index 0000000000000..6b7cf11a4a58a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.S @@ -0,0 +1,145 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +// TODO: Implement Arm support +#ifdef _DEBUG +.rodata +AssertMsg: .asciz "__FILE__:%s: %s is not implemented\n" +FileName: .asciz "StubDispatch.S" +RhpVTableOffsetDispatchName: .asciz "RhpVTableOffsetDispatch" +.text +.macro GEN_ASSERT_FUNC func + GEN_ASSERT AssertMsg, FileName, \func +.endm +#endif + + +// Macro that generates a stub consuming a cache with the given number of entries. +.macro DEFINE_INTERFACE_DISPATCH_STUB entries + +LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT + // r12 currently contains the indirection cell address. But we need more scratch registers and + // we may A/V on a null this. 
Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1-r2} + + // r12 currently holds the indirection cell address. We need to get the cache structure instead. + ldr r2, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in r0. + ldr r1, [r0] + + CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries + // For each entry in the cache, see if its EEType type matches the EEType in r1. + // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. + // R1 : Instance EEType* + // R2: Cache data structure + // R12 : Trashed. On succesful check, set to the target address to jump to. + .rept \entries + ldr r12, [r2, #CurrentOffset] + cmp r1, r12 + bne 0f + ldr r12, [r2, #(CurrentOffset + 4)] + b LOCAL_LABEL(99_\entries) + 0: + CurrentOffset = CurrentOffset + 8 + .endr + + // Point r12 to the indirection cell using the back pointer in the cache block + ldr r12, [r2, #OFFSETOF__InterfaceDispatchCache__m_pCell] + + EPILOG_POP {r1-r2} + b C_FUNC(RhpInterfaceDispatchSlow) + + // Common epilog for cache hits. Have to out of line it here due to limitation on the number of + // epilogs imposed by the unwind code macros. +LOCAL_LABEL(99_\entries): + // R2 contains address of the cache block. We store it in the red zone in case the target we jump + // to needs it. + // R12 contains the target address to jump to + EPILOG_POP {r1} + // The red zone is only 8 bytes long, so we have to store r2 into it between the pops. + str r2, [sp, #-4] + EPILOG_POP {r2} + EPILOG_BRANCH_REG r12 + +LEAF_END RhpInterfaceDispatch\entries, _TEXT + +.endm // DEFINE_INTERFACE_DISPATCH_STUB + +// Define all the stub routines we currently need. +// +// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. +// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo +// +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // On input we have the indirection cell data structure in r12. But we need more scratch registers and + // we may A/V on a null this. Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1} + + // r12 currently holds the indirection cell address. We need to update it to point to the vtable + // offset instead. + ldr r12, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in r0. + ldr r1, [r0] + + // add the vtable offset to the EEType pointer + add r12, r1, r12 + + // Load the target address of the vtable into r12 + ldr r12, [r12] + + EPILOG_POP {r1} + EPILOG_BRANCH_REG r12 +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// Initial dispatch on an interface when we don't have a cache yet. +LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT + // The stub that jumped here pushed r12, which contains the interface dispatch cell + // we need to pop it here + pop { r12 } + + // Just tail call to the cache miss helper. 
+ b C_FUNC(RhpInterfaceDispatchSlow) +LEAF_END RhpInitialInterfaceDispatch, _TEXT + +// No as alternate entry due to missed thumb bit in this case +// See https://github.com/dotnet/runtime/issues/8608 +LEAF_ENTRY RhpInitialDynamicInterfaceDispatch, _TEXT + // Just tail call to the cache miss helper. + b C_FUNC(RhpInterfaceDispatchSlow) +LEAF_END RhpInitialDynamicInterfaceDispatch, _TEXT + +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r12 has the interface dispatch cell address in it. + // The calling convention of the universal thunk is that the parameter + // for the universal thunk target is to be placed in sp-8 + // and the universal thunk target address is to be placed in sp-4 + str r12, [sp, #-8] + ldr r12, =C_FUNC(RhpCidResolve) + str r12, [sp, #-4] + + // jump to universal transition thunk + b C_FUNC(RhpUniversalTransition_DebugStepTailCall) +LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.asm new file mode 100644 index 0000000000000..600d2776e19e9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.asm @@ -0,0 +1,141 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransition_DebugStepTailCall + + ;; Macro that generates code to check a single cache entry. + MACRO + CHECK_CACHE_ENTRY $entry + ;; Check a single entry in the cache. + ;; R1 : Instance EEType* + ;; R2: Cache data structure + ;; R12 : Trashed. On succesful check, set to the target address to jump to. + + ldr r12, [r2, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 8))] + cmp r1, r12 + bne %ft0 + ldr r12, [r2, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 8) + 4)] + b %fa99 +0 + MEND + + +;; Macro that generates a stub consuming a cache with the given number of entries. + GBLS StubName + + MACRO + DEFINE_INTERFACE_DISPATCH_STUB $entries + +StubName SETS "RhpInterfaceDispatch$entries" + + NESTED_ENTRY $StubName + ;; On input we have the indirection cell data structure in r12. But we need more scratch registers and + ;; we may A/V on a null this. Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1-r2} + + ;; r12 currently holds the indirection cell address. We need to get the cache structure instead. + ldr r2, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in r0. + ldr r1, [r0] + + GBLA CurrentEntry +CurrentEntry SETA 0 + WHILE CurrentEntry < $entries + CHECK_CACHE_ENTRY CurrentEntry +CurrentEntry SETA CurrentEntry + 1 + WEND + + ;; Point r12 to the indirection cell using the back pointer in the cache block + ldr r12, [r2, #OFFSETOF__InterfaceDispatchCache__m_pCell] + + EPILOG_POP {r1-r2} + EPILOG_BRANCH RhpInterfaceDispatchSlow + + ;; Common epilog for cache hits. Have to out of line it here due to limitation on the number of + ;; epilogs imposed by the unwind code macros. +99 + ;; R2 contains address of the cache block. We store it in the red zone in case the target we jump + ;; to needs it. 
+ ;; R12 contains the target address to jump to + EPILOG_POP r1 + ;; The red zone is only 8 bytes long, so we have to store r2 into it between the pops. + EPILOG_NOP str r2, [sp, #-4] + EPILOG_POP r2 + EPILOG_BRANCH_REG r12 + + NESTED_END $StubName + + MEND + +;; Define all the stub routines we currently need. + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + + +;; Initial dispatch on an interface when we don't have a cache yet. + LEAF_ENTRY RhpInitialInterfaceDispatch + + ;; The stub that jumped here pushed r12, which contains the interface dispatch cell + ;; we need to pop it here + pop { r12 } + + ;; Simply tail call the slow dispatch helper. + b RhpInterfaceDispatchSlow + + LEAF_END RhpInitialInterfaceDispatch + + LEAF_ENTRY RhpVTableOffsetDispatch + ;; On input we have the indirection cell data structure in r12. But we need more scratch registers and + ;; we may A/V on a null this. Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1} + + ;; r12 currently holds the indirection cell address. We need to update it to point to the vtable + ;; offset instead. + ldr r12, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in r0. + ldr r1, [r0] + + ;; add the vtable offset to the EEType pointer + add r12, r1, r12 + + ;; Load the target address of the vtable into r12 + ldr r12, [r12] + + EPILOG_POP {r1} + EPILOG_BRANCH_REG r12 + LEAF_END RhpVTableOffsetDispatch + +;; Cache miss case, call the runtime to resolve the target and update the cache. + LEAF_ENTRY RhpInterfaceDispatchSlow + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + ;; r12 has the interface dispatch cell address in it. + ;; The calling convention of the universal thunk is that the parameter + ;; for the universal thunk target is to be placed in sp-8 + ;; and the universal thunk target address is to be placed in sp-4 + str r12, [sp, #-8] + ldr r12, =RhpCidResolve + str r12, [sp, #-4] + + ;; jump to universal transition thunk + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow + + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/arm/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..59086e36681ec --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/ThunkPoolThunks.asm @@ -0,0 +1,273 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
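+
+;; Layout sketch (illustration only): each thunk stub occupies THUNK_CODESIZE bytes in
+;; a 4K code page, and its two-pointer data cell occupies THUNK_DATASIZE bytes at the
+;; same index in the 4K data page that immediately follows the code page. In C++-style
+;; arithmetic, mirroring RhpGetThunkDataBlockAddress below:
+;;
+;;   uintptr_t dataPage = (thunkAddress & ~(uintptr_t)(PAGE_SIZE - 1)) + PAGE_SIZE;
+;;   uintptr_t dataCell = dataPage + thunkIndex * THUNK_DATASIZE;
+;;   // dataCell[0] = context, dataCell[1] = target; the common stub address lives in
+;;   // the last pointer-sized slot of the data page.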
+ +#include "kxarm.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 0x10 ;; 4-byte mov, 2-byte add, 4-byte str, 4-byte ldr, 2-byte branch +THUNK_DATASIZE equ 0x08 ;; 2 dwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0xFA ;; 250 thunks per page + +PAGE_SIZE equ 0x1000 ;; 4K +POINTER_SIZE equ 0x04 + + MACRO + NAMED_READONLY_DATA_SECTION $name, $areaAlias + AREA $areaAlias,DATA,READONLY +RO$name % 4 + MEND + + MACRO + NAMED_READWRITE_DATA_SECTION $name, $areaAlias + AREA $areaAlias,DATA +RW$name % 4 + MEND + + MACRO + LOAD_DATA_ADDRESS $groupIndex, $index + ALIGN 0x10 ;; make sure we align to 16-byte boundary for CFG table + + ;; set r12 to begining of data page : r12 <- pc - (THUNK_CODESIZE * current thunk's index - sizeof(mov+add instructions)) + PAGE_SIZE + ;; fix offset of the data : r12 <- r12 + (THUNK_DATASIZE * current thunk's index) + mov.w r12, PAGE_SIZE + ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index) - (8 + $groupIndex * THUNK_CODESIZE * 10 + THUNK_CODESIZE * $index) + add.n r12, r12, pc + MEND + + MACRO + JUMP_TO_COMMON $groupIndex, $index + ;; start : r12 points to the current thunks first data cell in the data page + ;; put r12 into the red zone : r12 isn't changed + ;; set r12 to begining of data page : r12 <- r12 - (THUNK_DATASIZE * current thunk's index) + ;; fix offset to point to last DWROD in page : r12 <- r11 + PAGE_SIZE - POINTER_SIZE + ;; jump to the location pointed at by the last dword in the data page + str.w r12, [sp, #-4] + ldr.w r12, [r12, #(PAGE_SIZE - POINTER_SIZE - ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index))] + bx.n r12 + MEND + + MACRO + TenThunks $groupIndex + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (last dword + ;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two dword values: + ;; - Context: some value given to the thunk as context (passed in eax). Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
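+        ;;   (The "eax" above is x86 terminology; in this ARM implementation the address
+        ;;   of the thunk's data cell is handed to the common stub through the red zone,
+        ;;   see JUMP_TO_COMMON above and RhCommonStub in InteropThunksHelpers.asm.)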
+ + LOAD_DATA_ADDRESS $groupIndex,0 + JUMP_TO_COMMON $groupIndex,0 + + LOAD_DATA_ADDRESS $groupIndex,1 + JUMP_TO_COMMON $groupIndex,1 + + LOAD_DATA_ADDRESS $groupIndex,2 + JUMP_TO_COMMON $groupIndex,2 + + LOAD_DATA_ADDRESS $groupIndex,3 + JUMP_TO_COMMON $groupIndex,3 + + LOAD_DATA_ADDRESS $groupIndex,4 + JUMP_TO_COMMON $groupIndex,4 + + LOAD_DATA_ADDRESS $groupIndex,5 + JUMP_TO_COMMON $groupIndex,5 + + LOAD_DATA_ADDRESS $groupIndex,6 + JUMP_TO_COMMON $groupIndex,6 + + LOAD_DATA_ADDRESS $groupIndex,7 + JUMP_TO_COMMON $groupIndex,7 + + LOAD_DATA_ADDRESS $groupIndex,8 + JUMP_TO_COMMON $groupIndex,8 + + LOAD_DATA_ADDRESS $groupIndex,9 + JUMP_TO_COMMON $groupIndex,9 + MEND + + MACRO + THUNKS_PAGE_BLOCK + + TenThunks 0 + TenThunks 1 + TenThunks 2 + TenThunks 3 + TenThunks 4 + TenThunks 5 + TenThunks 6 + TenThunks 7 + TenThunks 8 + TenThunks 9 + TenThunks 10 + TenThunks 11 + TenThunks 12 + TenThunks 13 + TenThunks 14 + TenThunks 15 + TenThunks 16 + TenThunks 17 + TenThunks 18 + TenThunks 19 + TenThunks 20 + TenThunks 21 + TenThunks 22 + TenThunks 23 + TenThunks 24 + MEND + + ;; + ;; The first thunks section should be 64K aligned because it can get + ;; mapped multiple times in memory, and mapping works on allocation + ;; granularity boundaries (we don't want to map more than what we need) + ;; + ;; The easiest way to do so is by having the thunks section at the + ;; first 64K aligned virtual address in the binary. We provide a section + ;; layout file to the linker to tell it how to layout the thunks sections + ;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) + ;; + ;; The PE spec says images cannot have gaps between sections (other + ;; than what is required by the section alignment value in the header), + ;; therefore we need a couple of padding data sections (otherwise the + ;; OS will not load the image). 
+ ;; + + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, "|.pad0|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, "|.pad1|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, "|.pad2|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, "|.pad3|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, "|.pad4|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, "|.pad5|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, "|.pad6|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, "|.pad7|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, "|.pad8|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, "|.pad9|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, "|.pad10|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, "|.pad11|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, "|.pad12|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, "|.pad13|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, "|.pad14|" + + ;; + ;; Thunk Stubs + ;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: + ;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs + ;; - ndp\rh\src\tools\rhbind\zapimage.h + ;; + LEAF_ENTRY ThunkPool, "|.tks0|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool + + NAMED_READWRITE_DATA_SECTION ThunkData0, "|.tkd0|" + + LEAF_ENTRY ThunkPool1, "|.tks1|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool1 + + NAMED_READWRITE_DATA_SECTION ThunkData1, "|.tkd1|" + + LEAF_ENTRY ThunkPool2, "|.tks2|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool2 + + NAMED_READWRITE_DATA_SECTION ThunkData2, "|.tkd2|" + + LEAF_ENTRY ThunkPool3, "|.tks3|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool3 + + NAMED_READWRITE_DATA_SECTION ThunkData3, "|.tkd3|" + + LEAF_ENTRY ThunkPool4, "|.tks4|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool4 + + NAMED_READWRITE_DATA_SECTION ThunkData4, "|.tkd4|" + + LEAF_ENTRY ThunkPool5, "|.tks5|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool5 + + NAMED_READWRITE_DATA_SECTION ThunkData5, "|.tkd5|" + + LEAF_ENTRY ThunkPool6, "|.tks6|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool6 + + NAMED_READWRITE_DATA_SECTION ThunkData6, "|.tkd6|" + + LEAF_ENTRY ThunkPool7, "|.tks7|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool7 + + NAMED_READWRITE_DATA_SECTION ThunkData7, "|.tkd7|" + + + ;; + ;; IntPtr RhpGetThunksBase() + ;; + LEAF_ENTRY RhpGetThunksBase + ;; Return the address of the first thunk pool to the caller (this is really the base address) + ldr r0, =ThunkPool + sub r0, r0, #1 + bx lr + LEAF_END RhpGetThunksBase + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; int RhpGetNumThunksPerBlock() + ;; + LEAF_ENTRY RhpGetNumThunksPerBlock + mov r0, THUNK_POOL_NUM_THUNKS_PER_PAGE + bx lr + LEAF_END RhpGetNumThunksPerBlock + + ;; + ;; int RhpGetThunkSize() + ;; + LEAF_ENTRY RhpGetThunkSize + mov r0, THUNK_CODESIZE + bx lr + LEAF_END RhpGetThunkSize + + ;; + ;; int RhpGetNumThunkBlocksPerMapping() + ;; + LEAF_ENTRY RhpGetNumThunkBlocksPerMapping + mov r0, 8 + bx lr + LEAF_END RhpGetNumThunkBlocksPerMapping + + ;; + ;; int RhpGetThunkBlockSize + ;; + LEAF_ENTRY RhpGetThunkBlockSize + mov r0, PAGE_SIZE * 2 + bx lr + LEAF_END RhpGetThunkBlockSize + + ;; + ;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) + ;; + LEAF_ENTRY RhpGetThunkDataBlockAddress + mov r12, PAGE_SIZE - 1 + bic r0, r0, r12 + add r0, PAGE_SIZE + bx lr + LEAF_END RhpGetThunkDataBlockAddress + + ;; + ;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) + 
;; + LEAF_ENTRY RhpGetThunkStubsBlockAddress + mov r12, PAGE_SIZE - 1 + bic r0, r0, r12 + sub r0, PAGE_SIZE + bx lr + LEAF_END RhpGetThunkStubsBlockAddress + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.S new file mode 100644 index 0000000000000..45c137f9f13d1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.S @@ -0,0 +1,157 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#ifdef FEATURE_DYNAMIC_CODE + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#define COUNT_ARG_REGISTERS (4) +#define INTEGER_REGISTER_SIZE (4) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +// Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (4) +#define PUSHED_R11_SIZE (4) + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// ARGUMENT_REGISTERS_SIZE +// RETURN_BLOCK_SIZE +// FLOAT_ARG_REGISTERS_SIZE +// PUSHED_LR +// PUSHED_R11 + + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_R11_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpUniversalTransition +// +// At input to this function, r0-3, d0-7 and the stack may contain any number of arguments. +// +// In addition, there are 2 extra arguments passed in the RED ZONE (8 byte negative space +// off of sp). +// sp-4 will contain the managed function that is to be called by this transition function +// sp-8 will contain the pointer sized extra argument to the managed function +// +// When invoking the callee: +// +// r0 shall contain a pointer to the TransitionBlock +// r1 shall contain the value that was in sp-8 at entry to this function +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+078 CallerSP+000 +// {IntArgRegs (r0-r3) (0x10 bytes)} ChildSP+068 CallerSP-010 +// {ReturnBlock (0x20 bytes)} ChildSP+048 CallerSP-030 +// -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +// in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +// layout of all pieces of the frame that lie at or above the pushed floating point registers. +// {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+008 CallerSP-070 +// {PushedLR} ChildSP+004 CallerSP-074 +// {PushedR11} ChildSP+000 CallerSP-078 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
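+//
+// For illustration, a callee handed r0 (the ReturnBlock / TransitionBlock base) can
+// locate the saved state roughly as follows (sketch only; the offsets restate the
+// frame layout above):
+//
+//   void**  intArgRegs = (void**)(r0 + RETURN_BLOCK_SIZE);           // saved r0-r3
+//   void**  stackArgs  = intArgRegs + COUNT_ARG_REGISTERS;           // caller's stack-passed args
+//   double* fpArgRegs  = (double*)(r0 - FLOAT_ARG_REGISTERS_SIZE);   // saved d0-d7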
+// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +.macro UNIVERSAL_TRANSITION FunctionName + +NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + // Save argument registers (including floating point) and the return address. + // NOTE: While we do that, capture the two arguments in the red zone into r12 and r3. + ldr r12, [sp, #-4] // Capture first argument from red zone into r12 + PROLOG_PUSH "{r3}" // Push r3 + ldr r3, [sp, #-4] // Capture second argument from red zone into r3 + PROLOG_PUSH "{r0-r2}" // Push the rest of the registers + PROLOG_STACK_ALLOC RETURN_BLOCK_SIZE // Save space a buffer to be used to hold return buffer data. + PROLOG_VPUSH {d0-d7} // Capture the floating point argument registers + PROLOG_PUSH "{r11,lr}" // Save caller's frame chain pointer and PC + + // Setup the arguments to the transition thunk. + mov r1, r3 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + // Before calling out, trash all of the argument registers except the ones (r0, r1) that + // hold outgoing arguments. All of these registers have been saved to the transition + // frame, and the code at the call target is required to use only the transition frame + // copies when dispatching this call to the eventual callee. + + ldr r3, =C_FUNC(RhpFpTrashValues) + vldr d0, [r3, #(0 * 8)] + vldr d1, [r3, #(1 * 8)] + vldr d2, [r3, #(2 * 8)] + vldr d3, [r3, #(3 * 8)] + vldr d4, [r3, #(4 * 8)] + vldr d5, [r3, #(5 * 8)] + vldr d6, [r3, #(6 * 8)] + vldr d7, [r3, #(7 * 8)] + + ldr r3, =C_FUNC(RhpIntegerTrashValues) + ldr r2, [r3, #(2 * 4)] + ldr r3, [r3, #(3 * 4)] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + // Make the ReturnFromUniversalTransition alternate entry 4 byte aligned + .balign 4 + add r0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block + blx r12 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom\FunctionName + + // We cannot make the label public as that tricks DIA stackwalker into thinking + // it's the beginning of a method. For this reason we export an auxiliary variable + // holding the address instead. + + // Move the result (the target address) to r12 so it doesn't get overridden when we restore the + // argument registers. Additionally make sure the thumb2 bit is set. + orr r12, r0, #1 + + // Restore caller's frame chain pointer and PC. + EPILOG_POP "{r11,lr}" + + // Restore the argument registers. + EPILOG_VPOP {d0-d7} + EPILOG_STACK_FREE RETURN_BLOCK_SIZE // pop return block conservatively reported area + EPILOG_POP "{r0-r3}" + + // Tailcall to the target address. + EPILOG_BRANCH_REG r12 + +NESTED_END Rhp\FunctionName, _TEXT + +.endm + +// To enable proper step-in behavior in the debugger, we need to have two instances +// of the thunk. For the first one, the debugger steps into the call in the function, +// for the other, it steps over it. +UNIVERSAL_TRANSITION UniversalTransition +UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +#endif // FEATURE_DYNAMIC_CODE diff --git a/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.asm new file mode 100644 index 0000000000000..db01d02eb0c2f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.asm @@ -0,0 +1,157 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmMacros.h" + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + EXTERN RhpIntegerTrashValues + EXTERN RhpFpTrashValues +#endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + +#define COUNT_ARG_REGISTERS (4) +#define INTEGER_REGISTER_SIZE (4) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +;; Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (4) +#define PUSHED_R11_SIZE (4) + +;; +;; From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +;; +;; ARGUMENT_REGISTERS_SIZE +;; RETURN_BLOCK_SIZE +;; FLOAT_ARG_REGISTERS_SIZE +;; PUSHED_LR +;; PUSHED_R11 +;; + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_R11_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + + TEXTAREA + +;; +;; RhpUniversalTransition +;; +;; At input to this function, r0-3, d0-7 and the stack may contain any number of arguments. +;; +;; In addition, there are 2 extra arguments passed in the RED ZONE (8 byte negative space +;; off of sp). +;; sp-4 will contain the managed function that is to be called by this transition function +;; sp-8 will contain the pointer sized extra argument to the managed function +;; +;; When invoking the callee: +;; +;; r0 shall contain a pointer to the TransitionBlock +;; r1 shall contain the value that was in sp-8 at entry to this function +;; +;; Frame layout is: +;; +;; {StackPassedArgs} ChildSP+078 CallerSP+000 +;; {IntArgRegs (r0-r3) (0x10 bytes)} ChildSP+068 CallerSP-010 +;; {ReturnBlock (0x20 bytes)} ChildSP+048 CallerSP-030 +;; -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +;; in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +;; layout of all pieces of the frame that lie at or above the pushed floating point registers. +;; {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+008 CallerSP-070 +;; {PushedLR} ChildSP+004 CallerSP-074 +;; {PushedR11} ChildSP+000 CallerSP-078 +;; +;; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +;; must be updated as well. +;; +;; NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +;; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +;; FpArgRegs. +;; +;; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +;; everything between the base of the ReturnBlock and the top of the StackPassedArgs. +;; + + MACRO + UNIVERSAL_TRANSITION $FunctionName + + NESTED_ENTRY Rhp$FunctionName + ;; Save argument registers (including floating point) and the return address. + ;; NOTE: While we do that, capture the two arguments in the red zone into r12 and r3. + PROLOG_NOP ldr r12, [sp, #-4] ; Capture first argument from red zone into r12 + PROLOG_PUSH {r3} ; Push r3 + PROLOG_NOP ldr r3, [sp, #-4] ; Capture second argument from red zone into r3 + PROLOG_PUSH {r0-r2} ; Push the rest of the registers + PROLOG_STACK_ALLOC RETURN_BLOCK_SIZE ; Save space a buffer to be used to hold return buffer data. + PROLOG_VPUSH {d0-d7} ; Capture the floating point argument registers + PROLOG_PUSH {r11,lr} ; Save caller's frame chain pointer and PC + + ;; Setup the arguments to the transition thunk. 
+ mov r1, r3 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + ;; Before calling out, trash all of the argument registers except the ones (r0, r1) that + ;; hold outgoing arguments. All of these registers have been saved to the transition + ;; frame, and the code at the call target is required to use only the transition frame + ;; copies when dispatching this call to the eventual callee. + + ldr r3, =RhpFpTrashValues + vldr d0, [r3, #(0 * 8)] + vldr d1, [r3, #(1 * 8)] + vldr d2, [r3, #(2 * 8)] + vldr d3, [r3, #(3 * 8)] + vldr d4, [r3, #(4 * 8)] + vldr d5, [r3, #(5 * 8)] + vldr d6, [r3, #(6 * 8)] + vldr d7, [r3, #(7 * 8)] + + ldr r3, =RhpIntegerTrashValues + ldr r2, [r3, #(2 * 4)] + ldr r3, [r3, #(3 * 4)] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + ;; Make the ReturnFromUniversalTransition alternate entry 4 byte aligned + ALIGN 4 + add r0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK ;; First parameter to target function is a pointer to the return block + blx r12 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom$FunctionName + + ; We cannot make the label public as that tricks DIA stackwalker into thinking + ; it's the beginning of a method. For this reason we export an auxiliary variable + ; holding the address instead. + + ;; Move the result (the target address) to r12 so it doesn't get overridden when we restore the + ;; argument registers. Additionally make sure the thumb2 bit is set. + orr r12, r0, #1 + + ;; Restore caller's frame chain pointer and PC. + EPILOG_POP {r11,lr} + + ;; Restore the argument registers. + EPILOG_VPOP {d0-d7} + EPILOG_STACK_FREE RETURN_BLOCK_SIZE ; pop return block conservatively reported area + EPILOG_POP {r0-r3} + + ;; Tailcall to the target address. + EPILOG_BRANCH_REG r12 + + NESTED_END Rhp$FunctionName + + MEND + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.S new file mode 100644 index 0000000000000..e69839ed92ade --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.S @@ -0,0 +1,382 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#ifdef WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG + + // If g_GCShadow is 0, don't perform the check. + ldr r12, =C_FUNC(g_GCShadow) + ldr r12, [r12] + cbz r12, LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + + // Save DESTREG since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + // the prolog inside a method without a frame. But given that this is only debug code and generally we + // shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + // variants to set up frames. 
The compiler knows exactly which registers are trashed in the simple write + // barrier case, so we don't have any more scratch registers to play with (and doing so would only make + // things harder if at a later stage we want to allow multiple barrier versions based on the input + // registers). + push \DESTREG + + // Transform DESTREG into the equivalent address in the shadow heap. + ldr r12, =C_FUNC(g_lowest_address) + ldr r12, [r12] + sub \DESTREG, r12 + cmp \DESTREG, #0 + blo LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + ldr r12, =C_FUNC(g_GCShadow) + ldr r12, [r12] + add \DESTREG, r12 + ldr r12, =C_FUNC(g_GCShadowEnd) + ldr r12, [r12] + cmp \DESTREG, r12 + jhi LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + + // Update the shadow heap. + str \REFREG, [\DESTREG] + + // The following read must be strongly ordered wrt to the write we've just performed in order to + // prevent race conditions. + dmb + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. + mov r12, \DESTREG + ldr \DESTREG, [sp] + str r12, [sp] + ldr r12, [\DESTREG] + cmp r12, \REFREG + bne LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG) + + // The original DESTREG value is now restored but the stack has a value (the shadow version of the + // location) pushed. Need to discard this push before we are done. + add sp, #4 + b LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG): + // Someone went and updated the real heap. We need to invalidate the shadow location since we can't + // guarantee whose shadow update won. + + // Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + // variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 32-bit + // immediate and therefore must be moved into a register before it can be written to the shadow + // location. + mov r12, \DESTREG + ldr \DESTREG, [sp] + str r12, [sp] + push \REFREG + movw \REFREG, #(INVALIDGCVALUE & 0xFFFF) + movt \REFREG, #(INVALIDGCVALUE >> 16) + str \REFREG, [\DESTREG] + pop \REFREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG): + // Restore original DESTREG value from the stack. + pop \DESTREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): + +.endm + +#else // WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG +.endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it's used in the definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW \BASENAME, \REFREG, r0 + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). 
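+
+    // As a rough C sketch of the ephemeral-range filter and card table update that
+    // follow (dst/ref stand for the destination address in r0 and the reference in
+    // REFREG; the globals and LOG2_CLUMP_SIZE are the same ones this file imports):
+    //
+    //   if (ref < g_ephemeral_low || ref > g_ephemeral_high)
+    //       return;                                   // nothing to track
+    //   uint8_t* card = g_card_table + ((size_t)dst >> LOG2_CLUMP_SIZE);
+    //   if (*card != 0xFF)                            // skip redundant card writes
+    //       *card = 0xFF;
+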
+ ldr r12, =C_FUNC(g_ephemeral_low) + ldr r12, [r12] + cmp \REFREG, r12 + blo LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) + + ldr r12, =C_FUNC(g_ephemeral_high) + ldr r12, [r12] + cmp \REFREG, r12 + bhi LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + ldr r12, =C_FUNC(g_card_table) + ldr r12, [r12] + add r0, r12, r0, lsr #LOG2_CLUMP_SIZE + ldrb r12, [r0] + cmp r12, #0x0FF + bne LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG) + +LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): + b LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) + +// We get here if it's necessary to update the card table. +LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG): + mov r12, #0x0FF + strb r12, [r0] + +LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG): + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +// location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLOC +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT + +// Export the canonical write barrier under unqualified name as well +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpAssignRef +.endif + + // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer + // might assume strongly ordered accessess, namely where the preceding writes are used to initialize + // the object and the final write, made by this barrier in the instruction following the DMB, + // publishes that object for other threads/cpus to see. + // + // Note that none of this is relevant for single cpu machines. We may choose to implement a + // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + + // Write the reference into the location. Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. 
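+
+    // In C++ terms, the dmb above plus the plain store below act roughly like a
+    // release store (a sketch of the intended ordering, not the actual runtime code):
+    //
+    //   std::atomic_thread_fence(std::memory_order_release);   // the dmb
+    //   *dst = ref;                                             // the str below
+    //   // ...ephemeral check and card table update follow...
+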
+ALTERNATE_ENTRY "RhpAssignRefAvLocation"\EXPORT_REG_NAME // WriteBarrierFunctionAvLocation +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpAssignRefAVLocation +.endif + str \REFREG, [r0] + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG + + bx lr +LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. +DEFINE_UNCHECKED_WRITE_BARRIER r1, r1 + +// +// Define the helpers used to implement the write barrier required when writing an object reference into a +// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +// collection. +// + +.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // The location being updated might not even lie in the GC heap (a handle or stack location for instance), + // in which case no write barrier is required. + ldr r12, =C_FUNC(g_lowest_address) + ldr r12, [r12] + cmp r0, r12 + blo LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + ldr r12, =C_FUNC(g_highest_address) + ldr r12, [r12] + cmp r0, r12 + bhi LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is always in R0. The object reference that will be assigned into +// that location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT + +// Export the canonical write barrier under unqualified name as well +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpCheckedAssignRef +.endif + + // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer + // might assume strongly ordered accessess, namely where the preceding writes are used to initialize + // the object and the final write, made by this barrier in the instruction following the DMB, + // publishes that object for other threads/cpus to see. + // + // Note that none of this is relevant for single cpu machines. We may choose to implement a + // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + // Write the reference into the location. 
Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. +ALTERNATE_ENTRY "RhpCheckedAssignRefAvLocation"\EXPORT_REG_NAME // WriteBarrierFunctionAvLocation +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation +.endif + str \REFREG, [r0] + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG + + bx lr +LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. +DEFINE_CHECKED_WRITE_BARRIER r1, r1 + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// r1 = value +// r2 = comparand +LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT + // To implement our chosen memory model for ARM we insert a memory barrier at GC write brriers. This + // barrier must occur before the object reference update, so we have to do it unconditionally even + // though the update may fail below. + dmb +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation +LOCAL_LABEL(RhpCheckedLockCmpXchgRetry): + ldrex r3, [r0] + cmp r2, r3 + bne LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_r1) + strex r3, r1, [r0] + cmp r3, #0 + bne LOCAL_LABEL(RhpCheckedLockCmpXchgRetry) + mov r3, r2 + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, r1 + + mov r0, r3 + bx lr +LEAF_END RhpCheckedLockCmpXchg, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// r1 = value +LEAF_ENTRY RhpCheckedXchg, _TEXT + // To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This + // barrier must occur before the object reference update. + dmb +ALTERNATE_ENTRY RhpCheckedXchgAVLocation +LOCAL_LABEL(RhpCheckedXchgRetry): + ldrex r2, [r0] + strex r3, r1, [r0] + cmp r3, #0 + bne LOCAL_LABEL(RhpCheckedXchgRetry) + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, r1 + + // The original value is currently in r2. We need to return it in r0. + mov r0, r2 + + bx lr +LEAF_END RhpCheckedXchg, _TEXT + +// +// RhpByRefAssignRef simulates movs instruction for object references. +// +// On entry: +// r0: address of ref-field (assigned to) +// r1: address of the data (source) +// r2, r3: be trashed +// +// On exit: +// r0, r1 are incremented by 4, +// r2, r3: trashed +// +LEAF_ENTRY RhpByRefAssignRef, _TEXT + ldr r2, [r1] + str r2, [r0] + + // Check whether the writes were even into the heap. If not there's no card update required. + ldr r3, =C_FUNC(g_lowest_address) + ldr r3, [r3] + cmp r0, r3 + blo LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + ldr r3, =C_FUNC(g_highest_address) + ldr r3, [r3] + cmp r0, r3 + bhi LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). 
+ UPDATE_GC_SHADOW BASENAME, r2, r0 + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). + ldr r3, =C_FUNC(g_ephemeral_low) + ldr r3, [r3] + cmp r2, r3 + blo LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + ldr r3, =C_FUNC(g_ephemeral_high) + ldr r3, [r3] + cmp r2, r3 + bhi LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // move current r0 value into r2 and then increment the pointers + mov r2, r0 + add r1, #4 + add r0, #4 + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + ldr r3, =C_FUNC(g_card_table) + ldr r3, [r3] + add r2, r3, r2, lsr #LOG2_CLUMP_SIZE + ldrb r3, [r2] + cmp r3, #0x0FF + bne LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable) + bx lr + +// We get here if it's necessary to update the card table. +LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable): + mov r3, #0x0FF + strb r3, [r2] + bx lr + +LOCAL_LABEL(RhpByRefAssignRef_NotInHeap): + // Increment the pointers before leaving + add r0, #4 + add r1, #4 + bx lr +LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.asm new file mode 100644 index 0000000000000..48b3ab83d6562 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.asm @@ -0,0 +1,421 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + +#include "AsmMacros.h" + + TEXTAREA + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. 
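+;;
+;; A rough C sketch of that shadow update (dst is the heap location being updated and
+;; ref the reference just stored; the globals match the GC globals referenced in this
+;; file, INVALIDGCVALUE is the sentinel described above, and this is illustrative
+;; rather than the exact register-level sequence):
+;;
+;;   if (g_GCShadow != NULL && (uint8_t*)dst >= g_lowest_address)
+;;   {
+;;       void** shadow = (void**)(g_GCShadow + ((uint8_t*)dst - g_lowest_address));
+;;       if (shadow < (void**)g_GCShadowEnd)
+;;       {
+;;           *shadow = ref;
+;;           MemoryBarrier();            // the dmb: order the shadow write before the re-read
+;;           if (*dst != ref)            // a racing writer updated the real heap first
+;;               *shadow = (void*)INVALIDGCVALUE;
+;;       }
+;;   }
+;;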
+#ifdef WRITE_BARRIER_CHECK + + SETALIAS g_GCShadow, ?g_GCShadow@@3PAEA + SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PAEA + + EXTERN $g_GCShadow + EXTERN $g_GCShadowEnd + + MACRO + ;; On entry: + ;; $DESTREG: location to be updated + ;; $REFREG: objectref to be stored + ;; + ;; On exit: + ;; r12: trashed + ;; other registers are preserved + ;; + UPDATE_GC_SHADOW $DESTREG, $REFREG + + ;; If g_GCShadow is 0, don't perform the check. + ldr r12, =$g_GCShadow + ldr r12, [r12] + cmp r12, 0 + beq %ft1 + + ;; Save $DESTREG since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + ;; the prolog inside a method without a frame. But given that this is only debug code and generally we + ;; shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + ;; variants to set up frames. The compiler knows exactly which registers are trashed in the simple write + ;; barrier case, so we don't have any more scratch registers to play with (and doing so would only make + ;; things harder if at a later stage we want to allow multiple barrier versions based on the input + ;; registers). + push $DESTREG + + ;; Transform $DESTREG into the equivalent address in the shadow heap. + ldr r12, =$G_LOWEST_ADDRESS + ldr r12, [r12] + subs $DESTREG, r12 + blo %ft0 + + ldr r12, =$g_GCShadow + ldr r12, [r12] + add $DESTREG, r12 + + ldr r12, =$g_GCShadowEnd + ldr r12, [r12] + cmp $DESTREG, r12 + bhs %ft0 + + ;; Update the shadow heap. + str $REFREG, [$DESTREG] + + ;; The following read must be strongly ordered wrt to the write we've just performed in order to + ;; prevent race conditions. + dmb + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. + ldr r12, [sp] + ldr r12, [r12] + cmp r12, $REFREG + beq %ft0 + + ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + movw r12, #0xcccd + movt r12, #0xcccc + str r12, [$DESTREG] + +0 + ;; Restore original $DESTREG value from the stack. + pop $DESTREG + +1 + MEND + +#else // WRITE_BARRIER_CHECK + + MACRO + UPDATE_GC_SHADOW $DESTREG, $REFREG + MEND + +#endif // WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). + +;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +;; some interlocked helpers that need an inline barrier. + MACRO + ;; On entry: + ;; $DESTREG: location to be updated + ;; $REFREG: objectref to be stored + ;; + ;; On exit: + ;; $DESTREG, r12: trashed + ;; other registers are preserved + ;; + INSERT_CHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + ;; The location being updated might not even lie in the GC heap (a handle or stack location for + ;; instance), in which case no write barrier is required. 
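+
+        ;; Roughly, in C (a sketch; dst stands for $DESTREG and the two globals are the
+        ;; GC heap bounds referenced in this file):
+        ;;
+        ;;   if ((uint8_t*)dst < g_lowest_address || (uint8_t*)dst >= g_highest_address)
+        ;;       return;        // not a GC heap location, no barrier needed
+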
+ ldr r12, =$G_LOWEST_ADDRESS + ldr r12, [r12] + cmp $DESTREG, r12 + blo %ft0 + ldr r12, =$G_HIGHEST_ADDRESS + ldr r12, [r12] + cmp $DESTREG, r12 + bhs %ft0 + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW $DESTREG, $REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + ldr r12, =$G_EPHEMERAL_LOW + ldr r12, [r12] + cmp $REFREG, r12 + blo %ft0 + ldr r12, =$G_EPHEMERAL_HIGH + ldr r12, [r12] + cmp $REFREG, r12 + bhs %ft0 + + ;; All tests pass, so update the card table. + ldr r12, =$G_CARD_TABLE + ldr r12, [r12] + add r12, r12, $DESTREG, lsr #10 + + ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on + ;; multi-proc systems since it avoids cache thrashing. + ldrb $DESTREG, [r12] + cmp $DESTREG, #0xFF + beq %ft0 + mov $DESTREG, #0xFF + strb $DESTREG, [r12] + +0 + + MEND + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). + +;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +;; some interlocked helpers that need an inline barrier. + MACRO + ;; On entry: + ;; $DESTREG: location to be updated + ;; $REFREG: objectref to be stored + ;; + ;; On exit: + ;; $DESTREG, r12: trashed + ;; other registers are preserved + ;; + INSERT_UNCHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW $DESTREG, $REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + ldr r12, =$G_EPHEMERAL_LOW + ldr r12, [r12] + cmp $REFREG, r12 + blo %ft0 + ldr r12, =$G_EPHEMERAL_HIGH + ldr r12, [r12] + cmp $REFREG, r12 + bhs %ft0 + + ;; All tests pass, so update the card table. + ldr r12, =$G_CARD_TABLE + ldr r12, [r12] + add r12, r12, $DESTREG, lsr #10 + + ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on + ;; multi-proc systems since it avoids cache thrashing. + ldrb $DESTREG, [r12] + cmp $DESTREG, #0xFF + beq %ft0 + mov $DESTREG, #0xFF + strb $DESTREG, [r12] + +0 + + MEND + + MACRO + ;; Define a helper with a name of the form RhpCheckedAssignRefR0 etc. The location to be updated is in + ;; $DESTREG. The object reference that will be assigned into that location is in one of the other + ;; general registers determined by the value of $REFREG. R12 is used as a scratch register. 
+ + ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: + ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLocation + ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + DEFINE_CHECKED_WRITE_BARRIER $DESTREG, $REFREG + + gbls WriteBarrierFunction + gbls WriteBarrierFunctionAvLocation +WriteBarrierFunction SETS "RhpCheckedAssignRef":cc:"$REFREG" +WriteBarrierFunctionAvLocation SETS "RhpCheckedAssignRefAvLocation":cc:"$REFREG" + + EXPORT $WriteBarrierFunction +$WriteBarrierFunction + + ;; Export the canonical write barrier under unqualified name as well + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpCheckedAssignRef + ENDIF + + ;; Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + ;; intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + ;; write to the destination ($DESTREG). This covers most of the common scenarios where the programmer + ;; might assume strongly ordered accessess, namely where the preceding writes are used to initialize + ;; the object and the final write, made by this barrier in the instruction following the DMB, + ;; publishes that object for other threads/cpus to see. + ;; + ;; Note that none of this is relevant for single cpu machines. We may choose to implement a + ;; uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between + ;; here and the card table update we may perform below. + ALTERNATE_ENTRY $WriteBarrierFunctionAvLocation + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + ENDIF + str $REFREG, [$DESTREG] + + INSERT_CHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + bx lr + + MEND + + + MACRO + ;; Define a helper with a name of the form RhpAssignRefR0 etc. The location to be updated is in + ;; $DESTREG. The object reference that will be assigned into that location is in one of the other + ;; general registers determined by the value of $REFREG. R12 is used as a scratch register. + + ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: + ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLocation + ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + DEFINE_UNCHECKED_WRITE_BARRIER $DESTREG, $REFREG + + gbls WriteBarrierFunction + gbls WriteBarrierFunctionAvLocation +WriteBarrierFunction SETS "RhpAssignRef":cc:"$REFREG" +WriteBarrierFunctionAvLocation SETS "RhpAssignRefAvLocation":cc:"$REFREG" + + ;; Export the canonical write barrier under unqualified name as well + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpAssignRef + ENDIF + + EXPORT $WriteBarrierFunction +$WriteBarrierFunction + + ;; Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + ;; intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + ;; write to the destination ($DESTREG). 
This covers most of the common scenarios where the programmer + ;; might assume strongly ordered accessess, namely where the preceding writes are used to initialize + ;; the object and the final write, made by this barrier in the instruction following the DMB, + ;; publishes that object for other threads/cpus to see. + ;; + ;; Note that none of this is relevant for single cpu machines. We may choose to implement a + ;; uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between + ;; here and the card table update we may perform below. + ALTERNATE_ENTRY $WriteBarrierFunctionAvLocation + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpAssignRefAVLocation + ENDIF + str $REFREG, [$DESTREG] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + bx lr + + MEND + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is R1. + DEFINE_CHECKED_WRITE_BARRIER R0, R1 + + DEFINE_UNCHECKED_WRITE_BARRIER R0, R1 + +;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +;; successful updates. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + ;; Interlocked compare exchange on objectref. + ;; + ;; On entry: + ;; r0: pointer to objectref + ;; r1: exchange value + ;; r2: comparand + ;; + ;; On exit: + ;; r0: original value of objectref + ;; r1,r2,r3,r12: trashed + ;; + LEAF_ENTRY RhpCheckedLockCmpXchg + + ;; To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This + ;; barrier must occur before the object reference update, so we have to do it unconditionally even + ;; though the update may fail below. + dmb + +CX_Retry + ;; Check location value is what we expect. + ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + ldrex r3, [r0] + cmp r2, r3 + bne CX_NoUpdate + + ;; Current value matches comparand, attempt to update with the new value. + strex r3, r1, [r0] + cmp r3, #0 + bne CX_Retry ; Retry the operation if another write beat us there + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in r0 and the value in r1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE r0, r1 + + ;; The original value was equal to the comparand which is still in r2 so we can return that. + mov r0, r2 + bx lr + +CX_NoUpdate + ;; Location value didn't match comparand, return that value. + mov r0, r3 + bx lr + + LEAF_END RhpCheckedLockCmpXchg + + ;; Interlocked exchange on objectref. 
+ ;; + ;; On entry: + ;; r0: pointer to objectref + ;; r1: exchange value + ;; + ;; On exit: + ;; r0: original value of objectref + ;; r1,r2,r3,r12: trashed + ;; + + ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: + ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation + ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + LEAF_ENTRY RhpCheckedXchg + + ;; To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This + ;; barrier must occur before the object reference update. + dmb + +X_Retry + ALTERNATE_ENTRY RhpCheckedXchgAVLocation + ;; Read the original contents of the location. + ldrex r2, [r0] + + ;; Attempt to update with the new value. + strex r3, r1, [r0] + cmp r3, #0 + bne X_Retry ; Retry the operation if another write beat us there + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in r0 and the value in r1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE r0, r1 + + ;; The original value is currently in r2. We need to return it in r0. + mov r0, r2 + bx lr + + LEAF_END RhpCheckedXchg + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.asm new file mode 100644 index 0000000000000..6571bda58c401 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.asm @@ -0,0 +1,290 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. +;; x0 == EEType + LEAF_ENTRY RhpNewFast + + ;; x1 = GetThread(), TRASHES x2 + INLINE_GETTHREAD x1, x2 + + ;; + ;; x0 contains EEType pointer + ;; + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; x0: EEType pointer + ;; x1: Thread pointer + ;; x2: base size + ;; + + ;; Load potential new object address into x12. + ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x13 + bhi RhpNewFast_RarePath + + ;; Update the alloc pointer to account for the allocation. + str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer + str x0, [x12, #OFFSETOF__Object__m_pEEType] + + mov x0, x12 + ret + +RhpNewFast_RarePath + mov x1, #0 + b RhpNewObject + LEAF_END RhpNewFast + + INLINE_GETTHREAD_CONSTANT_POOL + +;; Allocate non-array object with finalizer. 
+;; x0 == EEType + LEAF_ENTRY RhpNewFinalizable + mov x1, #GC_ALLOC_FINALIZE + b RhpNewObject + LEAF_END RhpNewFinalizable + +;; Allocate non-array object. +;; x0 == EEType +;; x1 == alloc flags + NESTED_ENTRY RhpNewObject + + PUSH_COOP_PINVOKE_FRAME x3 + + ;; x3: transition frame + + ;; Preserve the EEType in x19 + mov x19, x0 + + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + ;; Call the rest of the allocation helper. + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + cbz x0, NewOutOfMemory + str x19, [x0, #OFFSETOF__Object__m_pEEType] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr w1, [x19, #OFFSETOF__EEType__m_uBaseSize] + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x1, x2 + blo New_SkipPublish + + ;; x0: object + ;; x1: already contains object size + bl RhpPublishObject ;; x0: this function returns the object that was passed-in + +New_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NewOutOfMemory + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov x0, x19 ; EEType pointer + mov x1, 0 ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_NOP b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewObject + +;; Allocate a string. +;; x0 == EEType +;; x1 == element/character count + LEAF_ENTRY RhNewString + ;; Make sure computing the overall allocation size won't overflow + ;; TODO: this should be actually MAX_STRING_LENGTH + mov x2, 0x7FFFFFFF + cmp x1, x2 + bhi StringSizeOverflow + + ;; Compute overall allocation size (align(base size + (element size * elements), 8)). + mov w2, #STRING_COMPONENT_SIZE + mov x3, #(STRING_BASE_SIZE + 7) + umaddl x2, w1, w2, x3 ; x2 = w1 * w2 + x3 + and x2, x2, #-8 + + ; x0 == EEType + ; x1 == element count + ; x2 == string size + + INLINE_GETTHREAD x3, x5 + + ;; Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + ;; Reload new object address into r12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + ;; Return the object allocated in x0. + mov x0, x12 + + ret + +StringSizeOverflow + ; We get here if the length of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; x0 holds EEType pointer already + mov x1, #1 ; Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhNewString + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate one dimensional, zero based array (SZARRAY). 
+;; x0 == EEType +;; x1 == element count + LEAF_ENTRY RhpNewArray + + ;; We want to limit the element count to the non-negative 32-bit int range. + ;; If the element count is <= 0x7FFFFFFF, no overflow is possible because the component + ;; size is <= 0xffff (it's an unsigned 16-bit value), and the base size for the worst + ;; case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. + mov x2, #0x7FFFFFFF + cmp x1, x2 + bhi ArraySizeOverflow + + ldrh w2, [x0, #OFFSETOF__EEType__m_usComponentSize] + umull x2, w1, w2 + ldr w3, [x0, #OFFSETOF__EEType__m_uBaseSize] + add x2, x2, x3 + add x2, x2, #7 + and x2, x2, #-8 + + ; x0 == EEType + ; x1 == element count + ; x2 == array size + + INLINE_GETTHREAD x3, x5 + + ;; Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + ;; Reload new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + ;; Return the object allocated in r0. + mov x0, x12 + + ret + +ArraySizeOverflow + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; x0 holds EEType pointer already + mov x1, #1 ; Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhpNewArray + + INLINE_GETTHREAD_CONSTANT_POOL + +;; Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +;; x0 == EEType +;; x1 == element count +;; x2 == array size + Thread::m_alloc_context::alloc_ptr +;; x3 == Thread + NESTED_ENTRY RhpNewArrayRare + + ; Recover array size by subtracting the alloc_ptr from x2. + PROLOG_NOP ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + PROLOG_NOP sub x2, x2, x12 + + PUSH_COOP_PINVOKE_FRAME x3 + + ; Preserve data we'll need later into the callee saved registers + mov x19, x0 ; Preserve EEType + mov x20, x1 ; Preserve element count + mov x21, x2 ; Preserve array size + + mov x1, #0 + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. + cbz x0, ArrayOutOfMemory + + ; Success, set the array's type and element count in the new object. + str x19, [x0, #OFFSETOF__Object__m_pEEType] + str x20, [x0, #OFFSETOF__Array__m_Length] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x21, x2 + blo NewArray_SkipPublish + + ;; x0 = newly allocated array. x1 = size + mov x1, x21 + bl RhpPublishObject + +NewArray_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +ArrayOutOfMemory + ;; This is the OOM failure path. 
We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov x0, x19 ; EEType Pointer + mov x1, 0 ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_NOP b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayRare + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/src/nativeaot/Runtime/arm64/AsmMacros.h new file mode 100644 index 0000000000000..950d8befc6ab0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AsmMacros.h @@ -0,0 +1,316 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; OS provided macros +#include +;; generated by the build from AsmOffsets.cpp +#include "AsmOffsets.inc" + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 0x01 +TSF_SuppressGcStress equ 0x08 +TSF_DoNotTriggerGc equ 0x10 +TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC equ 0x18 + +;; Bit position for the flags above, to be used with tbz/tbnz instructions +TSF_Attached_Bit equ 0 +TSF_SuppressGcStress_Bit equ 3 +TSF_DoNotTriggerGc_Bit equ 4 + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 +GC_ALLOC_ALIGN8_BIAS equ 4 +GC_ALLOC_ALIGN8 equ 8 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h +PTFF_SAVE_X19 equ 0x00000001 +PTFF_SAVE_X20 equ 0x00000002 +PTFF_SAVE_X21 equ 0x00000004 +PTFF_SAVE_X22 equ 0x00000008 +PTFF_SAVE_X23 equ 0x00000010 +PTFF_SAVE_X24 equ 0x00000020 +PTFF_SAVE_X25 equ 0x00000040 +PTFF_SAVE_X26 equ 0x00000080 +PTFF_SAVE_X27 equ 0x00000100 +PTFF_SAVE_X28 equ 0x00000200 +PTFF_SAVE_SP equ 0x00000400 +PTFF_SAVE_ALL_PRESERVED equ 0x000003FF ;; NOTE: x19-x28 +PTFF_SAVE_X0 equ 0x00000800 +PTFF_SAVE_X1 equ 0x00001000 +PTFF_SAVE_X2 equ 0x00002000 +PTFF_SAVE_X3 equ 0x00004000 +PTFF_SAVE_X4 equ 0x00008000 +PTFF_SAVE_X5 equ 0x00010000 +PTFF_SAVE_X6 equ 0x00020000 +PTFF_SAVE_X7 equ 0x00040000 +PTFF_SAVE_X8 equ 0x00080000 +PTFF_SAVE_X9 equ 0x00100000 +PTFF_SAVE_X10 equ 0x00200000 +PTFF_SAVE_X11 equ 0x00400000 +PTFF_SAVE_X12 equ 0x00800000 +PTFF_SAVE_X13 equ 0x01000000 +PTFF_SAVE_X14 equ 0x02000000 +PTFF_SAVE_X15 equ 0x04000000 +PTFF_SAVE_X16 equ 0x08000000 +PTFF_SAVE_X17 equ 0x10000000 +PTFF_SAVE_X18 equ 0x20000000 +PTFF_SAVE_ALL_SCRATCH equ 0x3FFFF800 ;; NOTE: X0-X18 +PTFF_SAVE_FP equ 0x40000000 +PTFF_SAVE_LR equ 0x80000000 + +;; NOTE: The following flags represent the upper 32 bits of the PInvokeTransitionFrameFlags. 
+;; Since the assembler doesn't support 64 bit constants in any way, we need to define just +;; the upper bits here +PTFF_X0_IS_GCREF_HI equ 0x00000001 ;; iff PTFF_SAVE_X0 : set->x0 is Object, clear->x0 is scalar +PTFF_X0_IS_BYREF_HI equ 0x00000002 ;; iff PTFF_SAVE_X0 : set->x0 is ByRef, clear->x0 is Object or scalar +PTFF_X1_IS_GCREF_HI equ 0x00000004 ;; iff PTFF_SAVE_X1 : set->x1 is Object, clear->x1 is scalar +PTFF_X1_IS_BYREF_HI equ 0x00000008 ;; iff PTFF_SAVE_X1 : set->x1 is ByRef, clear->x1 is Object or scalar +PTFF_THREAD_ABORT_HI equ 0x00000010 ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; Bit position for the flags above, to be used with tbz / tbnz instructions +PTFF_THREAD_ABORT_BIT equ 36 + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; Bit position for the flags above, to be used with tbz / tbnz instructions +TrapThreadsFlags_AbortInProgress_Bit equ 0 +TrapThreadsFlags_TrapThreads_Bit equ 1 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 0x43 + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + +;; +;; IMPORTS +;; + EXTERN RhpGcAlloc + EXTERN RhpPublishObject + EXTERN RhExceptionHandling_FailedAllocation + EXTERN RhDebugBreak + EXTERN RhpWaitForSuspend2 + EXTERN RhpWaitForGC2 + EXTERN RhpReversePInvokeAttachOrTrapThread2 + EXTERN RhpCalculateStackTraceWorker + EXTERN RhThrowHwEx + EXTERN RhThrowEx + EXTERN RhRethrow + + EXTERN RhpTrapThreads + EXTERN g_lowest_address + EXTERN g_highest_address + EXTERN g_ephemeral_low + EXTERN g_ephemeral_high + EXTERN g_card_table + + +;; ----------------------------------------------------------------------------- +;; Macro used to assign an alternate name to a symbol containing characters normally disallowed in a symbol +;; name (e.g. C++ decorated names). + MACRO + SETALIAS $name, $symbol + GBLS $name +$name SETS "|$symbol|" + MEND + +;;----------------------------------------------------------------------------- +;; Macro for loading a 64-bit constant by a minimal number of instructions +;; Since the asssembles doesn't support 64 bit arithmetics in expressions, +;; the value is passed in as lo, hi pair. 
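+;;
+;; For example, MOVL64 x2, 0x12345678, 0x9abcdef0 is intended to load the 64-bit
+;; value 0x9abcdef012345678 into x2 via a movz/movk sequence along the lines of:
+;;
+;;   movz x2, #0x9abc, lsl #48
+;;   movk x2, #0xdef0, lsl #32
+;;   movk x2, #0x1234, lsl #16
+;;   movk x2, #0x5678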
+ MACRO + MOVL64 $Reg, $ConstantLo, $ConstantHi + + LCLS MovInstr +MovInstr SETS "movz" + + IF ((($ConstantHi):SHR:16):AND:0xffff) != 0 + $MovInstr $Reg, #((($Constant):SHR:16):AND:0xffff), lsl #48 +MovInstr SETS "movk" + ENDIF + + IF (($ConstantHi):AND:0xffff) != 0 + $MovInstr $Reg, #(($ConstantHi):AND:0xffff), lsl #32 +MovInstr SETS "movk" + ENDIF + + IF ((($ConstantLo):SHR:16):AND:0xffff) != 0 + $MovInstr $Reg, #((($ConstantLo):SHR:16):AND:0xffff), lsl #16 +MovInstr SETS "movk" + ENDIF + + $MovInstr $Reg, #(($ConstantLo):AND:0xffff) + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro to export a pointer to an address inside a stub as a 64-bit variable +;; + MACRO + EXPORT_POINTER_TO_ADDRESS $Name + LCLS CodeLbl +CodeLbl SETS "$Name":CC:"Lbl" +$CodeLbl + AREA | .rdata | , ALIGN = 8, DATA, READONLY +$Name + DCQ $CodeLbl + EXPORT $Name + TEXTAREA + ROUT + + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro for indicating an alternate entry point into a function. +;; + + MACRO + LABELED_RETURN_ADDRESS $ReturnAddressName + + ; export the return address name, but do not perturb the code by forcing alignment +$ReturnAddressName + EXPORT $ReturnAddressName + + ; flush any pending literal pool stuff + ROUT + + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro to get a pointer to the Thread* object for the currently executing thread +;; + +__tls_array equ 0x58 ;; offsetof(TEB, ThreadLocalStoragePointer) + + EXTERN _tls_index + + GBLS __SECTIONREL_tls_CurrentThread +__SECTIONREL_tls_CurrentThread SETS "SECTIONREL_tls_CurrentThread" + + MACRO + INLINE_GETTHREAD $destReg, $trashReg + + ;; The following macro variables are just some assembler magic to get the name of the 32-bit version + ;; of $trashReg. It does it by string manipulation. Replaces something like x3 with w3. + LCLS TrashRegister32Bit +TrashRegister32Bit SETS "$trashReg" +TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister32Bit) - 1)) + + ldr $trashReg, =_tls_index + ldr $TrashRegister32Bit, [$trashReg] + ldr $destReg, [xpr, #__tls_array] + ldr $destReg, [$destReg, $trashReg lsl #3] + ldr $trashReg, =$__SECTIONREL_tls_CurrentThread + ldr $trashReg, [$trashReg] + add $destReg, $destReg, $trashReg + MEND + + ;; INLINE_GETTHREAD_CONSTANT_POOL macro has to be used after the last function in the .asm file that used + ;; INLINE_GETTHREAD. Optionally, it can be also used after any function that used INLINE_GETTHREAD + ;; to improve density, or to reduce distance betweeen the constant pool and its use. + MACRO + INLINE_GETTHREAD_CONSTANT_POOL + EXTERN tls_CurrentThread + + ;; Section relocs are 32 bits. Using an extra DCD initialized to zero for 8-byte alignment. 
+$__SECTIONREL_tls_CurrentThread + DCD tls_CurrentThread + RELOC 8, tls_CurrentThread ;; SECREL + DCD 0 + +__SECTIONREL_tls_CurrentThread SETS "$__SECTIONREL_tls_CurrentThread":CC:"_" + + MEND + + MACRO + INLINE_THREAD_UNHIJACK $threadReg, $trashReg1, $trashReg2 + ;; + ;; Thread::Unhijack() + ;; + ldr $trashReg1, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz $trashReg1, %ft0 + + ldr $trashReg2, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str $trashReg1, [$trashReg2] + str xzr, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] +0 + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; INVARIANTS +;; - The macro assumes it defines the method prolog, it should typically be the first code in a method and +;; certainly appear before any attempt to alter the stack pointer. +;; - This macro uses trashReg (after its initial value has been saved in the frame) and upon exit trashReg +;; will contain the address of transition frame. +;; + +DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + + MACRO + PUSH_COOP_PINVOKE_FRAME $trashReg + + PROLOG_SAVE_REG_PAIR fp, lr, #-0x80! ;; Push down stack pointer and store FP and LR + + ;; 0x10 bytes reserved for Thread* and flags + + ;; Save callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + + ;; Save the value of SP before stack allocation to the last slot in the frame (slot #15) + add $trashReg, sp, #0x80 + str $trashReg, [sp, #0x70] + + ;; Record the bitmask of saved registers in the frame (slot #3) + mov $trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str $trashReg, [sp, #0x18] + + mov $trashReg, sp + MEND + +;; Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME + MACRO + POP_COOP_PINVOKE_FRAME + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + EPILOG_RESTORE_REG_PAIR fp, lr, #0x80! 
+ MEND + + +#ifdef FEATURE_GC_STRESS + SETALIAS THREAD__HIJACKFORGCSTRESS, ?HijackForGcStress@Thread@@SAXPEAUPAL_LIMITED_CONTEXT@@@Z + SETALIAS REDHAWKGCINTERFACE__STRESSGC, ?StressGc@RedhawkGCInterface@@SAXXZ + + EXTERN $REDHAWKGCINTERFACE__STRESSGC + EXTERN $THREAD__HIJACKFORGCSTRESS +#endif ;; FEATURE_GC_STRESS diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/arm64/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..6e59ade597ad4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AsmOffsetsCpu.h @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. +// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(290, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(288, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(268, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(260, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(C0, PAL_LIMITED_CONTEXT) + +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, LR) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, X0) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, X1) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, X19) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, X20) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, X21) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, X22) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, X23) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, X24) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, X25) +PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, X26) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, X27) +PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, X28) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_SIZEOF(150, REGDISPLAY) +PLAT_ASM_OFFSET(f8, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(98, REGDISPLAY, pX19) +PLAT_ASM_OFFSET(a0, REGDISPLAY, pX20) +PLAT_ASM_OFFSET(a8, REGDISPLAY, pX21) +PLAT_ASM_OFFSET(b0, REGDISPLAY, pX22) +PLAT_ASM_OFFSET(b8, REGDISPLAY, pX23) +PLAT_ASM_OFFSET(c0, REGDISPLAY, pX24) +PLAT_ASM_OFFSET(c8, REGDISPLAY, pX25) +PLAT_ASM_OFFSET(d0, REGDISPLAY, pX26) +PLAT_ASM_OFFSET(d8, REGDISPLAY, pX27) +PLAT_ASM_OFFSET(e0, REGDISPLAY, pX28) +PLAT_ASM_OFFSET(e8, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(110, REGDISPLAY, D) diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.asm new file mode 100644 index 0000000000000..2da05b7a0c538 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.asm @@ -0,0 +1,143 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;----------------------------------------------------------------------------- +;; This helper routine enregisters the appropriate arguments and makes the +;; actual call. +;; +;; INPUT: x0: pointer to CallDescrData struct +;; +;;----------------------------------------------------------------------------- +;;void RhCallDescrWorker(CallDescrData * pCallDescrData); + NESTED_ENTRY RhCallDescrWorker + + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + PROLOG_SAVE_REG_PAIR x19, x20, #16 + + ;; Save the value of SP before we start pushing any arguments + mov x20, sp + + mov x19, x0 ; save pCallDescrData in x19 + + ldr w1, [x19, #OFFSETOF__CallDescrData__numStackSlots] + cbz w1, Ldonestack + + ;; Add frame padding to ensure frame size is a multiple of 16 (a requirement of the OS ABI). + ;; We push two registers (above) and numStackSlots arguments (below). If this comes to an odd number + ;; of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set, + ;; extend the stack another eight bytes". + ldr x0, [x19, #OFFSETOF__CallDescrData__pSrc] + add x0, x0, x1 lsl #3 ; pSrcEnd=pSrc+8*numStackSlots + ands x2, x1, #1 + beq Lstackloop + + ;; This loop copies numStackSlots words + ;; from [pSrcEnd-8,pSrcEnd-16,...] to [sp-8,sp-16,...] + + ;; Pad and store one stack slot as number of slots are odd + ldr x4, [x0,#-8]! + str x4, [sp,#-16]! + subs x1, x1, #1 + beq Ldonestack +Lstackloop + ldp x2, x4, [x0,#-16]! + stp x2, x4, [sp,#-16]! + subs x1, x1, #2 + bne Lstackloop +Ldonestack + + ;; If FP arguments are supplied in registers (x9 != NULL) then initialize all of them from the pointer + ;; given in x9. + ldr x9, [x19, #OFFSETOF__CallDescrData__pFloatArgumentRegisters] + cbz x9, LNoFloatingPoint + ldp d0, d1, [x9] + ldp d2, d3, [x9, #16] + ldp d4, d5, [x9, #32] + ldp d6, d7, [x9, #48] +LNoFloatingPoint + + ;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 64] + ;; into x0, ..., x7, x8 + + ldr x9, [x19, #OFFSETOF__CallDescrData__pArgumentRegisters] + ldp x0, x1, [x9] + ldp x2, x3, [x9, #16] + ldp x4, x5, [x9, #32] + ldp x6, x7, [x9, #48] + ldr x8, [x9, #64] + + ;; call pTarget + ldr x9, [x19, #OFFSETOF__CallDescrData__pTarget] + blr x9 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + ;; Symbol used to identify thunk call to managed function so the special + ;; case unwinder can unwind through this function. Sadly we cannot directly + ;; export this symbol right now because it confuses DIA unwinder to believe + ;; it's the beginning of a new method, therefore we export the address + ;; of an auxiliary variable holding the address instead. 
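+
+;; For reference, a hedged C++ sketch of the structure this helper consumes. Only
+;; the member names come from the OFFSETOF__CallDescrData__* constants used in
+;; this routine; the ordering and exact types below are illustrative assumptions.
+;;
+;;     #include <cstdint>
+;;     struct CallDescrDataSketch
+;;     {
+;;         uint8_t*  pSrc;                     // stack arguments, numStackSlots * 8 bytes
+;;         uint32_t  numStackSlots;
+;;         uint32_t  fpReturnSize;             // 0 = integer, 4/8 = float/double, 16/32 = HFA
+;;         uint64_t* pArgumentRegisters;       // image of x0..x8 (9 slots)
+;;         double*   pFloatArgumentRegisters;  // image of d0..d7, may be null
+;;         void*     pTarget;                  // code address to call
+;;         void*     pReturnBuffer;            // receives the raw return value(s)
+;;     };
+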
+ + ldr w3, [x19, #OFFSETOF__CallDescrData__fpReturnSize] + + ;; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + ldr x19, [x19, #OFFSETOF__CallDescrData__pReturnBuffer] + + ;; Int return case + cbz w3, LIntReturn + + ;; Float return case + cmp w3, #4 + beq LFloatOrDoubleReturn + + ;; Double return case + cmp w3, #8 + bne LCheckHFAReturn + +LFloatOrDoubleReturn + str d0, [x19] + b LReturnDone + +LCheckHFAReturn + cmp w3, #16 + beq LFloatOrDoubleHFAReturn + cmp w3, #32 + beq LFloatOrDoubleHFAReturn + b LNoHFAReturn + +LFloatOrDoubleHFAReturn + ;;Single/Double HFAReturn return case + stp d0, d1, [x19, #00] + stp d2, d3, [x19, #16] + b LReturnDone + +LNoHFAReturn + + EMIT_BREAKPOINT ; Unreachable + +LIntReturn + ;; Save return value(s) into retbuf for int + stp x0, x1, [x19] + +LReturnDone + +#ifdef _DEBUG + ;; Trash the floating point registers to ensure that the HFA return values + ;; won't survive by accident + ldp d0, d1, [sp] + ldp d2, d3, [sp, #16] +#endif + ;; Restore the value of SP + mov sp, x20 + + EPILOG_RESTORE_REG_PAIR x19, x20, #16 + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + EPILOG_RETURN + + NESTED_END RhCallDescrWorker + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..d826c1b908c87 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.asm @@ -0,0 +1,63 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "ksarm64.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionCoverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +POINTER_SIZE equ 0x08 + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. 
+;; + + ;; + ;; void CallingConventionConverter_ReturnThunk() + ;; + LEAF_ENTRY CallingConventionConverter_ReturnThunk + ret + LEAF_END CallingConventionConverter_ReturnThunk + + ;; + ;; __jmpstub__CallingConventionConverter_CommonCallingStub + ;; + ;; struct CallingConventionConverter_CommonCallingStub_PointerData + ;; { + ;; void *ManagedCallConverterThunk; + ;; void *UniversalThunk; + ;; } + ;; + ;; struct CommonCallingStubInputData + ;; { + ;; ULONG_PTR CallingConventionId; + ;; CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used + ;; // However, it is specified just like other platforms, so the behavior of the common + ;; // calling stub is easier to debug + ;; } + ;; + ;; xip0 - Points at CommonCallingStubInputData + ;; + ;; + LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub + ldr xip1, [xip0] ; put CallingConventionId into xip1 as "parameter" to universal transition thunk + ldr xip0, [xip0, #POINTER_SIZE] ; get pointer to CallingConventionConverter_CommonCallingStub_PointerData into xip0 + ldr x12, [xip0, #POINTER_SIZE] ; get address of UniversalTransitionThunk (which we'll tailcall to later) + ldr xip0, [xip0] ; get address of ManagedCallConverterThunk (target for universal thunk to call) + br x12 + LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub + + ;; + ;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) + ;; + LEAF_ENTRY CallingConventionConverter_GetStubs + ldr x12, =CallingConventionConverter_ReturnThunk + str x12, [x0] ;; ARM doesn't need different return thunks. + str x12, [x1] + ldr x12, =__jmpstub__CallingConventionConverter_CommonCallingStub + str x12, [x2] + ret + LEAF_END CallingConventionConverter_GetStubs + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/Dummies.asm b/src/coreclr/src/nativeaot/Runtime/arm64/Dummies.asm new file mode 100644 index 0000000000000..ea6c21fc810d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/Dummies.asm @@ -0,0 +1,18 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + LEAF_ENTRY RhpLMod + DCW 0xdefe + bx lr + LEAF_END RhpLMod + + LEAF_ENTRY RhpLMul + DCW 0xdefe + bx lr + LEAF_END RhpLMul + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.asm new file mode 100644 index 0000000000000..ab70efbd3d9d6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.asm @@ -0,0 +1,629 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmMacros.h" + + TEXTAREA + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) + +#define HARDWARE_EXCEPTION 1 +#define SOFTWARE_EXCEPTION 0 + +;; ----------------------------------------------------------------------------- +;; Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) + MACRO + ALLOC_THROW_FRAME $exceptionType + + PROLOG_NOP mov x3, sp + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + IF $exceptionType == HARDWARE_EXCEPTION + PROLOG_NOP sub sp,sp,#0x50 + PROLOG_NOP stp x3, x1, [sp] ; x3 is the SP and x1 is the IP of the fault site + PROLOG_PUSH_MACHINE_FRAME + ELSE + PROLOG_STACK_ALLOC 0x50 + PROLOG_NOP stp x3, lr, [sp] ; x3 is the SP and lr is the IP of the fault site + ENDIF + PROLOG_NOP stp d8, d9, [sp, #0x10] + PROLOG_NOP stp d10, d11, [sp, #0x20] + PROLOG_NOP stp d12, d13, [sp, #0x30] + PROLOG_NOP stp d14, d15, [sp, #0x40] + PROLOG_SAVE_REG_PAIR fp, lr, #-0x70! + PROLOG_NOP stp xzr, xzr, [sp, #0x10] ; locations reserved for return value, not used for exception handling + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) +;; $extraStackSize - extra stack space that the user of the macro can use to +;; store additional registers + MACRO + ALLOC_CALL_FUNCLET_FRAME $extraStackSize + + ; Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-60! + ; is intentional. Above statement would also emit instruction to save + ; sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body + ; of method. However, this method needs to be able to change fp before calling funclet. + ; This is required to access locals in funclet. + PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-0x60! + PROLOG_SAVE_REG_PAIR x19, x20, #0x10 + PROLOG_SAVE_REG_PAIR x21, x22, #0x20 + PROLOG_SAVE_REG_PAIR x23, x24, #0x30 + PROLOG_SAVE_REG_PAIR x25, x26, #0x40 + PROLOG_SAVE_REG_PAIR x27, x28, #0x50 + PROLOG_NOP mov fp, sp + + IF $extraStackSize != 0 + PROLOG_STACK_ALLOC $extraStackSize + ENDIF + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) +;; $extraStackSize - extra stack space that the user of the macro can use to +;; store additional registers. +;; It needs to match the value passed to the corresponding +;; ALLOC_CALL_FUNCLET_FRAME. + MACRO + FREE_CALL_FUNCLET_FRAME $extraStackSize + + IF $extraStackSize != 0 + EPILOG_STACK_FREE $extraStackSize + ENDIF + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x10 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x20 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x30 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x40 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x50 + EPILOG_RESTORE_REG_PAIR fp, lr, #0x60! 
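+
+;; STACKSIZEOF_ExInfo above is just SIZEOF__ExInfo rounded up to a 16-byte
+;; boundary, since SP must stay 16-byte aligned. The same computation in C++,
+;; using the 0x290 ExInfo size from arm64/AsmOffsetsCpu.h:
+;;
+;;     constexpr size_t AlignUp16(size_t s) { return (s + 15) & ~size_t(15); }
+;;     static_assert(AlignUp16(0x290) == 0x290, "ExInfo is already 16-byte aligned");
+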
+ MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to restore preserved general purpose and FP registers from REGDISPLAY +;; $regdisplayReg - register pointing to the REGDISPLAY structure + MACRO + RESTORE_PRESERVED_REGISTERS $regdisplayReg + + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + ldr x19, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + ldr x20, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + ldr x21, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + ldr x22, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + ldr x23, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + ldr x24, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + ldr x25, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + ldr x26, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + ldr x27, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + ldr x28, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + ;; + ;; load FP preserved regs + ;; + add x12, $regdisplayReg, #OFFSETOF__REGDISPLAY__D + ldp d8, d9, [x12, #0x00] + ldp d10, d11, [x12, #0x10] + ldp d12, d13, [x12, #0x20] + ldp d14, d15, [x12, #0x30] + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to save preserved general purpose and FP registers to REGDISPLAY +;; $regdisplayReg - register pointing to the REGDISPLAY structure + MACRO + SAVE_PRESERVED_REGISTERS $regdisplayReg + + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x19, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x20, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x21, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x22, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x23, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x24, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x25, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x26, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x27, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x28, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str fp, [x12] + ;; + ;; store vfp preserved regs + ;; + add x12, $regdisplayReg, #OFFSETOF__REGDISPLAY__D + stp d8, d9, [x12, #0x00] + stp d10, d11, [x12, #0x10] + stp d12, d13, [x12, #0x20] + stp d14, d15, [x12, #0x30] + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to thrash preserved general purpose registers in REGDISPLAY +;; to make sure nobody uses them +;; $regdisplayReg - register pointing to the REGDISPLAY structure + MACRO + TRASH_PRESERVED_REGISTERS_STORAGE $regdisplayReg + +#if 0 // def _DEBUG ;; @TODO: temporarily removed because trashing the frame pointer breaks the debugger + movz x3, #0xbaad, LSL #48 + movk x3, #0xdeed, LSL #32 + movk x3, #0xbaad, LSL #16 + movk x3, #0xdeed + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x3, [x12] + ldr x12, [$regdisplayReg, 
#OFFSETOF__REGDISPLAY__pX24] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str x3, [x12] +#endif // _DEBUG + MEND + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: W0: exception code of fault +;; X1: faulting IP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowHwEx + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + ;; x2 = GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + add x1, sp, #rsp_offsetof_ExInfo ;; x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #2 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.HardwareFault + + ;; link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context ;; x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; w0: exception code + ;; x1: ExInfo* + bl RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + ;; no return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: X0: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowEx + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + ;; x2 = GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. + + ;; NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + ;; where the tail-calling thread had saved LR, which may not match where we have saved LR. 
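+
+;; The ExInfo set-up performed by RhpThrowHwEx above amounts to the following
+;; hedged C++ sketch. It assumes simplified Thread/ExInfo/PAL_LIMITED_CONTEXT
+;; declarations exposing the fields named by the OFFSETOF__* constants; the
+;; literals mirror the immediates used in the assembly.
+;;
+;;     void InitAndLinkExInfoSketch(Thread* pThread, ExInfo* pExInfo, PAL_LIMITED_CONTEXT* pContext)
+;;     {
+;;         pExInfo->m_exception    = nullptr;
+;;         pExInfo->m_passNumber   = 1;
+;;         pExInfo->m_idxCurClause = 0xFFFFFFFF;   // MaxTryRegionIdx
+;;         pExInfo->m_kind         = 2;            // ExKind.HardwareFault
+;;         pExInfo->m_pPrevExInfo  = pThread->m_pExInfoStackHead;
+;;         pThread->m_pExInfoStackHead = pExInfo;  // push onto the thread's chain
+;;         pExInfo->m_pExContext   = pContext;
+;;     }
+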
+ + ldr x1, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz x1, NotHijacked + + ldr x3, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + ;; x0: exception object + ;; x1: hijacked return address + ;; x2: pThread + ;; x3: hijacked return address location + + add x12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) ;; re-compute SP at callsite + cmp x3, x12 ;; if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo TailCallWasHijacked + + ;; normal case where a valid return address location is hijacked + str x1, [x3] + b ClearThreadState + +TailCallWasHijacked + + ;; Abnormal case where the return address location is now invalid because we ended up here via a tail + ;; call. In this case, our hijacked return address should be the correct caller of this method. + ;; + + ;; stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. + mov lr, x1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +ClearThreadState + + ;; clear the Thread's hijack state + str xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +NotHijacked + + add x1, sp, #rsp_offsetof_ExInfo ;; x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context ;; x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; x0: exception object + ;; x1: ExInfo* + bl RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + ;; no return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpRethrow + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + ;; x2 = GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + add x1, sp, #rsp_offsetof_ExInfo ;; x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + strb wzr, [x1, #OFFSETOF__ExInfo__m_kind] ;; init to a deterministic value (ExKind.None) + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + + ;; link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + mov x0, x3 ;; x0 <- current ExInfo + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; 
pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context ;; x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; x0 contains the currently active ExInfo + ;; x1 contains the address of the new ExInfo + bl RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + ;; no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: X0: exception object +;; X1: handler funclet address +;; X2: REGDISPLAY* +;; X3: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallCatchFunclet + + ALLOC_CALL_FUNCLET_FRAME 0x60 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x2, [sp, #0x40] ;; x0, x2 & x3 are saved so we have the exception object, REGDISPLAY and + stp x3, xzr, [sp, #0x50] ;; ExInfo later, xzr makes space for the local "is_not_handling_thread_abort" + +#define rsp_offset_is_not_handling_thread_abort 0x58 +#define rsp_offset_x2 0x48 +#define rsp_offset_x3 0x50 + + ;; + ;; clear the DoNotTriggerGc flag, trashes x4-x6 + ;; + INLINE_GETTHREAD x5, x6 ;; x5 <- Thread*, x6 <- trashed + + ldr x4, [x5, #OFFSETOF__Thread__m_threadAbortException] + sub x4, x4, x0 + str x4, [sp, #rsp_offset_is_not_handling_thread_abort] ;; Non-zero if the exception is not ThreadAbortException + + add x12, x5, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry_Catch + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w6, w4, [x12] + cbz w6, ClearSuccess_Catch + b ClearRetry_Catch +ClearSuccess_Catch + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + RESTORE_PRESERVED_REGISTERS x2 + ;; + ;; trash the values at the old homes to make sure nobody uses them + ;; + TRASH_PRESERVED_REGISTERS_STORAGE x2 + + ;; + ;; call the funclet + ;; + ;; x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + ;; x0 contains resume IP + + ldr x2, [sp, #rsp_offset_x2] ;; x2 <- REGDISPLAY* + +;; @TODO: add debug-only validation code for ExInfo pop + + INLINE_GETTHREAD x1, x3 ;; x1 <- Thread*, x3 <- trashed + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
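+
+;; As an aside, the ClearRetry_Catch loop above is a plain LL/SC bit-clear. A C++
+;; sketch using std::atomic (memory_order_relaxed is chosen because ldxr/stxr,
+;; unlike ldaxr/stlxr, carry no acquire/release semantics; TSF_DoNotTriggerGc is
+;; the runtime's flag value):
+;;
+;;     #include <atomic>
+;;     #include <cstdint>
+;;     void ClearDoNotTriggerGcSketch(std::atomic<uint32_t>& threadStateFlags, uint32_t TSF_DoNotTriggerGc)
+;;     {
+;;         threadStateFlags.fetch_and(~TSF_DoNotTriggerGc, std::memory_order_relaxed);
+;;     }
+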
+ INLINE_THREAD_UNHIJACK x1, x3, x12 ;; Thread in x1, trashes x3 and x12 + + ldr x3, [sp, #rsp_offset_x3] ;; x3 <- current ExInfo* + ldr x2, [x2, #OFFSETOF__REGDISPLAY__SP] ;; x2 <- resume SP value + +PopExInfoLoop + ldr x3, [x3, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; x3 <- next ExInfo + cbz x3, DonePopping ;; if (pExInfo == null) { we're done } + cmp x3, x2 + blt PopExInfoLoop ;; if (pExInfo < resume SP} { keep going } + +DonePopping + str x3, [x1, #OFFSETOF__Thread__m_pExInfoStackHead] ;; store the new head on the Thread + + ldr x3, =RhpTrapThreads + ldr w3, [x3] + tbz x3, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort + + ldr x3, [sp, #rsp_offset_is_not_handling_thread_abort] + cbnz x3, NoAbort + + ;; It was the ThreadAbortException, so rethrow it + ;; reset SP + mov x1, x0 ;; x1 <- continuation address as exception PC + mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov sp, x2 + b RhpThrowHwEx + +NoAbort + ;; reset SP and jump to continuation address + mov sp, x2 + br x0 + + NESTED_END RhpCallCatchFunclet + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: X0: handler funclet address +;; X1: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFinallyFunclet + + ALLOC_CALL_FUNCLET_FRAME 0x50 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x1, [sp, #0x40] ;; x1 is saved so we have the REGDISPLAY later, x0 is just alignment padding + +#define rsp_offset_x1 0x48 + + ;; + ;; We want to suppress hijacking between invocations of subsequent finallys. We do this because we + ;; cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + ;; method) and then been popped off the stack, leaving behind no trace of its effect. + ;; + ;; So we clear the state before and set it after invocation of the handler. 
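+
+;; The PopExInfoLoop in RhpCallCatchFunclet above walks the thread's ExInfo chain
+;; past every entry that lives below the resume SP; ExInfos are stack-allocated,
+;; so those frames are about to become dead. A hedged C++ sketch, assuming
+;; simplified Thread/ExInfo declarations with the fields named above:
+;;
+;;     void PopExInfosBelowSketch(Thread* pThread, ExInfo* pCurExInfo, uintptr_t resumeSP)
+;;     {
+;;         ExInfo* pExInfo = pCurExInfo;
+;;         do
+;;         {
+;;             pExInfo = pExInfo->m_pPrevExInfo;
+;;         } while (pExInfo != nullptr && reinterpret_cast<uintptr_t>(pExInfo) < resumeSP);
+;;         pThread->m_pExInfoStackHead = pExInfo;   // new head; may be null
+;;     }
+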
+ ;; + + ;; + ;; clear the DoNotTriggerGc flag, trashes x2-x4 + ;; + INLINE_GETTHREAD x2, x3 ;; x2 <- Thread*, x3 <- trashed + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w3, w4, [x12] + cbz w3, ClearSuccess + b ClearRetry +ClearSuccess + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + RESTORE_PRESERVED_REGISTERS x1 + ;; + ;; trash the values at the old homes to make sure nobody uses them + ;; + TRASH_PRESERVED_REGISTERS_STORAGE x1 + + ;; + ;; call the funclet + ;; + blr x0 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr x1, [sp, #rsp_offset_x1] ;; reload REGDISPLAY pointer + + ;; + ;; save new values of preserved regs into REGDISPLAY + ;; + SAVE_PRESERVED_REGISTERS x1 + + ;; + ;; set the DoNotTriggerGc flag, trashes x1-x3 + ;; + INLINE_GETTHREAD x2, x3 ;; x2 <- Thread*, x3 <- trashed + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags +SetRetry + ldxr w1, [x12] + orr w1, w1, #TSF_DoNotTriggerGc + stxr w3, w1, [x12] + cbz w3, SetSuccess + b SetRetry +SetSuccess + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x50 + EPILOG_RETURN + + NESTED_END RhpCallFinallyFunclet + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: X0: exception object +;; X1: filter funclet address +;; X2: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFilterFunclet + ALLOC_CALL_FUNCLET_FRAME 0x40 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + ldr x12, [x2, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + + ;; + ;; call the funclet + ;; + ;; x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x40 + EPILOG_RETURN + + NESTED_END RhpCallFilterFunclet + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/arm64/GcProbe.asm new file mode 100644 index 0000000000000..7dfa318291ff6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/GcProbe.asm @@ -0,0 +1,752 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + EXTERN g_fGcStressStarted + +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH + + ;; Build a map of symbols representing offsets into the transition frame (see PInvokeTransitionFrame in + ;; rhbinder.h) and keep these two in sync. + map 0 + field OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + field 10 * 8 ; x19..x28 +m_CallersSP field 8 ; SP at routine entry + field 19 * 8 ; x0..x18 + field 8 ; lr +m_SavedNZCV field 8 ; Saved condition flags + field 4 * 8 ; d0..d3 +PROBE_FRAME_SIZE field 0 + + ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very + ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. 
In most cases setting up the + ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and + ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME + ;; defined below. For the special cases where additional work has to be done in the prolog we also provide + ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control + ;; to be asserted. + ;; + ;; Note that we currently employ a significant simplification of frame setup: we always allocate a + ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can + ;; lead to up to 20 additional register saves (x0-x18, lr) or 160 bytes of stack space. I have done no + ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the + ;; additional saves will show any measurable degradation. + + ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro + ;; can only be called from within the prolog). + MACRO + ALLOC_PROBE_FRAME $extraStackSpace, $saveFPRegisters + + ;; First create PInvokeTransitionFrame + PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE + $extraStackSpace)! ;; Push down stack pointer and store FP and LR + + ;; Slot at [sp, #0x10] is reserved for Thread * + ;; Slot at [sp, #0x18] is reserved for bitmask of saved registers + + ;; Save callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + + ;; Slot at [sp, #0x70] is reserved for caller sp + + ;; Save the scratch registers + PROLOG_NOP str x0, [sp, #0x78] + PROLOG_NOP stp x1, x2, [sp, #0x80] + PROLOG_NOP stp x3, x4, [sp, #0x90] + PROLOG_NOP stp x5, x6, [sp, #0xA0] + PROLOG_NOP stp x7, x8, [sp, #0xB0] + PROLOG_NOP stp x9, x10, [sp, #0xC0] + PROLOG_NOP stp x11, x12, [sp, #0xD0] + PROLOG_NOP stp x13, x14, [sp, #0xE0] + PROLOG_NOP stp x15, x16, [sp, #0xF0] + PROLOG_NOP stp x17, x18, [sp, #0x100] + PROLOG_NOP str lr, [sp, #0x110] + + ;; Slot at [sp, #0x118] is reserved for NZCV + + ;; Save the floating return registers + IF $saveFPRegisters + PROLOG_NOP stp d0, d1, [sp, #0x120] + PROLOG_NOP stp d2, d3, [sp, #0x130] + ENDIF + + MEND + + ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all + ;; registers are restored (apart for sp and pc), even volatiles. 
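+
+;; For readers following the literals in ALLOC_PROBE_FRAME / FREE_PROBE_FRAME, the
+;; offsets produced by the map/field block above work out as below (a reader's aid
+;; only; the assembler derives these, the constants are not used anywhere):
+;;
+;;     #include <cstddef>
+;;     constexpr size_t kPreservedRegs  = 0x020;   // x19..x28 (10 * 8)
+;;     constexpr size_t kCallersSP      = 0x070;
+;;     constexpr size_t kScratchRegs    = 0x078;   // x0..x18 (19 * 8)
+;;     constexpr size_t kSavedLR        = 0x110;
+;;     constexpr size_t kSavedNZCV      = 0x118;
+;;     constexpr size_t kSavedD0toD3    = 0x120;   // d0..d3 (4 * 8)
+;;     constexpr size_t kProbeFrameSize = 0x140;
+;;     static_assert(kSavedD0toD3 + 4 * 8 == kProbeFrameSize, "PROBE_FRAME_SIZE");
+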
+ MACRO + FREE_PROBE_FRAME $extraStackSpace, $restoreFPRegisters + + ;; Restore the scratch registers + PROLOG_NOP ldr x0, [sp, #0x78] + PROLOG_NOP ldp x1, x2, [sp, #0x80] + PROLOG_NOP ldp x3, x4, [sp, #0x90] + PROLOG_NOP ldp x5, x6, [sp, #0xA0] + PROLOG_NOP ldp x7, x8, [sp, #0xB0] + PROLOG_NOP ldp x9, x10, [sp, #0xC0] + PROLOG_NOP ldp x11, x12, [sp, #0xD0] + PROLOG_NOP ldp x13, x14, [sp, #0xE0] + PROLOG_NOP ldp x15, x16, [sp, #0xF0] + PROLOG_NOP ldp x17, x18, [sp, #0x100] + PROLOG_NOP ldr lr, [sp, #0x110] + + ; Restore the floating return registers + IF $restoreFPRegisters + EPILOG_NOP ldp d0, d1, [sp, #0x120] + EPILOG_NOP ldp d2, d3, [sp, #0x130] + ENDIF + + ;; Restore callee saved registers + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + + EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE + $extraStackSpace)! + MEND + + ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can + ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP + ;; is invariant outside of the prolog. + ;; + ;; $threadReg : register containing the Thread* (this will be preserved) + ;; $trashReg : register that can be trashed by this macro + ;; $savedRegsMask : value to initialize m_Flags field with (register or #constant) + ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant + ;; $frameSize : total size of the method's stack frame (including probe frame size) + MACRO + INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags, $frameSize + + LCLS BitmaskStr +BitmaskStr SETS "$savedRegsMask" + + str $threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread * + IF BitmaskStr:LEFT:1 == "#" + ;; The savedRegsMask is a constant, remove the leading "#" since the MOVL64 doesn't expect it +BitmaskStr SETS BitmaskStr:RIGHT:(:LEN:BitmaskStr - 1) + MOVL64 $trashReg, $BitmaskStr, $gcFlags + ELSE + ASSERT "$gcFlags" == "" + ;; The savedRegsMask is a register + mov $trashReg, $savedRegsMask + ENDIF + str $trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + add $trashReg, sp, #$frameSize + str $trashReg, [sp, #m_CallersSP] + MEND + + ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro + ;; first in the method (no further prolog instructions can be added after this). + ;; + ;; $threadReg : register containing the Thread* (this will be preserved). If defaulted (specify |) then + ;; the current thread will be calculated inline into r2 ($trashReg must not equal r2 in + ;; this case) + ;; $trashReg : register that can be trashed by this macro + ;; $savedRegsMask : value to initialize m_dwFlags field with (register or #constant) + ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant + MACRO + PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags + + ; Local string tracking the name of the register in which the Thread* is kept. Defaults to the value + ; of $threadReg. + LCLS __PPF_ThreadReg +__PPF_ThreadReg SETS "$threadReg" + + ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + ; incoming register values into it. 
+ ALLOC_PROBE_FRAME 0, {true} + + ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into x2. + ; Record that x2 holds the Thread* in our local variable. + IF "$threadReg" == "" + ASSERT "$trashReg" != "x2" +__PPF_ThreadReg SETS "x2" + INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg + ENDIF + + ; Perform the rest of the PInvokeTransitionFrame initialization. + INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $savedRegsMask, $gcFlags, PROBE_FRAME_SIZE + mov $trashReg, sp + str $trashReg, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + + ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and + ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR. + MACRO + EPILOG_PROBE_FRAME + + FREE_PROBE_FRAME 0, {true} + EPILOG_RETURN + MEND + +;; In order to avoid trashing VFP registers across the loop hijack we must save all user registers, so that +;; registers used by the loop being hijacked will not be affected. Unlike ARM32 where neon registers (NQ0, ..., NQ15) +;; are fully covered by the floating point registers D0 ... D31, we have 32 neon registers Q0, ... Q31 on ARM64 +;; which are not fully covered by the register D0 ... D31. Therefore we must explicitly save all Q registers. +EXTRA_SAVE_SIZE equ (32*16) + + MACRO + ALLOC_LOOP_HIJACK_FRAME + + PROLOG_STACK_ALLOC EXTRA_SAVE_SIZE + + ;; Save all neon registers + PROLOG_NOP stp q0, q1, [sp] + PROLOG_NOP stp q2, q3, [sp, #0x20] + PROLOG_NOP stp q4, q5, [sp, #0x40] + PROLOG_NOP stp q6, q7, [sp, #0x60] + PROLOG_NOP stp q8, q9, [sp, #0x80] + PROLOG_NOP stp q10, q11, [sp, #0xA0] + PROLOG_NOP stp q12, q13, [sp, #0xC0] + PROLOG_NOP stp q14, q15, [sp, #0xE0] + PROLOG_NOP stp q16, q17, [sp, #0x100] + PROLOG_NOP stp q18, q19, [sp, #0x120] + PROLOG_NOP stp q20, q21, [sp, #0x140] + PROLOG_NOP stp q22, q23, [sp, #0x160] + PROLOG_NOP stp q24, q25, [sp, #0x180] + PROLOG_NOP stp q26, q27, [sp, #0x1A0] + PROLOG_NOP stp q28, q29, [sp, #0x1C0] + PROLOG_NOP stp q30, q31, [sp, #0x1E0] + + ALLOC_PROBE_FRAME 0, {false} + MEND + + MACRO + FREE_LOOP_HIJACK_FRAME + + FREE_PROBE_FRAME 0, {false} + + ;; restore all neon registers + PROLOG_NOP ldp q0, q1, [sp] + PROLOG_NOP ldp q2, q3, [sp, #0x20] + PROLOG_NOP ldp q4, q5, [sp, #0x40] + PROLOG_NOP ldp q6, q7, [sp, #0x60] + PROLOG_NOP ldp q8, q9, [sp, #0x80] + PROLOG_NOP ldp q10, q11, [sp, #0xA0] + PROLOG_NOP ldp q12, q13, [sp, #0xC0] + PROLOG_NOP ldp q14, q15, [sp, #0xE0] + PROLOG_NOP ldp q16, q17, [sp, #0x100] + PROLOG_NOP ldp q18, q19, [sp, #0x120] + PROLOG_NOP ldp q20, q21, [sp, #0x140] + PROLOG_NOP ldp q22, q23, [sp, #0x160] + PROLOG_NOP ldp q24, q25, [sp, #0x180] + PROLOG_NOP ldp q26, q27, [sp, #0x1A0] + PROLOG_NOP ldp q28, q29, [sp, #0x1C0] + PROLOG_NOP ldp q30, q31, [sp, #0x1E0] + + EPILOG_STACK_FREE EXTRA_SAVE_SIZE + MEND + +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. 
+;; +;; Register state on entry: +;; x2: thread pointer +;; +;; Register state on exit: +;; + MACRO + ClearHijackState + + ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8) + ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress + stp xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + ;; Clear m_uHijackedReturnValueFlags + str xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] + MEND + +;; +;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +;; clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; x2: thread pointer +;; x3: trashed +;; x12: transition frame flags for the return registers x0 and x1 +;; + MACRO + FixupHijackedCallstack + + ;; x2 <- GetThread(), TRASHES x3 + INLINE_GETTHREAD x2, x3 + + ;; + ;; Fix the stack by restoring the original return address + ;; + ASSERT OFFSETOF__Thread__m_uHijackedReturnValueFlags == (OFFSETOF__Thread__m_pvHijackedReturnAddress + 8) + ;; Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags + ldp lr, x12, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + + ClearHijackState + MEND + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; x4: thread pointer +;; +;; Register state on exit: +;; x4: thread pointer +;; All other registers trashed +;; + + EXTERN RhpWaitForGCNoAbort + + MACRO + WaitForGCCompletion + + ldr w2, [x4, #OFFSETOF__Thread__m_ThreadStateFlags] + tst w2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC + bne %ft0 + + ldr x9, [x4, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + bl RhpWaitForGCNoAbort +0 + MEND + + MACRO + HijackTargetFakeProlog + + ;; This is a fake entrypoint for the method that 'tricks' the OS into calling our personality routine. + ;; The code here should never be executed, and the unwind info is bogus, but we don't mind since the + ;; stack is broken by the hijack anyway until after we fix it below. + PROLOG_SAVE_REG_PAIR fp, lr, #-0x10! + nop ; We also need a nop here to simulate the implied bl instruction. Without + ; this, an OS-applied -4 will back up into the method prolog and the unwind + ; will not be applied as desired. + + MEND + +;; +;; +;; +;; GC Probe Hijack targets +;; +;; + EXTERN RhpPInvokeExceptionGuard + + NESTED_ENTRY RhpGcProbeHijackWrapper, .text, RhpPInvokeExceptionGuard + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijack + + FixupHijackedCallstack + orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcProbe + NESTED_END RhpGcProbeHijackWrapper + +#ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack targets +;; +;; + LEAF_ENTRY RhpGcStressHijack + FixupHijackedCallstack + orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcStressProbe + LEAF_END RhpGcStressHijack +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; This worker performs the GC Stress work and returns to the original return address. +;; +;; Register state on entry: +;; x0: hijacked function return value +;; x1: hijacked function return value +;; x2: thread pointer +;; w12: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for x0, have been trashed +;; All other registers restored as they were when the hijack was first reached. 
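+
+;; As an aside, FixupHijackedCallstack and ClearHijackState above boil down to the
+;; following hedged C++ sketch (field names follow the OFFSETOF__Thread__*
+;; constants; the adjacency ASSERTed above is what lets the assembly use ldp/stp):
+;;
+;;     void UnhijackForProbeSketch(Thread* pThread, void*& returnAddress, uintptr_t& returnValueFlags)
+;;     {
+;;         returnAddress    = pThread->m_pvHijackedReturnAddress;     // restored into lr
+;;         returnValueFlags = pThread->m_uHijackedReturnValueFlags;   // handed back in x12
+;;         pThread->m_ppvHijackedReturnAddressLocation = nullptr;
+;;         pThread->m_pvHijackedReturnAddress          = nullptr;
+;;         pThread->m_uHijackedReturnValueFlags        = 0;
+;;     }
+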
+;; + NESTED_ENTRY RhpGcStressProbe + PROLOG_PROBE_FRAME x2, x3, x12, + + bl $REDHAWKGCINTERFACE__STRESSGC + + EPILOG_PROBE_FRAME + NESTED_END RhpGcStressProbe +#endif ;; FEATURE_GC_STRESS + + LEAF_ENTRY RhpGcProbe + ldr x3, =RhpTrapThreads + ldr w3, [x3] + tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, RhpGcProbeRare + ret + LEAF_END RhpGcProbe + + EXTERN RhpThrowHwEx + + NESTED_ENTRY RhpGcProbeRare + PROLOG_PROBE_FRAME x2, x3, x12, + + mov x4, x2 + WaitForGCCompletion + + ldr x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tbnz x2, #PTFF_THREAD_ABORT_BIT, %F1 + + EPILOG_PROBE_FRAME + +1 + FREE_PROBE_FRAME 0, {true} + EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_NOP mov x1, lr ;; return address as exception PC + EPILOG_NOP b RhpThrowHwEx + NESTED_END RhpGcProbeRare + + LEAF_ENTRY RhpGcPoll + brk 0xf000 ;; TODO: remove after debugging/testing stub + ; @todo: I'm assuming it's not OK to trash any register here. If that's not true we can optimize the + ; push/pops out of this fast path. + str x0, [sp], #-0x10! + ldr x0, =RhpTrapThreads + ldr w0, [x0] + tbnz x0, #TrapThreadsFlags_TrapThreads_Bit, %F0 + ldr x0, [sp], #0x10! + ret +0 + ldr x0, [sp], #0x10! + b RhpGcPollRare + LEAF_END RhpGcPoll + + NESTED_ENTRY RhpGcPollRare + brk 0xf000 ;; TODO: remove after debugging/testing stub + PROLOG_PROBE_FRAME |, x3, #PROBE_SAVE_FLAGS_EVERYTHING, 0 + + ; Unhijack this thread, if necessary. + INLINE_THREAD_UNHIJACK x2, x0, x1 ;; trashes x0, x1 + + mov x4, x2 + WaitForGCCompletion + + EPILOG_PROBE_FRAME + NESTED_END RhpGcPollRare + + LEAF_ENTRY RhpGcPollStress + ; + ; loop hijacking is used instead + ; + brk 0xf000 + + LEAF_END RhpGcPollStress + + +#ifdef FEATURE_GC_STRESS + NESTED_ENTRY RhpHijackForGcStress + ;; This function should be called from right before epilog + + ;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers + PROLOG_SAVE_REG_PAIR fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need SP and + ;; LR, but this is test code, so I'm not too worried about efficiency. + ;; + ;; Setup a PAL_LIMITED_CONTEXT on the stack + ;; { + ;; FP and LR already pushed. + PROLOG_NOP stp x0, x1, [sp, #0x10] + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + PROLOG_SAVE_REG lr, #0x78 + + ;; } end PAL_LIMITED_CONTEXT + + ;; Save VFP return value + stp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + stp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Compute and save SP at callsite. 
+ add x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20) ;; +0x20 for the pushes right before the context struct + str x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP] + + mov x0, sp ; Address of PAL_LIMITED_CONTEXT + bl $THREAD__HIJACKFORGCSTRESS + + ;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure) + ldp x0, x1, [sp, #0x10] + + ;; Restore VFP return value + ldp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + ldp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Epilog + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + EPILOG_RESTORE_REG_PAIR fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + EPILOG_RETURN + + NESTED_END RhpHijackForGcStress + + NESTED_ENTRY RhpHijackForGcStressLeaf + ;; This should be jumped to, right before epilog + ;; x9 has the return address (we don't care about trashing scratch regs at this point) + + ;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers + PROLOG_SAVE_REG_PAIR fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need SP and + ;; LR, but this is test code, so I'm not too worried about efficiency. + ;; + ;; Setup a PAL_LIMITED_CONTEXT on the stack + ;; { + ;; FP and LR already pushed. + PROLOG_NOP stp x0, x1, [sp, #0x10] + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + ; PROLOG_SAVE_REG macro doesn't let to use scratch reg: + PROLOG_NOP str x9, [sp, #0x78] ; this is return address from RhpHijackForGcStress; lr is return address for it's caller + + ;; } end PAL_LIMITED_CONTEXT + + ;; Save VFP return value + stp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + stp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Compute and save SP at callsite. + add x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20) ;; +0x20 for the pushes right before the context struct + str x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP] + + mov x0, sp ; Address of PAL_LIMITED_CONTEXT + bl $THREAD__HIJACKFORGCSTRESS + + ;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure) + ldp x0, x1, [sp, #0x10] + + ;; Restore VFP return value + ldp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + ldp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Epilog + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + EPILOG_NOP ldr x9, [sp, #0x78] + EPILOG_RESTORE_REG_PAIR fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + EPILOG_NOP br x9 + + NESTED_END RhpHijackForGcStressLeaf + +#endif ;; FEATURE_GC_STRESS + +#if 0 // used by the binder only +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. 
We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriately and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are either hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; + MACRO + RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName + + LEAF_ENTRY $funcName + ldr x0, =$hijackFuncName + cmp x0, lr + beq RhpGCProbeForEHJump + + IF $isStress + ldr x0, =$stressFuncName + cmp x0, lr + beq RhpGCStressProbeForEHJump + ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov lr, x2 ; Update the return address + ret + LEAF_END $funcName + MEND +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. + RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijack, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijack, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijack, {false}, 0 +#ifdef FEATURE_GC_STRESS + RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack + RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack + RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack +#endif + +;; +;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; x0: scratch +;; x1: reference to the exception object. +;; x2: handler address we want to jump to. +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we are just about to returned from the call +;; +;; Register state on exit: +;; x0: reference to the exception object +;; x2: thread pointer +;; + MACRO + EHJumpProbeProlog + + PROLOG_NOP mov x0, x1 ; move the ex object reference into x0 so we can report it + ALLOC_PROBE_FRAME 0x10, {true} + str x2, [sp, #PROBE_FRAME_SIZE] + + ;; x2 <- GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + ;; Recover the original return address and update the frame + ldr lr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + + ;; ClearHijackState expects thread in x2 + ClearHijackState + + ; TRASHES x1 + INIT_PROBE_FRAME x2, x1, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0), PTFF_X0_IS_GCREF_HI, (PROBE_FRAME_SIZE + 8) + add x1, sp, xzr + str x1, [x2, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the +;; final jump to the handler for EH jump probe funcs. 
+;; +;; Register state on entry: +;; x0: reference to the exception object +;; x1-x3: scratch +;; +;; Register state on exit: +;; sp: correct for return to the caller +;; x1: reference to the exception object +;; + MACRO + EHJumpProbeEpilog + + ldr x2, [sp, #PROBE_FRAME_SIZE] + FREE_PROBE_FRAME 0x10, {true} ; This restores exception object back into x0 + EPILOG_NOP mov x1, x0 ; Move the Exception object back into x1 where the catch handler expects it + EPILOG_NOP br x2 + MEND + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete. +;; +;; Register state on entry: +;; x0: reference to the exception object. +;; x2: thread +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; x0: reference to the exception object +;; + NESTED_ENTRY RhpGCProbeForEHJump + brk 0xf000 ;; TODO: remove after debugging/testing stub + EHJumpProbeProlog + +#ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + ldr x1, =RhpTrapThreads + ldr w1, [x1] + tbnz x1, #TrapThreadsFlags_TrapThreads_Bit, %0 + + bl RhDebugBreak +0 +#endif ;; _DEBUG + + mov x4, x2 + WaitForGCCompletion + + EHJumpProbeEpilog + NESTED_END RhpGCProbeForEHJump + +#ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; x1: reference to the exception object. +;; x2: thread +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; x0: reference to the exception object +;; + NESTED_ENTRY RhpGCStressProbeForEHJump + brk 0xf000 ;; TODO: remove after debugging/testing stub + EHJumpProbeProlog + + bl $REDHAWKGCINTERFACE__STRESSGC + + EHJumpProbeEpilog + NESTED_END RhpGCStressProbeForEHJump +#endif ;; FEATURE_GC_STRESS +#endif ;; 0 + +#ifdef FEATURE_GC_STRESS +;; +;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. +;; + LEAF_ENTRY RhpSuppressGcStress + INLINE_GETTHREAD x9, x10 + add x9, x9, #OFFSETOF__Thread__m_ThreadStateFlags +Retry + ldxr w10, [x9] + orr w10, w10, #TSF_SuppressGcStress + stxr w11, w10, [x9] + cbz w11, Success + b Retry + +Success + ret + LEAF_END RhpSuppressGcStress +#endif ;; FEATURE_GC_STRESS + + INLINE_GETTHREAD_CONSTANT_POOL + + end + diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/arm64/GetThread.asm new file mode 100644 index 0000000000000..7c01e66453385 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/GetThread.asm @@ -0,0 +1,29 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmMacros.h" + + TEXTAREA + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: none +;; +;; OUTPUT: x9: Thread pointer +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpGetThread + ;; x9 = GetThread(), TRASHES xip0 (which can be used as an intra-procedure-call scratch register) + INLINE_GETTHREAD x9, xip0 + ret + LEAF_END +FASTCALL_ENDFUNC + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/arm64/Interlocked.S new file mode 100644 index 0000000000000..755b5fd3d302b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/Interlocked.S @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// x0 = destination address +// w1 = value +// w2 = comparand +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + mov x8, x0 // Save value of x0 into x8 as x0 is used for the return value +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation +1: // loop + ldaxr w0, [x8] // w0 = *x8 + cmp w0, w2 + bne 2f // if (w0 != w2) goto exit + stlxr w9, w1, [x8] // if (w0 == w2) { try *x8 = w1 and goto loop if failed or goto exit } + cbnz w9, 1b +2: // exit + ret +LEAF_END RhpLockCmpXchg32, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// x0 = destination address +// x1 = value +// x2 = comparand +LEAF_ENTRY RhpLockCmpXchg64, _TEXT + mov x8, x0 // Save value of x0 into x8 as x0 is used for the return value +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation +1: // loop + ldaxr x0, [x8] // x0 = *x8 + cmp x0, x2 + bne 2f // if (x0 != x2) goto exit + stlxr w9, x1, [x8] // if (x0 == x2) { try *x8 = x1 and goto loop if failed or goto exit } + cbnz w9, 1b +2: // exit + ret +LEAF_END RhpLockCmpXchg64, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..bd9cbb4e882c1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.asm @@ -0,0 +1,91 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
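+
+;; A note on the RhpLockCmpXchg32/RhpLockCmpXchg64 helpers added in Interlocked.S
+;; above: they are plain LL/SC compare-exchange loops with acquire/release
+;; ordering (ldaxr/stlxr). A hedged C++ equivalent of the 32-bit flavour,
+;; returning the originally observed value just as the helper does in w0:
+;;
+;;     #include <atomic>
+;;     #include <cstdint>
+;;     uint32_t LockCmpXchg32Sketch(std::atomic<uint32_t>* dest, uint32_t value, uint32_t comparand)
+;;     {
+;;         uint32_t observed = comparand;
+;;         dest->compare_exchange_strong(observed, value,
+;;                                       std::memory_order_acq_rel,    // success: ldaxr + stlxr
+;;                                       std::memory_order_acquire);   // failure: only the ldaxr happened
+;;         return observed;
+;;     }
+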
+ + +#include "ksarm64.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +__tls_array equ 0x58 ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 0x08 + +;; TLS variables + AREA |.tls$|, DATA +ThunkParamSlot % 0x8 + + TEXTAREA + + EXTERN _tls_index + + ;; Section relocs are 32 bits. Using an extra DCD initialized to zero for 8-byte alignment. +__SECTIONREL_ThunkParamSlot + DCD ThunkParamSlot + RELOC 8, ThunkParamSlot ;; SECREL + DCD 0 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; RhCommonStub + ;; + ;; INPUT: xip0: thunk's data block + ;; + ;; TRASHES: x9, x10, x11, xip0 + ;; + LEAF_ENTRY RhCommonStub + ;; There are arbitrary callers passing arguments with arbitrary signatures. + ;; Custom calling convention: + ;; xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + ldr x10, =_tls_index + ldr w10, [x10] + ldr x9, [xpr, #__tls_array] + ldr x9, [x9, x10 lsl #3] ;; x9 <- our TLS base + + ;; x9 = base address of TLS data + ;; x10 = trashed + ;; xip0 = address of context cell in thunk's data + + ;; store thunk address in thread static + ldr x10, [xip0] + ldr x11, =__SECTIONREL_ThunkParamSlot + ldr x11, [x11] + str x10, [x9, x11] ;; ThunkParamSlot <- context slot data + + ;; Now load the target address and jump to it. + ldr xip0, [xip0, #POINTER_SIZE] + br xip0 + + LEAF_END RhCommonStub + + ;; + ;; IntPtr RhGetCommonStubAddress() + ;; + LEAF_ENTRY RhGetCommonStubAddress + ldr x0, =RhCommonStub + ret + LEAF_END RhGetCommonStubAddress + + + ;; + ;; IntPtr RhGetCurrentThunkContext() + ;; + LEAF_ENTRY RhGetCurrentThunkContext + + ldr x1, =_tls_index + ldr w1, [x1] + ldr x0, [xpr, #__tls_array] + ldr x0, [x0, x1 lsl #3] ;; x0 <- our TLS base + + ldr x1, =__SECTIONREL_ThunkParamSlot + ldr x1, [x1] + ldr x0, [x0, x1] ;; x0 <- ThunkParamSlot + + ret + + LEAF_END RhGetCurrentThunkContext + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.S new file mode 100644 index 0000000000000..53616c2269615 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.S @@ -0,0 +1,2 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.asm new file mode 100644 index 0000000000000..85c5d1e2ffd34 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.asm @@ -0,0 +1,244 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + EXTERN memcpy + EXTERN memcpyGCRefs + EXTERN memcpyGCRefsWithWriteBarrier + EXTERN memcpyAnyWithWriteBarrier + EXTERN GetClasslibCCtorCheck + + TEXTAREA + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. 
+;; +;; Input: +;; x0 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; + LEAF_ENTRY RhpCheckCctor + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr w12, [x0, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp w12, #1 + bne RhpCheckCctor__SlowPath + ret +RhpCheckCctor__SlowPath + mov x1, x0 + b RhpCheckCctor2 ; tail-call the check cctor helper that actually has an implementation to call + ; the cctor + + LEAF_END RhpCheckCctor + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; x0 : Value that must be preserved in this register across the cctor check. +;; x1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than x0 may be trashed and the condition codes may also be trashed. +;; + LEAF_ENTRY RhpCheckCctor2 + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr w12, [x1, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp w12, #1 + bne RhpCheckCctor2__SlowPath + ret + + LEAF_END RhpCheckCctor2 + +;; +;; Slow path helper for RhpCheckCctor. +;; +;; Input: +;; x0 : Value that must be preserved in this register across the cctor check. +;; x1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than x0 may be trashed and the condition codes may also be trashed. +;; + NESTED_ENTRY RhpCheckCctor2__SlowPath + + ;; Need to preserve x0, x1 and lr across helper call. fp is also pushed to keep the stack 16 byte aligned. + PROLOG_SAVE_REG_PAIR fp, lr, #-0x20! + stp x0, x1, [sp, #0x10] + + ;; Call a C++ helper to retrieve the address of the classlib callback. The caller's return address is + ;; passed as the argument to the helper; it's an address in the module and is used by the helper to + ;; locate the classlib. + mov x0, lr + bl GetClasslibCCtorCheck + + ;; X0 now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in stashed on the stack. Clean up and tail call to the classlib + ;; callback so we're not on the stack should a GC occur (so we don't need to worry about transition + ;; frames). + mov x12, x0 + ldp x0, x1, [sp, #0x10] + EPILOG_RESTORE_REG_PAIR fp, lr, #0x20! + ;; tail-call the class lib cctor check function. This function is required to return its first + ;; argument, so that x0 can be preserved. 
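Taken together, the fast path of RhpCheckCctor/RhpCheckCctor2 above is just an unsynchronized read of the context's m_initialized field, with all synchronization pushed into the classlib callback, which must hand back its first argument so the preserved register survives. A C++ sketch of that overall shape, using a minimal stand-in for the construction context (only the field named in the comments above is modeled; the callback type is hypothetical):

    #include <cstdint>

    struct CctorContextSketch          // minimal stand-in; the real context has more fields
    {
        uint32_t m_initialized;        // 1 once the cctor has run; other values reserved for the classlib
    };

    using ClasslibCctorCheckSketch = void* (*)(void* preservedArg, CctorContextSketch* ctx);

    // Mirrors the assembly: unsynchronized fast-path check, then a call into the classlib
    // helper, which synchronizes, re-checks, runs the cctor if needed, and returns preservedArg.
    void* CheckCctorSketch(void* preservedArg, CctorContextSketch* ctx,
                           ClasslibCctorCheckSketch classlibCheck)
    {
        if (ctx->m_initialized == 1)
            return preservedArg;                      // fast path: cctor already ran
        return classlibCheck(preservedArg, ctx);      // slow path
    }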
+ EPILOG_NOP br x12 + + NESTED_END RhpCheckCctor__SlowPath2 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyteNoGCRefs + + ; x0 dest + ; x1 src + ; x2 count + + cbz x2, NothingToCopy_NoGCRefs ; check for a zero-length copy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to plain-old-memcpy + b memcpy + +NothingToCopy_NoGCRefs + ; dest is already in x0 + ret + + LEAF_END + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyte + + ; x0 dest + ; x1 src + ; x2 count + + ; check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyte + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefs + +NothingToCopy_RhpCopyMultibyte + ; dest is already still in x0 + ret + + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; + + LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier + + ; x0 dest + ; x1 src + ; x2 count + + ; check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyteWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
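All of the copy wrappers in this file share that shape: bail out on a zero-length copy, deliberately touch one byte of the destination and source at labeled addresses so any fault surfaces at a known PC, then tail-call the appropriate memcpy flavor. A rough C++ rendering of the control flow is below; the probe-at-a-known-address part has no direct C++ equivalent, so the sketch only preserves the ordering:

    #include <cstddef>
    #include <cstring>

    // Sketch only: the real helpers fault at dedicated ALTERNATE_ENTRY labels so the EH
    // subsystem can attribute the AV to the managed caller; plain C++ cannot express that.
    void* CopyMultibyteNoGCRefsSketch(void* dest, const void* src, size_t count)
    {
        if (count == 0)
            return dest;                                               // nothing to copy

        volatile char probeDest = *static_cast<const char*>(dest);     // probe dest (may fault)
        volatile char probeSrc  = *static_cast<const char*>(src);      // probe src (may fault)
        (void)probeDest; (void)probeSrc;

        return memcpy(dest, src, count);                               // the assembly tail-calls here
    }

The GC-aware variants differ only in which copy routine they tail-call after the probes.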
+ ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefsWithWriteBarrier + +NothingToCopy_RhpCopyMultibyteWithWriteBarrier + ; dest is already still in x0 + ret + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers +;; + + LEAF_ENTRY RhpCopyAnyWithWriteBarrier + + ; x0 dest + ; x1 src + ; x2 count + + ; check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyAnyWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyAnyWithWriteBarrier + +NothingToCopy_RhpCopyAnyWithWriteBarrier + ; dest is already still in x0 + ret + + LEAF_END + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.asm new file mode 100644 index 0000000000000..e4db7d65cb4f3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.asm @@ -0,0 +1,301 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + IMPORT RhpReversePInvokeBadTransition + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForSuspend + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-0xA0! 
;; Push down stack pointer and store FP and LR + + ;; Need to save argument registers x0-x7 and the return buffer register x8 + ;; Also save x9 which may be used for saving indirect call target + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, x9, [sp, #0x50] + + ;; Save float argument registers as well since they're volatile + stp d0, d1, [sp, #0x60] + stp d2, d3, [sp, #0x70] + stp d4, d5, [sp, #0x80] + stp d6, d7, [sp, #0x90] + + bl RhpWaitForSuspend2 + + ;; Restore floating point registers + ldp d0, d1, [sp, #0x60] + ldp d2, d3, [sp, #0x70] + ldp d4, d5, [sp, #0x80] + ldp d6, d7, [sp, #0x90] + + ;; Restore the argument registers + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldp x8, x9, [sp, #0x50] + + ;; Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR fp, lr, #0xA0! + EPILOG_RETURN + + NESTED_END RhpWaitForSuspend + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort +;; +;; +;; INPUT: x9: transition frame +;; +;; TRASHES: None +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGCNoAbort + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-0x40! ;; Push down stack pointer and store FP and LR + + ;; Save the integer return registers, as well as the floating return registers + stp x0, x1, [sp, #0x10] + stp d0, d1, [sp, #0x20] + stp d2, d3, [sp, #0x30] + + ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] + tbnz x0, #TSF_DoNotTriggerGc_Bit, Done + + mov x0, x9 ; passing transition frame in x0 + bl RhpWaitForGC2 + +Done + ldp x0, x1, [sp, #0x10] + ldp d0, d1, [sp, #0x20] + ldp d2, d3, [sp, #0x30] + EPILOG_RESTORE_REG_PAIR fp, lr, #0x40! + EPILOG_RETURN + + NESTED_END RhpWaitForGCNoAbort + + EXTERN RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC +;; +;; +;; INPUT: x9: transition frame +;; +;; TRASHES: x0, x1, x10 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGC + + PROLOG_SAVE_REG_PAIR fp, lr, #-0x10! + + ldr x10, =RhpTrapThreads + ldr w10, [x10] + tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait + bl RhpWaitForGCNoAbort +NoWait + tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort + ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort + + EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! + EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_NOP mov x1, lr ; hijack target address as exception PC + EPILOG_NOP b RhpThrowHwEx + +NoAbort + EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! 
+ EPILOG_RETURN + + NESTED_END RhpWaitForGC + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; IN: x9: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 8: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;; PRESERVES: x0 - x8 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: x10, x11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpReversePInvoke + + INLINE_GETTHREAD x10, x11 ; x10 = Thread, x11 trashed + str x10, [x9, #8] ; save Thread pointer for RhpReversePInvokeReturn + + ;; x9 = reverse pinvoke frame + ;; x10 = thread + ;; x11 = scratch + + ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] + tbz x11, #TSF_Attached_Bit, AttachThread + +ThreadAttached + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + ldr x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + cbz x11, CheckBadTransition + + ;; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ;; whenever we might attempt to hijack this thread. + str x11, [x9] + + str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + + ldr x11, =RhpTrapThreads + ldr w11, [x11] + tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread + + ret + +CheckBadTransition + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. + ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] + tbz x11, #TSF_DoNotTriggerGc_Bit, BadTransition + + ;; zero-out our 'previous transition frame' save slot + mov x11, #0 + str x11, [x9] + + ;; nothing more to do + ret + +TrapThread + ;; put the previous frame back (sets us back to preemptive mode) + ldr x11, [x9] + str x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + +AttachThread + ; passing address of reverse pinvoke frame in x9 + b RhpReversePInvokeAttachOrTrapThread + +BadTransition + mov x0, lr ; arg <- return address + b RhpReversePInvokeBadTransition + + LEAF_END RhpReversePInvoke + + INLINE_GETTHREAD_CONSTANT_POOL + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke +;; +;; +;; INPUT: x9: address of reverse pinvoke frame +;; +;; PRESERVES: x0-x8 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-0xA0! 
;; Push down stack pointer and store FP and LR + + ;; Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment) + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, x8, [sp, #0x50] + + ;; Save float argument registers as well since they're volatile + stp d0, d1, [sp, #0x60] + stp d2, d3, [sp, #0x70] + stp d4, d5, [sp, #0x80] + stp d6, d7, [sp, #0x90] + + mov x0, x9 ; passing reverse pinvoke frame pointer in x0 + bl RhpReversePInvokeAttachOrTrapThread2 + + ;; Restore floating point registers + ldp d0, d1, [sp, #0x60] + ldp d2, d3, [sp, #0x70] + ldp d4, d5, [sp, #0x80] + ldp d6, d7, [sp, #0x90] + + ;; Restore the argument registers + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldr x8, [sp, #0x50] + + ;; Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR fp, lr, #0xA0! + EPILOG_RETURN + + NESTED_END RhpReversePInvokeTrapThread + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: x9: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 8: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;; TRASHES: x10, x11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpReversePInvokeReturn + + ldp x10, x11, [x9] + + ;; x10: previous M->U transition frame + ;; x11: thread pointer + + str x10, [x11, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + + ldr x10, =RhpTrapThreads + ldr w10, [x10] + tbnz x10, #TrapThreadsFlags_TrapThreads_Bit, RareTrapThread + + ret + +RareTrapThread + b RhpWaitForSuspend + + LEAF_END RhpReversePInvokeReturn + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.S new file mode 100644 index 0000000000000..01ed602a761cf --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.S @@ -0,0 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +// TODO: Implement Arm64 support diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.asm new file mode 100644 index 0000000000000..956bcdb4d013f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.asm @@ -0,0 +1,116 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransition_DebugStepTailCall + + ;; Macro that generates code to check a single cache entry. + MACRO + CHECK_CACHE_ENTRY $entry + ;; Check a single entry in the cache. + ;; x9 : Cache data structure. Also used for target address jump. 
+ ;; x10 : Instance EEType* + ;; x11 : Trashed + ldr x11, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16))] + cmp x10, x11 + bne %ft0 ;; Jump to label '0' + ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16) + 8)] + br x9 +0 ;; Label '0' + MEND + + +;; +;; Macro that generates a stub consuming a cache with the given number of entries. +;; + GBLS StubName + + MACRO + DEFINE_INTERFACE_DISPATCH_STUB $entries + +StubName SETS "RhpInterfaceDispatch$entries" + + NESTED_ENTRY $StubName + + ;; xip1 currently holds the indirection cell address. We need to get the cache structure instead. + ldr x9, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in x0. + ldr x10, [x0] + + GBLA CurrentEntry +CurrentEntry SETA 0 + + WHILE CurrentEntry < $entries + CHECK_CACHE_ENTRY CurrentEntry +CurrentEntry SETA CurrentEntry + 1 + WEND + + ;; xip1 still contains the indirection cell address. + b RhpInterfaceDispatchSlow + + NESTED_END $StubName + + MEND + +;; +;; Define all the stub routines we currently need. +;; + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + + +;; +;; Initial dispatch on an interface when we don't have a cache yet. +;; + LEAF_ENTRY RhpInitialInterfaceDispatch + ;; Just tail call to the cache miss helper. + b RhpInterfaceDispatchSlow + LEAF_END RhpInitialInterfaceDispatch + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch + ;; xip1 has the interface dispatch cell address in it. + ;; load x12 to point to the vtable offset (which is stored in the m_pCache field). + ldr x12, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in x0, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + ldr x13, [x0] + add x12, x12, x13 + + ;; Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; + LEAF_ENTRY RhpInterfaceDispatchSlow + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + ;; xip1 has the interface dispatch cell address in it. + ;; Calling convention of the universal thunk is: + ;; xip0: contains target address for the thunk to call + ;; xip1: contains parameter of the thunk's target + ldr xip0, =RhpCidResolve + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/arm64/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..5306cf92ee27b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/ThunkPoolThunks.asm @@ -0,0 +1,334 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
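The interface dispatch stubs generated in StubDispatch.asm above probe a small inline cache: each 16-byte entry pairs an expected type with a target address, and a complete miss falls through to RhpInterfaceDispatchSlow. A hedged C++ sketch of that probe is below; the cache layout is a guess at the shape implied by the 16-byte entry stride, not the runtime's real InterfaceDispatchCache definition:

    #include <cstddef>

    struct DispatchCacheEntrySketch
    {
        const void* expectedType;    // instance EEType the entry was recorded for
        const void* target;          // code address to jump to on a hit
    };

    struct DispatchCacheSketch
    {
        size_t                   entryCount;   // really 1/2/4/.../64, baked into the stub
        DispatchCacheEntrySketch entries[1];
    };

    // Equivalent of the unrolled CHECK_CACHE_ENTRY sequence: compare the object's type
    // against each cached type and return the cached target on the first match.
    const void* ProbeDispatchCacheSketch(const DispatchCacheSketch* cache, const void* instanceType)
    {
        for (size_t i = 0; i < cache->entryCount; i++)
            if (cache->entries[i].expectedType == instanceType)
                return cache->entries[i].target;
        return nullptr;   // caller would then take the RhpInterfaceDispatchSlow path
    }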
+ +#include "ksarm64.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 0x10 ;; 3 instructions, 4 bytes each (and we also have 4 bytes of padding) +THUNK_DATASIZE equ 0x10 ;; 2 qwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0xFA ;; 250 thunks per page + +POINTER_SIZE equ 0x08 + + MACRO + NAMED_READONLY_DATA_SECTION $name, $areaAlias + AREA $areaAlias,DATA,READONLY +RO$name % 8 + MEND + + ;; This macro is used to declare the thunks data blocks. Unlike the macro above (which is just used for padding), + ;; this macro needs to assign labels to each data block, so we can address them using PC-relative addresses. + MACRO + NAMED_READWRITE_DATA_SECTION $name, $areaAlias, $pageIndex + AREA $areaAlias,DATA + THUNKS_DATA_PAGE_BLOCK $pageIndex + MEND + + MACRO + LOAD_DATA_ADDRESS $groupIndex, $index, $pageIndex + + ;; Set xip0 to the address of the current thunk's data block. This is done using labels. + adr xip0, label_$groupIndex_$index_P$pageIndex + MEND + + MACRO + JUMP_TO_COMMON $groupIndex, $index + ;; start : xip0 points to the current thunks first data cell in the data page + ;; set xip0 to begining of data page : xip0 <- xip0 - (THUNK_DATASIZE * current thunk's index) + ;; fix offset to point to last QWROD in page : xip1 <- [xip0 + PAGE_SIZE - POINTER_SIZE] + ;; tailcall to the location pointed at by the last qword in the data page + ldr xip1, [xip0, #(PAGE_SIZE - POINTER_SIZE - ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index))] + br xip1 + + brk 0xf000 ;; Stubs need to be 16-byte aligned for CFG table. Filling padding with a + ;; deterministic brk instruction, instead of having it just filled with zeros. + MEND + + MACRO + THUNK_LABELED_DATA_BLOCK $groupIndex, $index, $pageIndex + + ;; Each data block contains 2 qword cells. The data block is also labeled so it can be addressed + ;; using PC relative instructions +label_$groupIndex_$index_P$pageIndex + DCQ 0 + DCQ 0 + MEND + + MACRO + TenThunks $groupIndex, $pageIndex + + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (last qword + ;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two qword values: + ;; - Context: some value given to the thunk as context. Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
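Each data block described above is just two pointer-sized cells, and the last qword of every data page holds the common stub that JUMP_TO_COMMON tail-calls. A small C++ sketch of that layout follows, with hypothetical names and an assumed 4 KB page size (the real value is the assembler's PAGE_SIZE):

    #include <cstddef>

    // Illustrative constants; the authoritative values are the assembler equ's above.
    constexpr size_t kThunkDataSizeSketch   = 0x10;    // THUNK_DATASIZE: two qword cells
    constexpr size_t kThunksPerPageSketch   = 0xFA;    // THUNK_POOL_NUM_THUNKS_PER_PAGE
    constexpr size_t kPageSizeAssumed       = 0x1000;  // assumption: 4 KB pages

    struct ThunkDataBlockSketch
    {
        void* context;   // e.g. generic dictionary for fat function pointers
        void* target;    // code the thunk ultimately jumps to
    };

    // Shape of one data page: 250 blocks, padding, and the common-stub pointer in the
    // page's final pointer-sized slot (the value JUMP_TO_COMMON loads and branches to).
    struct ThunkDataPageSketch
    {
        ThunkDataBlockSketch blocks[kThunksPerPageSketch];
        unsigned char        padding[kPageSizeAssumed
                                     - kThunksPerPageSketch * kThunkDataSizeSketch
                                     - sizeof(void*)];
        void*                commonStub;
    };

    static_assert(sizeof(ThunkDataPageSketch) == kPageSizeAssumed,
                  "sketch assumes blocks + padding + common-stub slot fill exactly one page");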
+ + LOAD_DATA_ADDRESS $groupIndex,0,$pageIndex + JUMP_TO_COMMON $groupIndex,0 + + LOAD_DATA_ADDRESS $groupIndex,1,$pageIndex + JUMP_TO_COMMON $groupIndex,1 + + LOAD_DATA_ADDRESS $groupIndex,2,$pageIndex + JUMP_TO_COMMON $groupIndex,2 + + LOAD_DATA_ADDRESS $groupIndex,3,$pageIndex + JUMP_TO_COMMON $groupIndex,3 + + LOAD_DATA_ADDRESS $groupIndex,4,$pageIndex + JUMP_TO_COMMON $groupIndex,4 + + LOAD_DATA_ADDRESS $groupIndex,5,$pageIndex + JUMP_TO_COMMON $groupIndex,5 + + LOAD_DATA_ADDRESS $groupIndex,6,$pageIndex + JUMP_TO_COMMON $groupIndex,6 + + LOAD_DATA_ADDRESS $groupIndex,7,$pageIndex + JUMP_TO_COMMON $groupIndex,7 + + LOAD_DATA_ADDRESS $groupIndex,8,$pageIndex + JUMP_TO_COMMON $groupIndex,8 + + LOAD_DATA_ADDRESS $groupIndex,9,$pageIndex + JUMP_TO_COMMON $groupIndex,9 + MEND + + MACRO + TenThunkDataBlocks $groupIndex, $pageIndex + + ;; Similar to the thunks stubs block, we declare the thunks data blocks here + + THUNK_LABELED_DATA_BLOCK $groupIndex, 0, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 1, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 2, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 3, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 4, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 5, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 6, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 7, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 8, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 9, $pageIndex + MEND + + MACRO + THUNKS_PAGE_BLOCK $pageIndex + + TenThunks 0, $pageIndex + TenThunks 1, $pageIndex + TenThunks 2, $pageIndex + TenThunks 3, $pageIndex + TenThunks 4, $pageIndex + TenThunks 5, $pageIndex + TenThunks 6, $pageIndex + TenThunks 7, $pageIndex + TenThunks 8, $pageIndex + TenThunks 9, $pageIndex + TenThunks 10, $pageIndex + TenThunks 11, $pageIndex + TenThunks 12, $pageIndex + TenThunks 13, $pageIndex + TenThunks 14, $pageIndex + TenThunks 15, $pageIndex + TenThunks 16, $pageIndex + TenThunks 17, $pageIndex + TenThunks 18, $pageIndex + TenThunks 19, $pageIndex + TenThunks 20, $pageIndex + TenThunks 21, $pageIndex + TenThunks 22, $pageIndex + TenThunks 23, $pageIndex + TenThunks 24, $pageIndex + MEND + + MACRO + THUNKS_DATA_PAGE_BLOCK $pageIndex + + TenThunkDataBlocks 0, $pageIndex + TenThunkDataBlocks 1, $pageIndex + TenThunkDataBlocks 2, $pageIndex + TenThunkDataBlocks 3, $pageIndex + TenThunkDataBlocks 4, $pageIndex + TenThunkDataBlocks 5, $pageIndex + TenThunkDataBlocks 6, $pageIndex + TenThunkDataBlocks 7, $pageIndex + TenThunkDataBlocks 8, $pageIndex + TenThunkDataBlocks 9, $pageIndex + TenThunkDataBlocks 10, $pageIndex + TenThunkDataBlocks 11, $pageIndex + TenThunkDataBlocks 12, $pageIndex + TenThunkDataBlocks 13, $pageIndex + TenThunkDataBlocks 14, $pageIndex + TenThunkDataBlocks 15, $pageIndex + TenThunkDataBlocks 16, $pageIndex + TenThunkDataBlocks 17, $pageIndex + TenThunkDataBlocks 18, $pageIndex + TenThunkDataBlocks 19, $pageIndex + TenThunkDataBlocks 20, $pageIndex + TenThunkDataBlocks 21, $pageIndex + TenThunkDataBlocks 22, $pageIndex + TenThunkDataBlocks 23, $pageIndex + TenThunkDataBlocks 24, $pageIndex + MEND + + + ;; + ;; The first thunks section should be 64K aligned because it can get + ;; mapped multiple times in memory, and mapping works on allocation + ;; granularity boundaries (we don't want to map more than what we need) + ;; + ;; The easiest way to do so is by having the thunks section at the + ;; first 64K aligned virtual address in the binary. 
We provide a section + ;; layout file to the linker to tell it how to layout the thunks sections + ;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) + ;; + ;; The PE spec says images cannot have gaps between sections (other + ;; than what is required by the section alignment value in the header), + ;; therefore we need a couple of padding data sections (otherwise the + ;; OS will not load the image). + ;; + + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, "|.pad0|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, "|.pad1|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, "|.pad2|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, "|.pad3|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, "|.pad4|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, "|.pad5|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, "|.pad6|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, "|.pad7|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, "|.pad8|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, "|.pad9|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, "|.pad10|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, "|.pad11|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, "|.pad12|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, "|.pad13|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, "|.pad14|" + + ;; + ;; Declaring all the data section first since they have labels referenced by the stubs sections, to prevent + ;; compilation errors ("undefined symbols"). The stubs/data sections will be correctly laid out in the image + ;; using using the explicit layout configurations (ndp\rh\src\runtime\DLLs\mrt100_sectionlayout.txt) + ;; + NAMED_READWRITE_DATA_SECTION ThunkData0, "|.tkd0|", 0 + NAMED_READWRITE_DATA_SECTION ThunkData1, "|.tkd1|", 1 + NAMED_READWRITE_DATA_SECTION ThunkData2, "|.tkd2|", 2 + NAMED_READWRITE_DATA_SECTION ThunkData3, "|.tkd3|", 3 + NAMED_READWRITE_DATA_SECTION ThunkData4, "|.tkd4|", 4 + NAMED_READWRITE_DATA_SECTION ThunkData5, "|.tkd5|", 5 + NAMED_READWRITE_DATA_SECTION ThunkData6, "|.tkd6|", 6 + NAMED_READWRITE_DATA_SECTION ThunkData7, "|.tkd7|", 7 + + ;; + ;; Thunk Stubs + ;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: + ;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs + ;; - ndp\rh\src\tools\rhbind\zapimage.h + ;; + + LEAF_ENTRY ThunkPool, "|.tks0|" + THUNKS_PAGE_BLOCK 0 + LEAF_END ThunkPool + + LEAF_ENTRY ThunkPool1, "|.tks1|" + THUNKS_PAGE_BLOCK 1 + LEAF_END ThunkPool1 + + LEAF_ENTRY ThunkPool2, "|.tks2|" + THUNKS_PAGE_BLOCK 2 + LEAF_END ThunkPool2 + + LEAF_ENTRY ThunkPool3, "|.tks3|" + THUNKS_PAGE_BLOCK 3 + LEAF_END ThunkPool3 + + LEAF_ENTRY ThunkPool4, "|.tks4|" + THUNKS_PAGE_BLOCK 4 + LEAF_END ThunkPool4 + + LEAF_ENTRY ThunkPool5, "|.tks5|" + THUNKS_PAGE_BLOCK 5 + LEAF_END ThunkPool5 + + LEAF_ENTRY ThunkPool6, "|.tks6|" + THUNKS_PAGE_BLOCK 6 + LEAF_END ThunkPool6 + + LEAF_ENTRY ThunkPool7, "|.tks7|" + THUNKS_PAGE_BLOCK 7 + LEAF_END ThunkPool7 + + + ;; + ;; IntPtr RhpGetThunksBase() + ;; + ;; ARM64TODO: There is a bug in the arm64 assembler which ends up with mis-sorted Pdata entries + ;; for the functions in this file. As a work around, don't generate pdata for these small stubs. + ;; All the "No_PDATA" variants need to be removed after MASM bug 516396 is fixed. 
+ LEAF_ENTRY_NO_PDATA RhpGetThunksBase + ;; Return the address of the first thunk pool to the caller (this is really the base address) + ldr x0, =ThunkPool + ret + LEAF_END_NO_PDATA RhpGetThunksBase + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; int RhpGetNumThunksPerBlock() + ;; + LEAF_ENTRY_NO_PDATA RhpGetNumThunksPerBlock + mov x0, THUNK_POOL_NUM_THUNKS_PER_PAGE + ret + LEAF_END_NO_PDATA RhpGetNumThunksPerBlock + + ;; + ;; int RhpGetThunkSize() + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkSize + mov x0, THUNK_CODESIZE + ret + LEAF_END_NO_PDATA RhpGetThunkSize + + ;; + ;; int RhpGetNumThunkBlocksPerMapping() + ;; + LEAF_ENTRY_NO_PDATA RhpGetNumThunkBlocksPerMapping + mov x0, 8 + ret + LEAF_END_NO_PDATA RhpGetNumThunkBlocksPerMapping + + ;; + ;; int RhpGetThunkBlockSize + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkBlockSize + mov x0, PAGE_SIZE * 2 + ret + LEAF_END_NO_PDATA RhpGetThunkBlockSize + + ;; + ;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkDataBlockAddress + mov x12, PAGE_SIZE - 1 + bic x0, x0, x12 + mov x12, PAGE_SIZE + add x0, x0, x12 + ret + LEAF_END_NO_PDATA RhpGetThunkDataBlockAddress + + ;; + ;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkStubsBlockAddress + mov x12, PAGE_SIZE - 1 + bic x0, x0, x12 + mov x12, PAGE_SIZE + sub x0, x0, x12 + ret + LEAF_END_NO_PDATA RhpGetThunkStubsBlockAddress + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.asm new file mode 100644 index 0000000000000..f3df0ccf7ce84 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.asm @@ -0,0 +1,161 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
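The RhpGetThunkDataBlockAddress and RhpGetThunkStubsBlockAddress helpers in ThunkPoolThunks.asm above encode the pairing convention directly: every stubs page is immediately followed by its data page, so converting between the two is a mask to the page start plus or minus one page. A C++ sketch of the same arithmetic (page size assumed, as before):

    #include <cstdint>

    constexpr uintptr_t kPageSizeAssumed = 0x1000;   // assumption: matches the assembler's PAGE_SIZE

    // Mirror of RhpGetThunkDataBlockAddress: align the stub address down to its page,
    // then step forward one page to reach the paired data page.
    uintptr_t ThunkDataBlockFromStubSketch(uintptr_t thunkStubAddress)
    {
        return (thunkStubAddress & ~(kPageSizeAssumed - 1)) + kPageSizeAssumed;
    }

    // Mirror of RhpGetThunkStubsBlockAddress: the inverse mapping.
    uintptr_t ThunkStubsBlockFromDataSketch(uintptr_t thunkDataAddress)
    {
        return (thunkDataAddress & ~(kPageSizeAssumed - 1)) - kPageSizeAssumed;
    }

This is also why RhpGetThunkBlockSize reports PAGE_SIZE * 2: a "block" is one stubs page together with its data page.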
+ +#include "AsmMacros.h" + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + EXTERN RhpIntegerTrashValues + EXTERN RhpFpTrashValues +#endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + +;; Padding to account for the odd number of saved integer registers +#define ALIGNMENT_PADDING_SIZE (8) + +#define COUNT_ARG_REGISTERS (9) +#define INTEGER_REGISTER_SIZE (8) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +;; Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (8) +#define PUSHED_FP_SIZE (8) + +;; +;; From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +;; +;; ALIGNMENT_PADDING_SIZE +;; ARGUMENT_REGISTERS_SIZE +;; RETURN_BLOCK_SIZE +;; FLOAT_ARG_REGISTERS_SIZE +;; PUSHED_LR_SIZE +;; PUSHED_FP_SIZE +;; + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + \ + PUSHED_LR_SIZE + PUSHED_FP_SIZE) + +#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_LR_SIZE) +#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) + +;; +;; RhpUniversalTransition +;; +;; At input to this function, x0-8, d0-7 and the stack may contain any number of arguments. +;; +;; In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register: +;; xip0 will contain the managed function that is to be called by this transition function +;; xip1 will contain the pointer sized extra argument to the managed function +;; +;; When invoking the callee: +;; +;; x0 shall contain a pointer to the TransitionBlock +;; x1 shall contain the value that was in xip1 at entry to this function +;; +;; Frame layout is: +;; +;; {StackPassedArgs} ChildSP+0C0 CallerSP+000 +;; {AlignmentPad (0x8 bytes)} ChildSP+0B8 CallerSP-008 +;; {IntArgRegs (x0-x8) (0x48 bytes)} ChildSP+070 CallerSP-050 +;; {ReturnBlock (0x20 bytes)} ChildSP+050 CallerSP-070 +;; -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +;; in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +;; layout of all pieces of the frame that lie at or above the pushed floating point registers. +;; {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+010 CallerSP-0B0 +;; {PushedLR} ChildSP+008 CallerSP-0B8 +;; {PushedFP} ChildSP+000 CallerSP-0C0 +;; +;; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +;; must be updated as well. +;; +;; NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +;; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +;; FpArgRegs. +;; +;; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +;; everything between the base of the ReturnBlock and the top of the StackPassedArgs. +;; + + TEXTAREA + + MACRO + UNIVERSAL_TRANSITION $FunctionName + + NESTED_ENTRY Rhp$FunctionName + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-STACK_SIZE! 
;; Push down stack pointer and store FP and LR + + ;; Floating point registers + stp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + stp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + stp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + stp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + ;; Space for return buffer data (0x40 bytes) + + ;; Save argument registers + stp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + stp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + stp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + stp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + stp x8, xzr, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + ;; ARM64TODO +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + add x0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK ;; First parameter to target function is a pointer to the return block + mov x8, x0 ;; Arm64 calling convention: Address of return block shall be passed in x8 + mov x1, xip1 ;; Second parameter to target function + blr xip0 + + ;; We cannot make the label public as that tricks DIA stackwalker into thinking + ;; it's the beginning of a method. For this reason we export an auxiliary variable + ;; holding the address instead. + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom$FunctionName + + ;; Move the result (the target address) to x12 so it doesn't get overridden when we restore the + ;; argument registers. + mov x12, x0 + + ;; Restore floating point registers + ldp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + ldp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + ldp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + ldp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + ;; Restore the argument registers + ldp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + ldp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + ldp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + ldp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + ldr x8, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + + ;; Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR fp, lr, #STACK_SIZE! + + ;; Tailcall to the target address. + EPILOG_NOP br x12 + + NESTED_END Rhp$FunctionName + + MEND + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.S new file mode 100644 index 0000000000000..a14d99d7ef481 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.S @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement Unix write barriers +#include + +LEAF_ENTRY RhpAssignRef, _TEXT + str x1, [x0] + ret +LEAF_END RhpAssignRef, _TEXT + +LEAF_ENTRY RhpCheckedAssignRef, _TEXT + str x1, [x0] + ret +LEAF_END RhpCheckedAssignRef, _TEXT + +// +// RhpByRefAssignRef simulates movs instruction for object references. 
+// +// On entry: +// x0: address of ref-field (assigned to) +// x1: address of the data (source) +// x3: be trashed +// +// On exit: +// x0, x1 are incremented by 8, +// x3: trashed +// +LEAF_ENTRY RhpByRefAssignRef, _TEXT + ldr x3, [x1], #8 + str x3, [x0], #8 + ret +LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.asm new file mode 100644 index 0000000000000..204c79d00c4a3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.asm @@ -0,0 +1,318 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + +#include "AsmMacros.h" + + TEXTAREA + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +#ifdef WRITE_BARRIER_CHECK + + SETALIAS g_GCShadow, ?g_GCShadow@@3PEAEEA + SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA + EXTERN $g_GCShadow + EXTERN $g_GCShadowEnd + +INVALIDGCVALUE EQU 0xCCCCCCCD + + MACRO + ;; On entry: + ;; $destReg: location to be updated + ;; $refReg: objectref to be stored + ;; + ;; On exit: + ;; x9,x10: trashed + ;; other registers are preserved + ;; + UPDATE_GC_SHADOW $destReg, $refReg + + ;; If g_GCShadow is 0, don't perform the check. + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + cbz x9, %ft1 + + ;; Save $destReg since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). + mov x10, $destReg + + ;; Transform $destReg into the equivalent address in the shadow heap. + adrp x9, g_lowest_address + ldr x9, [x9, g_lowest_address] + subs $destReg, $destReg, x9 + blt %ft0 + + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + add $destReg, $destReg, x9 + + adrp x9, $g_GCShadowEnd + ldr x9, [x9, $g_GCShadowEnd] + cmp $destReg, x9 + bgt %ft0 + + ;; Update the shadow heap. + str $refReg, [$destReg] + + ;; The following read must be strongly ordered wrt to the write we've just performed in order to + ;; prevent race conditions. + dmb ish + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. + mov x9, x10 + ldr x9, [x9] + cmp x9, $refReg + beq %ft0 + + ;; Someone went and updated the real heap. 
We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + MOVL64 x9, INVALIDGCVALUE, 0 + str x9, [$destReg] + +0 + ;; Restore original $destReg value + mov $destReg, x10 + +1 + MEND + +#else // WRITE_BARRIER_CHECK + + MACRO + UPDATE_GC_SHADOW $destReg, $refReg + MEND + +#endif // WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). + +;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +;; some interlocked helpers that need an inline barrier. + MACRO + ;; On entry: + ;; $destReg: location to be updated + ;; $refReg: objectref to be stored + ;; + ;; On exit: + ;; $destReg: trashed + ;; x9: trashed + ;; + INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW $destReg, $refReg + + ;; We can skip the card table write if the reference is to + ;; an object not on the epehemeral segment. + adrp x9, g_ephemeral_low + ldr x9, [x9, g_ephemeral_low] + cmp $refReg, x9 + blt %ft0 + + adrp x9, g_ephemeral_high + ldr x9, [x9, g_ephemeral_high] + cmp $refReg, x9 + bge %ft0 + + ;; Set this object's card, if it hasn't already been set. + adrp x9, g_card_table + ldr x9, [x9, g_card_table] + add $destReg, x9, $destReg lsr #11 + + ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on + ;; multi-proc systems since it avoids cache thrashing. + ldrb w9, [$destReg] + cmp x9, 0xFF + beq %ft0 + + mov x9, 0xFF + strb w9, [$destReg] + +0 + ;; Exit label + MEND + + MACRO + ;; On entry: + ;; $destReg: location to be updated + ;; $refReg: objectref to be stored + ;; + ;; On exit: + ;; $destReg: trashed + ;; x9: trashed + ;; + INSERT_CHECKED_WRITE_BARRIER_CORE $destReg, $refReg + + ;; The "check" of this checked write barrier - is $destReg + ;; within the heap? if no, early out. + adrp x9, g_lowest_address + ldr x9, [x9, g_lowest_address] + cmp $destReg, x9 + blt %ft0 + + adrp x9, g_highest_address + ldr x9, [x9, g_highest_address] + cmp $destReg, x9 + bgt %ft0 + + INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg + +0 + ;; Exit label + MEND + +;; RhpCheckedAssignRef(Object** dst, Object* src) +;; +;; Write barrier for writes to objects that may reside +;; on the managed heap. +;; +;; On entry: +;; x0 : the destination address (LHS of the assignment). +;; May not be an object reference (hence the checked). +;; x1 : the object reference (RHS of the assignment). +;; On exit: +;; x1 : trashed +;; x9 : trashed + LEAF_ENTRY RhpCheckedAssignRef + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + ALTERNATE_ENTRY RhpCheckedAssignRefX1 + ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation + + stlr x1, [x0] + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 + + ret + + LEAF_END RhpCheckedAssignRef + +;; RhpAssignRef(Object** dst, Object* src) +;; +;; Write barrier for writes to objects that are known to +;; reside on the managed heap. 
+;; +;; On entry: +;; x0 : the destination address (LHS of the assignment). +;; x1 : the object reference (RHS of the assignment). +;; On exit: +;; x1 : trashed +;; x9 : trashed + LEAF_ENTRY RhpAssignRef + ALTERNATE_ENTRY RhpAssignRefAVLocation + ALTERNATE_ENTRY RhpAssignRefX1 + ALTERNATE_ENTRY RhpAssignRefX1AVLocation + + stlr x1, [x0] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE x0, x1 + + ret + + LEAF_END RhpAssignRef + +;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +;; successful updates. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +;; RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) +;; +;; Interlocked compare exchange on objectref. +;; +;; On entry: +;; x0: pointer to objectref +;; x1: exchange value +;; x2: comparand +;; +;; On exit: +;; x0: original value of objectref +;; x9: trashed +;; x10: trashed +;; + LEAF_ENTRY RhpCheckedLockCmpXchg + ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + +CmpXchgRetry + ;; Check location value is what we expect. + ldaxr x10, [x0] + cmp x10, x2 + bne CmpXchgNoUpdate + + ;; Current value matches comparand, attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, CmpXchgRetry + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 + +CmpXchgNoUpdate + ;; x10 still contains the original value. + mov x0, x10 + ret lr + + LEAF_END RhpCheckedLockCmpXchg + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +;; RhpCheckedXchg(Object** destination, Object* value) +;; +;; Interlocked exchange on objectref. +;; +;; On entry: +;; x0: pointer to objectref +;; x1: exchange value +;; +;; On exit: +;; x0: original value of objectref +;; x9: trashed +;; x10: trashed +;; + LEAF_ENTRY RhpCheckedXchg + ALTERNATE_ENTRY RhpCheckedXchgAVLocation + +ExchangeRetry + ;; Read the existing memory location. + ldaxr x10, [x0] + + ;; Attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, ExchangeRetry + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 + + ;; x10 still contains the original value. + mov x0, x10 + ret + + LEAF_END RhpCheckedXchg + + end diff --git a/src/coreclr/src/nativeaot/Runtime/eetype.cpp b/src/coreclr/src/nativeaot/Runtime/eetype.cpp new file mode 100644 index 0000000000000..d18de90b889f8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eetype.cpp @@ -0,0 +1,166 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
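Stripped of the shadow-heap debugging logic, the write barrier core in WriteBarriers.asm above performs two range checks and at most one byte write: skip the card update when the stored reference is not in the ephemeral range, otherwise dirty the card for the destination (one card byte per 2 KB, which is what the lsr #11 computes), avoiding the store when the card is already set. A C++ sketch of the checked variant, treating the runtime globals as plain variables for illustration:

    #include <cstdint>

    // Stand-ins for the globals referenced by the assembly (declarations only, illustrative).
    extern uint8_t*  g_card_table;
    extern uintptr_t g_lowest_address, g_highest_address;    // bounds of the GC heap
    extern uintptr_t g_ephemeral_low,  g_ephemeral_high;     // bounds of the ephemeral segment

    void CheckedWriteBarrierSketch(void** dest, void* ref)
    {
        *dest = ref;                                          // the store itself (stlr in the asm)

        uintptr_t d = reinterpret_cast<uintptr_t>(dest);
        uintptr_t r = reinterpret_cast<uintptr_t>(ref);

        if (d < g_lowest_address || d > g_highest_address)    // "checked": dest may not be a heap location
            return;
        if (r < g_ephemeral_low || r >= g_ephemeral_high)     // only ephemeral references need a card
            return;

        uint8_t* card = g_card_table + (d >> 11);             // one card byte per 2 KB region
        if (*card != 0xFF)                                    // avoid redundant writes (cache thrashing)
            *card = 0xFF;
    }

The unchecked variant used by RhpAssignRef simply omits the first range check, since its destination is known to be on the managed heap.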
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "rhassert.h" +#include "rhbinder.h" +#include "eetype.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" + +#include "CommonMacros.inl" + +#pragma warning(disable:4127) // C4127: conditional expression is constant + +// Validate an EEType extracted from an object. +bool EEType::Validate(bool assertOnFail /* default: true */) +{ +#define REPORT_FAILURE() do { if (assertOnFail) { ASSERT_UNCONDITIONALLY("EEType::Validate check failed"); } return false; } while (false) + + // Deal with the most common case of a bad pointer without an exception. + if (this == NULL) + REPORT_FAILURE(); + + // EEType structures should be at least pointer aligned. + if (dac_cast(this) & (sizeof(TADDR)-1)) + REPORT_FAILURE(); + + // Verify object size is bigger than min_obj_size + size_t minObjSize = get_BaseSize(); + if (get_ComponentSize() != 0) + { + // If it is an array, we will align the size to the nearest pointer alignment, even if there are + // zero elements. Our strings take advantage of this. + minObjSize = (size_t)ALIGN_UP(minObjSize, sizeof(TADDR)); + } + if (minObjSize < (3 * sizeof(TADDR))) + REPORT_FAILURE(); + + switch (get_Kind()) + { + case CanonicalEEType: + { + // If the parent type is NULL this had better look like Object. + if (!IsInterface() && (m_RelatedType.m_pBaseType == NULL)) + { + if (IsRelatedTypeViaIAT() || + get_IsValueType() || + HasFinalizer() || + HasReferenceFields() || + HasGenericVariance()) + { + REPORT_FAILURE(); + } + } + break; + } + + case ClonedEEType: + { + // Cloned types must have a related type. + if (m_RelatedType.m_ppCanonicalTypeViaIAT == NULL) + REPORT_FAILURE(); + + // Either we're dealing with a clone of String or a generic type. We can tell the difference based + // on the component size. + switch (get_ComponentSize()) + { + case 0: + { + // Cloned generic type. + if (!IsRelatedTypeViaIAT()) + { + REPORT_FAILURE(); + } + break; + } + + case 2: + { + // Cloned string. + if (get_IsValueType() || + HasFinalizer() || + HasReferenceFields() || + HasGenericVariance()) + { + REPORT_FAILURE(); + } + + break; + } + + default: + // Apart from cloned strings we don't expected cloned types to have a component size. + REPORT_FAILURE(); + } + break; + } + + case ParameterizedEEType: + { + // The only parameter EETypes that can exist on the heap are arrays + + // Array types must have a related type. + if (m_RelatedType.m_pRelatedParameterType == NULL) + REPORT_FAILURE(); + + // Component size cannot be zero in this case. + if (get_ComponentSize() == 0) + REPORT_FAILURE(); + + if (get_IsValueType() || + HasFinalizer() || + HasGenericVariance()) + { + REPORT_FAILURE(); + } + + break; + } + + case GenericTypeDefEEType: + { + // We should never see uninstantiated generic type definitions here + // since we should never construct an object instance around them. + REPORT_FAILURE(); + } + + default: + // Should be unreachable. 
+ REPORT_FAILURE(); + } + +#undef REPORT_FAILURE + + return true; +} + +//----------------------------------------------------------------------------------------------------------- +EEType::Kinds EEType::get_Kind() +{ + return (Kinds)(m_usFlags & (UInt16)EETypeKindMask); +} + +//----------------------------------------------------------------------------------------------------------- +EEType * EEType::get_CanonicalEEType() +{ + // cloned EETypes must always refer to types in other modules + ASSERT(IsCloned()); + if (IsRelatedTypeViaIAT()) + return *PTR_PTR_EEType(reinterpret_cast(m_RelatedType.m_ppCanonicalTypeViaIAT)); + else + return PTR_EEType(reinterpret_cast(m_RelatedType.m_pCanonicalType)); // in the R2R case, the link is direct rather than indirect via the IAT +} + +//----------------------------------------------------------------------------------------------------------- +EEType * EEType::get_RelatedParameterType() +{ + ASSERT(IsParameterizedType()); + + if (IsRelatedTypeViaIAT()) + return *PTR_PTR_EEType(reinterpret_cast(m_RelatedType.m_ppRelatedParameterTypeViaIAT)); + else + return PTR_EEType(reinterpret_cast(m_RelatedType.m_pRelatedParameterType)); +} diff --git a/src/coreclr/src/nativeaot/Runtime/event.cpp b/src/coreclr/src/nativeaot/Runtime/event.cpp new file mode 100644 index 0000000000000..73ad5095008d5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/event.cpp @@ -0,0 +1,120 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "event.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" + +// +// ----------------------------------------------------------------------------------------------------------- +// +// CLR wrapper around events. This version directly uses Win32 events (there's no support for host +// interception). 
+// + +bool CLREventStatic::CreateManualEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, TRUE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +bool CLREventStatic::CreateAutoEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, FALSE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +bool CLREventStatic::CreateOSManualEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, TRUE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +bool CLREventStatic::CreateOSAutoEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, FALSE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +void CLREventStatic::CloseEvent() +{ + if (m_fInitialized && m_hEvent != INVALID_HANDLE_VALUE) + { + PalCloseHandle(m_hEvent); + m_hEvent = INVALID_HANDLE_VALUE; + } +} + +bool CLREventStatic::IsValid() const +{ + return m_fInitialized && m_hEvent != INVALID_HANDLE_VALUE; +} + +bool CLREventStatic::Set() +{ + if (!m_fInitialized) + return false; + return PalSetEvent(m_hEvent); +} + +bool CLREventStatic::Reset() +{ + if (!m_fInitialized) + return false; + return PalResetEvent(m_hEvent); +} + +uint32_t CLREventStatic::Wait(uint32_t dwMilliseconds, bool bAlertable, bool bAllowReentrantWait) +{ + UInt32 result = WAIT_FAILED; + + if (m_fInitialized) + { + bool disablePreemptive = false; + Thread * pCurThread = ThreadStore::GetCurrentThreadIfAvailable(); + + if (NULL != pCurThread) + { + if (pCurThread->IsCurrentThreadInCooperativeMode()) + { + pCurThread->EnablePreemptiveMode(); + disablePreemptive = true; + } + } + + result = PalCompatibleWaitAny(bAlertable, dwMilliseconds, 1, &m_hEvent, bAllowReentrantWait); + + if (disablePreemptive) + { + pCurThread->DisablePreemptiveMode(); + } + } + + return result; +} + +HANDLE CLREventStatic::GetOSEvent() +{ + if (!m_fInitialized) + return INVALID_HANDLE_VALUE; + return m_hEvent; +} diff --git a/src/coreclr/src/nativeaot/Runtime/event.h b/src/coreclr/src/nativeaot/Runtime/event.h new file mode 100644 index 0000000000000..b46b9e538207c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/event.h @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +class CLREventStatic +{ +public: + bool CreateManualEventNoThrow(bool bInitialState); + bool CreateAutoEventNoThrow(bool bInitialState); + bool CreateOSManualEventNoThrow(bool bInitialState); + bool CreateOSAutoEventNoThrow(bool bInitialState); + void CloseEvent(); + bool IsValid() const; + bool Set(); + bool Reset(); + uint32_t Wait(uint32_t dwMilliseconds, bool bAlertable, bool bAllowReentrantWait = false); + HANDLE GetOSEvent(); + +private: + HANDLE m_hEvent; + bool m_fInitialized; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/eventtrace.cpp b/src/coreclr/src/nativeaot/Runtime/eventtrace.cpp new file mode 100644 index 0000000000000..fb8a2053d034c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtrace.cpp @@ -0,0 +1,6503 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
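CLREventStatic above is a thin wrapper over PAL event handles, so its lifecycle is create, signal or wait, then CloseEvent. A hypothetical usage sketch, assuming the PAL headers that define INFINITE are already included as they are in this file:

void EventUsageSketch()
{
    CLREventStatic doneEvent;

    // Auto-reset event, initially unsignaled.
    if (!doneEvent.CreateAutoEventNoThrow(false))
        return; // creation failed; nothing to close

    doneEvent.Set(); // a producer signals completion

    // Wait drops cooperative mode first (see CLREventStatic::Wait above), so a thread
    // blocked on the handle cannot hold up a GC.
    uint32_t waitResult = doneEvent.Wait(INFINITE, /* bAlertable */ false);
    (void)waitResult;

    doneEvent.CloseEvent(); // returns the handle to the PAL
}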
+// +// File: eventtrace.cpp +// Abstract: This module implements Event Tracing support +// +// ============================================================================ + +#include "common.h" + +#ifdef FEATURE_REDHAWK +#include "gcenv.h" +#include "gcheaputilities.h" + +#include "daccess.h" + +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "stackframeiterator.h" +#include "thread.h" +#include "rwlock.h" +#include "threadstore.h" +#include "threadstore.inl" +//#include "PalRedhawk.h" + +#define Win32EventWrite PalEventWrite +#else // !FEATURE_REDHAWK + +#include "eventtrace.h" +#include "winbase.h" +#include "contract.h" +#include "ex.h" +#include "dbginterface.h" +#define Win32EventWrite EventWrite + +// Flags used to store some runtime information for Event Tracing +BOOL g_fEEOtherStartup=FALSE; +BOOL g_fEEComActivatedStartup=FALSE; +LPCGUID g_fEEComObjectGuid=&GUID_NULL; + +BOOL g_fEEHostedStartup = FALSE; + +#endif // FEATURE_REDHAWK + +#include "eventtracepriv.h" + +#ifdef FEATURE_REDHAWK +volatile LONGLONG ETW::GCLog::s_l64LastClientSequenceNumber = 0; +#else // FEATURE_REDHAWK +Volatile ETW::GCLog::s_l64LastClientSequenceNumber = 0; +#endif // FEATURE_REDHAWK + +#ifndef FEATURE_REDHAWK + +//--------------------------------------------------------------------------------------- +// Helper macros to determine which version of the Method events to use +// +// The V2 versions of these events include the ReJITID, the V1 versions do not. +// Historically, when we version events, we'd just stop sending the old version and only +// send the new one. However, now that we have xperf in heavy use internally and soon to be +// used externally, we need to be a bit careful. In particular, we'd like to allow +// current xperf to continue working without knowledge of ReJITIDs, and allow future +// xperf to decode symbols in ReJITted functions. Thus, +// * During a first-JIT, only issue the existing V1 MethodLoad, etc. events (NOT v0, +// NOT v2). This event does not include a ReJITID, and can thus continue to be +// parsed by older decoders. +// * During a rejit, only issue the new V2 events (NOT v0 or v1), which will include a +// nonzero ReJITID. Thus, your unique key for a method extent would be MethodID + +// ReJITID + extent (hot/cold). These events will be ignored by older decoders +// (including current xperf) because of the version number, but xperf will be +// updated to decode these in the future. 
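The macros that follow implement the dispatch described above: the V1 event fires for a first JIT (no ReJITID, so existing decoders keep working) and the V2 event fires only for rejitted code, where the nonzero ReJITID becomes part of the method's identity. A minimal C++ illustration of the same pattern, with hypothetical stub fire functions standing in for the manifest-generated FireEtw* macros:

typedef unsigned long long ReJITID;

// Hypothetical stand-ins for the generated V1/V2 fire macros.
inline void FireMethodLoadV1Sketch(unsigned long long /*methodID*/) { }
inline void FireMethodLoadV2Sketch(unsigned long long /*methodID*/, ReJITID /*rejitID*/) { }

inline void FireMethodLoadV1OrV2Sketch(unsigned long long methodID, ReJITID rejitID)
{
    if (rejitID == 0)
        FireMethodLoadV1Sketch(methodID);          // first JIT: no ReJITID payload
    else
        FireMethodLoadV2Sketch(methodID, rejitID); // rejit: include the nonzero ReJITID
}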
+ +#define FireEtwMethodLoadVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodLoadVerbose_V1(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodLoadVerbose_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodLoad_V1_or_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodLoad_V1(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, clrInstanceID); } \ + else \ + { FireEtwMethodLoad_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodUnloadVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodUnloadVerbose_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodUnloadVerbose_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodUnload_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodUnload_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID); } \ + else \ + { FireEtwMethodUnload_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCStartVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCStartVerbose_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodDCStartVerbose_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCStart_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCStart_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID); } \ + else \ + 
{ FireEtwMethodDCStart_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCEndVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCEndVerbose_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodDCEndVerbose_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCEnd_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCEnd_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID); } \ + else \ + { FireEtwMethodDCEnd_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID); } \ +} + +// Module load / unload events: +// There is no precedent here for using GUIDs in Mac events, and it's doubtful any +// of the new PDB fields for the V2 Module events are at all useful on the Mac anyway. So +// stick with V1 module events on the Mac. + +#ifdef FEATURE_DTRACE +#define FireEtwModuleLoad_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleLoad_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#define FireEtwModuleUnload_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleUnload_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#define FireEtwModuleDCStart_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCStart_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#define FireEtwModuleDCEnd_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCEnd_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#else // FEATURE_DTRACE +#define FireEtwModuleLoad_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleLoad_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, 
szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#define FireEtwModuleUnload_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleUnload_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#define FireEtwModuleDCStart_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCStart_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#define FireEtwModuleDCEnd_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCEnd_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#endif // FEATURE_DTRACE + + +//--------------------------------------------------------------------------------------- +// +// Rather than checking the NGEN keyword on the runtime provider directly, use this +// helper that checks that the NGEN runtime provider keyword is enabled AND the +// OverrideAndSuppressNGenEvents keyword on the runtime provider is NOT enabled. +// +// OverrideAndSuppressNGenEvents allows controllers to set the expensive NGEN keyword for +// older runtimes (< 4.0) where NGEN PDB info is NOT available, while suppressing those +// expensive events on newer runtimes (>= 4.5) where NGEN PDB info IS available. Note +// that 4.0 has NGEN PDBS but unfortunately not the OverrideAndSuppressNGenEvents +// keyword, b/c NGEN PDBs were made publicly only after 4.0 shipped. So tools that need +// to consume both <4.0 and 4.0 events would neeed to enable the expensive NGEN events to +// deal properly with 3.5, even though those events aren't necessary on 4.0. +// +// On CoreCLR, this keyword is a no-op, because coregen PDBs don't exist (and thus we'll +// need the NGEN rundown to still work on Silverligth). +// +// Return Value: +// nonzero iff NGenKeyword is enabled on the runtime provider and +// OverrideAndSuppressNGenEventsKeyword is not enabled on the runtime provider. +// + +BOOL IsRuntimeNgenKeywordEnabledAndNotSuppressed() +{ + LIMITED_METHOD_CONTRACT; + + return + ( + ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_NGEN_KEYWORD) + && ! 
( ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_OVERRIDEANDSUPPRESSNGENEVENTS_KEYWORD) ) + ); +} + +// Same as above, but for the rundown provider +BOOL IsRundownNgenKeywordEnabledAndNotSuppressed() +{ + LIMITED_METHOD_CONTRACT; + + return + ( + ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNNGEN_KEYWORD) + && ! ( ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNOVERRIDEANDSUPPRESSNGENEVENTS_KEYWORD) ) + ); +} + +/*******************************************************/ +/* Fast assembly function to get the topmost EBP frame */ +/*******************************************************/ +#if defined(TARGET_X86) +extern "C" +{ + CallStackFrame* GetEbp() + { + CallStackFrame *frame=NULL; + __asm + { + mov frame, ebp + } + return frame; + } +} +#endif //TARGET_X86 + +#ifndef FEATURE_PAL + +/*************************************/ +/* Function to append a frame to an existing stack */ +/*************************************/ +void ETW::SamplingLog::Append(SIZE_T currentFrame) +{ + LIMITED_METHOD_CONTRACT; + if(m_FrameCount < (ETW::SamplingLog::s_MaxStackSize-1) && + currentFrame != 0) + { + m_EBPStack[m_FrameCount] = currentFrame; + m_FrameCount++; + } +}; + +/********************************************************/ +/* Function to get the callstack on the current thread */ +/********************************************************/ +ETW::SamplingLog::EtwStackWalkStatus ETW::SamplingLog::GetCurrentThreadsCallStack(UINT32 *frameCount, PVOID **Stack) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + SO_TOLERANT; + } + CONTRACTL_END; + + // The stack walk performed below can cause allocations (thus entering the host). But + // this is acceptable, since we're not supporting the use of SQL/F1 profiling and + // full-blown ETW CLR stacks (which would be redundant). 
+ PERMANENT_CONTRACT_VIOLATION(HostViolation, ReasonUnsupportedForSQLF1Profiling); + + m_FrameCount = 0; + ETW::SamplingLog::EtwStackWalkStatus stackwalkStatus = SaveCurrentStack(); + + _ASSERTE(m_FrameCount < ETW::SamplingLog::s_MaxStackSize); + + // this not really needed, but let's do it + // because we use the framecount while dumping the stack event + for(int i=m_FrameCount; im_State & Thread::TS_Hijacked) { + return ETW::SamplingLog::UnInitialized; + } + + if (pThread->IsEtwStackWalkInProgress()) + { + return ETW::SamplingLog::InProgress; + } + pThread->MarkEtwStackWalkInProgress(); + EX_TRY + { +#ifdef TARGET_X86 + CallStackFrame *currentEBP = GetEbp(); + CallStackFrame *lastEBP = NULL; + while(currentEBP) + { + lastEBP = currentEBP; + currentEBP = currentEBP->m_Next; + + // Skip the top N frames + if(skipTopNFrames) { + skipTopNFrames--; + continue; + } + + // Save the Return Address for symbol decoding + Append(lastEBP->m_ReturnAddress); + + // Check for stack limits + if((SIZE_T)currentEBP < (SIZE_T)Thread::GetStackLowerBound() || (SIZE_T)currentEBP > (SIZE_T)Thread::GetStackUpperBound()) + { + break; + } + + // If we have a too small address, we are probably bad + if((SIZE_T)currentEBP < (SIZE_T)0x10000) + break; + + if((SIZE_T)currentEBP < (SIZE_T)lastEBP) + { + break; + } + } +#else + CONTEXT ctx; + ClrCaptureContext(&ctx); + UINT_PTR ControlPc = 0; + UINT_PTR CurrentSP = 0, PrevSP = 0; + + while(1) + { + // Unwind to the caller + ControlPc = Thread::VirtualUnwindCallFrame(&ctx); + + // This is to take care of recursion + CurrentSP = (UINT_PTR)GetSP(&ctx); + + // when to break from this loop + if ( ControlPc == 0 || ( PrevSP == CurrentSP ) ) + { + break; + } + + // Skip the top N frames + if ( skipTopNFrames ) { + skipTopNFrames--; + continue; + } + + // Add the stack frame to the list + Append(ControlPc); + + PrevSP = CurrentSP; + } +#endif //TARGET_X86 + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); + pThread->MarkEtwStackWalkCompleted(); +#endif //!DACCESS_COMPILE + + return ETW::SamplingLog::Completed; +} +#endif //!FEATURE_PAL + +#endif // !FEATURE_REDHAWK + + +#if defined(FEATURE_REDHAWK) || !defined(FEATURE_PAL) || defined(FEATURE_DTRACE) + +/****************************************************************************/ +/* Methods that are called from the runtime */ +/****************************************************************************/ + +#ifndef FEATURE_DTRACE +/****************************************************************************/ +/* Methods for rundown events */ +/* Since DTRACe does not support passing a method pointer as a callback when*/ +/* enable a events, rundown events are not supported on Mac */ +/****************************************************************************/ + +/***************************************************************************/ +/* This function should be called from the event tracing callback routine + when the private CLR provider is enabled */ +/***************************************************************************/ + +#ifndef FEATURE_REDHAWK + +void ETW::GCLog::GCSettingsEvent() +{ + if (GCHeapUtilities::IsGCHeapInitialized()) + { + if (ETW_TRACING_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + GCSettings)) + { + ETW::GCLog::ETW_GC_INFO Info; + + Info.GCSettings.ServerGC = GCHeapUtilities::IsServerHeap (); + Info.GCSettings.SegmentSize = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize (FALSE); + Info.GCSettings.LargeObjectSegmentSize = 
GCHeapUtilities::GetGCHeap()->GetValidSegmentSize (TRUE); + FireEtwGCSettings_V1(Info.GCSettings.SegmentSize, Info.GCSettings.LargeObjectSegmentSize, Info.GCSettings.ServerGC, GetClrInstanceId()); + } + GCHeapUtilities::GetGCHeap()->TraceGCSegments(); + } +}; + +#endif // !FEATURE_REDHAWK + + +//--------------------------------------------------------------------------------------- +// Code for sending GC heap object events is generally the same for both FEATURE_REDHAWK +// and !FEATURE_REDHAWK builds +//--------------------------------------------------------------------------------------- + + +// Simple helpers called by the GC to decide whether it needs to do a walk of heap +// objects and / or roots. + +BOOL ETW::GCLog::ShouldWalkHeapObjectsForEtw() +{ + LIMITED_METHOD_CONTRACT; + return ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPDUMP_KEYWORD); +} + +BOOL ETW::GCLog::ShouldWalkHeapRootsForEtw() +{ + LIMITED_METHOD_CONTRACT; + return ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPDUMP_KEYWORD); +} + +BOOL ETW::GCLog::ShouldTrackMovementForEtw() +{ + LIMITED_METHOD_CONTRACT; + return ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPSURVIVALANDMOVEMENT_KEYWORD); +} + +BOOL ETW::GCLog::ShouldWalkStaticsAndCOMForEtw() +{ + // @TODO: + return FALSE; +} + +void ETW::GCLog::WalkStaticsAndCOMForETW() +{ + // @TODO: +} + + +// Batches the list of moved/surviving references for the GCBulkMovedObjectRanges / +// GCBulkSurvivingObjectRanges events +struct EtwGcMovementContext +{ +public: + // An instance of EtwGcMovementContext is dynamically allocated and stored + // inside of MovedReferenceContextForEtwAndProfapi, which in turn is dynamically + // allocated and pointed to by a profiling_context pointer created by the GC on the stack. + // This is used to batch and send GCBulkSurvivingObjectRanges events and + // GCBulkMovedObjectRanges events. This method is passed a pointer to + // MovedReferenceContextForEtwAndProfapi::pctxEtw; if non-NULL it gets returned; + // else, a new EtwGcMovementContext is allocated, stored in that pointer, and + // then returned. Callers should test for NULL, which can be returned if out of + // memory + static EtwGcMovementContext * GetOrCreateInGCContext(EtwGcMovementContext ** ppContext) + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(ppContext != NULL); + + EtwGcMovementContext * pContext = *ppContext; + if (pContext == NULL) + { + pContext = new (nothrow) EtwGcMovementContext; + *ppContext = pContext; + } + return pContext; + } + + EtwGcMovementContext() : + iCurBulkSurvivingObjectRanges(0), + iCurBulkMovedObjectRanges(0) + { + LIMITED_METHOD_CONTRACT; + Clear(); + } + + // Resets structure for reuse on construction, and after each flush. + // (Intentionally leave iCurBulk* as is, since they persist across flushes within a GC.) 
+ void Clear() + { + LIMITED_METHOD_CONTRACT; + cBulkSurvivingObjectRanges = 0; + cBulkMovedObjectRanges = 0; + ZeroMemory(rgGCBulkSurvivingObjectRanges, sizeof(rgGCBulkSurvivingObjectRanges)); + ZeroMemory(rgGCBulkMovedObjectRanges, sizeof(rgGCBulkMovedObjectRanges)); + } + + //--------------------------------------------------------------------------------------- + // GCBulkSurvivingObjectRanges + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkSurvivingObjectRanges event + UINT iCurBulkSurvivingObjectRanges; + + // Number of surviving object ranges currently filled out in rgGCBulkSurvivingObjectRanges array + UINT cBulkSurvivingObjectRanges; + + // Struct array containing the primary data for each GCBulkSurvivingObjectRanges + // event. Fix the size so the total event stays well below the 64K limit (leaving + // lots of room for non-struct fields that come before the values data) + EventStructGCBulkSurvivingObjectRangesValue rgGCBulkSurvivingObjectRanges[ + (cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkSurvivingObjectRangesValue)]; + + //--------------------------------------------------------------------------------------- + // GCBulkMovedObjectRanges + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkMovedObjectRanges event + UINT iCurBulkMovedObjectRanges; + + // Number of Moved object ranges currently filled out in rgGCBulkMovedObjectRanges array + UINT cBulkMovedObjectRanges; + + // Struct array containing the primary data for each GCBulkMovedObjectRanges + // event. Fix the size so the total event stays well below the 64K limit (leaving + // lots of room for non-struct fields that come before the values data) + EventStructGCBulkMovedObjectRangesValue rgGCBulkMovedObjectRanges[ + (cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkMovedObjectRangesValue)]; +}; + +// Contains above struct for ETW, plus extra info (opaque to us) used by the profiling +// API to track its own information. +struct MovedReferenceContextForEtwAndProfapi +{ + // An instance of MovedReferenceContextForEtwAndProfapi is dynamically allocated and + // pointed to by a profiling_context pointer created by the GC on the stack. This is used to + // batch and send GCBulkSurvivingObjectRanges events and GCBulkMovedObjectRanges + // events and the corresponding callbacks for profapi profilers. This method is + // passed a pointer to a MovedReferenceContextForEtwAndProfapi; if non-NULL it gets + // returned; else, a new MovedReferenceContextForEtwAndProfapi is allocated, stored + // in that pointer, and then returned. Callers should test for NULL, which can be + // returned if out of memory + static MovedReferenceContextForEtwAndProfapi * CreateInGCContext(LPVOID pvContext) + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(pvContext != NULL); + + MovedReferenceContextForEtwAndProfapi * pContext = *(MovedReferenceContextForEtwAndProfapi **) pvContext; + + // Shouldn't be called if the context was already created. Perhaps someone made + // one too many BeginMovedReferences calls, or didn't have an EndMovedReferences + // in between? 
+ _ASSERTE(pContext == NULL); + + pContext = new (nothrow) MovedReferenceContextForEtwAndProfapi; + *(MovedReferenceContextForEtwAndProfapi **) pvContext = pContext; + + return pContext; + } + + + MovedReferenceContextForEtwAndProfapi() : + pctxProfAPI(NULL), + pctxEtw(NULL) + + { + LIMITED_METHOD_CONTRACT; + } + + LPVOID pctxProfAPI; + EtwGcMovementContext * pctxEtw; +}; + + +//--------------------------------------------------------------------------------------- +// +// Called by the GC for each moved or surviving reference that it encounters. This +// batches the info into our context's buffer, and flushes that buffer to ETW as it fills +// up. +// +// Arguments: +// * pbMemBlockStart - Start of moved/surviving block +// * pbMemBlockEnd - Next pointer after end of moved/surviving block +// * cbRelocDistance - How far did the block move? (0 for non-compacted / surviving +// references; negative if moved to earlier addresses) +// * profilingContext - Where our context is stored +// * fCompacting - Is this a compacting GC? Used to decide whether to send the moved +// or surviving event +// + +// static +void ETW::GCLog::MovedReference( + BYTE * pbMemBlockStart, + BYTE * pbMemBlockEnd, + ptrdiff_t cbRelocDistance, + size_t profilingContext, + BOOL fCompacting, + BOOL /*fAllowProfApiNotification*/) // @TODO: unused param from newer implementation +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; // EEToProfInterfaceImpl::AllocateMovedReferencesData takes lock + } + CONTRACTL_END; + + MovedReferenceContextForEtwAndProfapi * pCtxForEtwAndProfapi = + (MovedReferenceContextForEtwAndProfapi *) profilingContext; + if (pCtxForEtwAndProfapi == NULL) + { + _ASSERTE(!"MovedReference() encountered a NULL profilingContext"); + return; + } + +#ifdef PROFILING_SUPPORTED + // ProfAPI + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + g_profControlBlock.pProfInterface->MovedReference(pbMemBlockStart, + pbMemBlockEnd, + cbRelocDistance, + &(pCtxForEtwAndProfapi->pctxProfAPI), + fCompacting); + END_PIN_PROFILER(); + } +#endif // PROFILING_SUPPORTED + + // ETW + + if (!ShouldTrackMovementForEtw()) + return; + + EtwGcMovementContext * pContext = + EtwGcMovementContext::GetOrCreateInGCContext(&pCtxForEtwAndProfapi->pctxEtw); + if (pContext == NULL) + return; + + if (fCompacting) + { + // Moved references + + _ASSERTE(pContext->cBulkMovedObjectRanges < _countof(pContext->rgGCBulkMovedObjectRanges)); + EventStructGCBulkMovedObjectRangesValue * pValue = + &pContext->rgGCBulkMovedObjectRanges[pContext->cBulkMovedObjectRanges]; + pValue->OldRangeBase = pbMemBlockStart; + pValue->NewRangeBase = pbMemBlockStart + cbRelocDistance; + pValue->RangeLength = pbMemBlockEnd - pbMemBlockStart; + pContext->cBulkMovedObjectRanges++; + + // If buffer is now full, empty it into ETW + if (pContext->cBulkMovedObjectRanges == _countof(pContext->rgGCBulkMovedObjectRanges)) + { + FireEtwGCBulkMovedObjectRanges( + pContext->iCurBulkMovedObjectRanges, + pContext->cBulkMovedObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkMovedObjectRanges[0]), + &pContext->rgGCBulkMovedObjectRanges[0]); + + pContext->iCurBulkMovedObjectRanges++; + pContext->Clear(); + } + } + else + { + // Surviving references + + _ASSERTE(pContext->cBulkSurvivingObjectRanges < _countof(pContext->rgGCBulkSurvivingObjectRanges)); + EventStructGCBulkSurvivingObjectRangesValue * pValue = + &pContext->rgGCBulkSurvivingObjectRanges[pContext->cBulkSurvivingObjectRanges]; + pValue->RangeBase = 
pbMemBlockStart; + pValue->RangeLength = pbMemBlockEnd - pbMemBlockStart; + pContext->cBulkSurvivingObjectRanges++; + + // If buffer is now full, empty it into ETW + if (pContext->cBulkSurvivingObjectRanges == _countof(pContext->rgGCBulkSurvivingObjectRanges)) + { + FireEtwGCBulkSurvivingObjectRanges( + pContext->iCurBulkSurvivingObjectRanges, + pContext->cBulkSurvivingObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkSurvivingObjectRanges[0]), + &pContext->rgGCBulkSurvivingObjectRanges[0]); + + pContext->iCurBulkSurvivingObjectRanges++; + pContext->Clear(); + } + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + + +//--------------------------------------------------------------------------------------- +// +// Called by the GC just before it begins enumerating plugs. Gives us a chance to +// allocate our context structure, to allow us to batch plugs before firing events +// for them +// +// Arguments: +// * pProfilingContext - Points to location on stack (in GC function) where we can +// store a pointer to the context we allocate +// + +// static +void ETW::GCLog::BeginMovedReferences(size_t * pProfilingContext) +{ + LIMITED_METHOD_CONTRACT; + + MovedReferenceContextForEtwAndProfapi::CreateInGCContext(LPVOID(pProfilingContext)); +} + + +//--------------------------------------------------------------------------------------- +// +// Called by the GC at the end of a heap walk to give us a place to flush any remaining +// buffers of data to ETW or the profapi profiler +// +// Arguments: +// profilingContext - Our context we built up during the heap walk +// + +// static +void ETW::GCLog::EndMovedReferences(size_t profilingContext, + BOOL /*fAllowProfApiNotification*/) // @TODO: unused param from newer implementation +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + MovedReferenceContextForEtwAndProfapi * pCtxForEtwAndProfapi = (MovedReferenceContextForEtwAndProfapi *) profilingContext; + if (pCtxForEtwAndProfapi == NULL) + { + _ASSERTE(!"EndMovedReferences() encountered a NULL profilingContext"); + return; + } + +#ifdef PROFILING_SUPPORTED + // ProfAPI + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + g_profControlBlock.pProfInterface->EndMovedReferences(&(pCtxForEtwAndProfapi->pctxProfAPI)); + END_PIN_PROFILER(); + } +#endif //PROFILING_SUPPORTED + + // ETW + + if (!ShouldTrackMovementForEtw()) + return; + + // If context isn't already set up for us, then we haven't been collecting any data + // for ETW events. 
+ EtwGcMovementContext * pContext = pCtxForEtwAndProfapi->pctxEtw; + if (pContext == NULL) + return; + + // Flush any remaining moved or surviving range data + + if (pContext->cBulkMovedObjectRanges > 0) + { + FireEtwGCBulkMovedObjectRanges( + pContext->iCurBulkMovedObjectRanges, + pContext->cBulkMovedObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkMovedObjectRanges[0]), + &pContext->rgGCBulkMovedObjectRanges[0]); + } + + if (pContext->cBulkSurvivingObjectRanges > 0) + { + FireEtwGCBulkSurvivingObjectRanges( + pContext->iCurBulkSurvivingObjectRanges, + pContext->cBulkSurvivingObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkSurvivingObjectRanges[0]), + &pContext->rgGCBulkSurvivingObjectRanges[0]); + } + + pCtxForEtwAndProfapi->pctxEtw = NULL; + delete pContext; +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + +/***************************************************************************/ +/* This implements the public runtime provider's GCHeapCollectKeyword. It + performs a full, gen-2, blocking GC. +/***************************************************************************/ +void ETW::GCLog::ForceGC(LONGLONG l64ClientSequenceNumber) +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + MODE_ANY; + } + CONTRACTL_END; + +#ifdef FEATURE_REDHAWK + if (!GCHeapUtilities::IsGCHeapInitialized()) + return; + + // No InterlockedExchange64 on Redhawk, even though there is one for + // InterlockedCompareExchange64. Technically, there's a race here by using + // InterlockedCompareExchange64, but it's not worth addressing. The race would be + // between two ETW controllers trying to trigger GCs simultaneously, in which case + // one will win and get its sequence number to appear in the GCStart event, while the + // other will lose. Rare, uninteresting, and low-impact. + PalInterlockedCompareExchange64(&s_l64LastClientSequenceNumber, l64ClientSequenceNumber, s_l64LastClientSequenceNumber); +#else // !FEATURE_REDHAWK + if (!IsGarbageCollectorFullyInitialized()) + return; + + InterlockedExchange64(&s_l64LastClientSequenceNumber, l64ClientSequenceNumber); +#endif // FEATURE_REDHAWK + + ForceGCForDiagnostics(); +} + +//--------------------------------------------------------------------------------------- +// +// Helper to fire the GCStart event. Figures out which version of GCStart to fire, and +// includes the client sequence number, if available. 
+// +// Arguments: +// pGcInfo - ETW_GC_INFO containing details from GC about this collection +// + +// static +void ETW::GCLog::FireGcStart(ETW_GC_INFO * pGcInfo) +{ + LIMITED_METHOD_CONTRACT; + +#if !defined(FEATURE_PAL) || defined(FEATURE_DTRACE) + + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GC_KEYWORD)) + { +#if !defined(FEATURE_PAL) + // If the controller specified a client sequence number for us to log with this + // GCStart, then retrieve it + LONGLONG l64ClientSequenceNumberToLog = 0; + if ((s_l64LastClientSequenceNumber != 0) && + (pGcInfo->GCStart.Depth == GCHeapUtilities::GetGCHeap()->GetMaxGeneration()) && + (pGcInfo->GCStart.Reason == ETW_GC_INFO::GC_INDUCED)) + { +#ifdef FEATURE_REDHAWK + // No InterlockedExchange64 on Redhawk (presumably b/c there is no compiler + // intrinsic for this on x86, even though there is one for InterlockedCompareExchange64) + l64ClientSequenceNumberToLog = PalInterlockedCompareExchange64(&s_l64LastClientSequenceNumber, 0, s_l64LastClientSequenceNumber); +#else + l64ClientSequenceNumberToLog = InterlockedExchange64(&s_l64LastClientSequenceNumber, 0); +#endif + } + + FireEtwGCStart_V2(pGcInfo->GCStart.Count, pGcInfo->GCStart.Depth, pGcInfo->GCStart.Reason, pGcInfo->GCStart.Type, GetClrInstanceId(), l64ClientSequenceNumberToLog); + +#elif defined(FEATURE_DTRACE) + FireEtwGCStart(pGcInfo->GCStart.Count,pGcInfo->GCStart.Reason); +#endif + } + +#endif // defined(FEATURE_PAL) || defined(FEATURE_DTRACE) +} + +//--------------------------------------------------------------------------------------- +// +// Contains code common to profapi and ETW scenarios where the profiler wants to force +// the CLR to perform a GC. The important work here is to create a managed thread for +// the current thread BEFORE the GC begins. On both ETW and profapi threads, there may +// not yet be a managed thread object. But some scenarios require a managed thread +// object be present (notably if we need to call into Jupiter during the GC). +// +// Return Value: +// HRESULT indicating success or failure +// +// Assumptions: +// Caller should ensure that the EE has fully started up and that the GC heap is +// initialized enough to actually perform a GC +// + +// static +HRESULT ETW::GCLog::ForceGCForDiagnostics() +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + MODE_ANY; + } + CONTRACTL_END; + + HRESULT hr = E_FAIL; + +#ifndef FEATURE_REDHAWK + // Caller should ensure we're past startup. + _ASSERTE(IsGarbageCollectorFullyInitialized()); + + // In immersive apps the GarbageCollect() call below will call into Jupiter, + // which will call back into the runtime to track references. This call + // chain would cause a Thread object to be created for this thread while code + // higher on the stack owns the ThreadStoreLock. This will lead to asserts + // since the ThreadStoreLock is non-reentrant. To avoid this we'll create + // the Thread object here instead. + if (GetThreadNULLOk() == NULL) + { + HRESULT hr = E_FAIL; + SetupThreadNoThrow(&hr); + if (FAILED(hr)) + return hr; + } + + ASSERT_NO_EE_LOCKS_HELD(); + + EX_TRY + { + // Need to switch to cooperative mode as the thread will access managed + // references (through Jupiter callbacks). + GCX_COOP(); + +#else // FEATURE_REDHAWK + _ASSERTE(GCHeapUtilities::IsGCHeapInitialized()); + + ThreadStore::AttachCurrentThread(); + Thread * pThread = ThreadStore::GetCurrentThread(); + + // Doing this prevents the GC from trying to walk this thread's stack for roots. 
+ pThread->SetGCSpecial(true); + + // While doing the GC, much code assumes & asserts the thread doing the GC is in + // cooperative mode. + pThread->DisablePreemptiveMode(); +#endif // FEATURE_REDHAWK + + hr = GCHeapUtilities::GetGCHeap()->GarbageCollect( + -1, // all generations should be collected + FALSE, // low_memory_p + collection_blocking); + +#ifdef FEATURE_REDHAWK + // In case this thread (generated by the ETW OS APIs) hangs around a while, + // better stick it back into preemptive mode, so it doesn't block any other GCs + pThread->EnablePreemptiveMode(); +#else // !FEATURE_REDHAWK + } + EX_CATCH { } + EX_END_CATCH(RethrowCorruptingExceptions); +#endif // FEATURE_REDHAWK + + return hr; +} + + +//--------------------------------------------------------------------------------------- +// BulkTypeValue / BulkTypeEventLogger: These take care of batching up types so they can +// be logged via ETW in bulk +//--------------------------------------------------------------------------------------- + +BulkTypeValue::BulkTypeValue() : cTypeParameters(0), rgTypeParameters() +#ifdef FEATURE_REDHAWK +, ullSingleTypeParameter(0) +#else // FEATURE_REDHAWK +, sName() +#endif // FEATURE_REDHAWK +{ + LIMITED_METHOD_CONTRACT; + ZeroMemory(&fixedSizedData, sizeof(fixedSizedData)); +} + +//--------------------------------------------------------------------------------------- +// +// Clears a BulkTypeValue so it can be reused after the buffer is flushed to ETW +// + +void BulkTypeValue::Clear() +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + ZeroMemory(&fixedSizedData, sizeof(fixedSizedData)); + cTypeParameters = 0; +#ifdef FEATURE_REDHAWK + ullSingleTypeParameter = 0; + rgTypeParameters.Release(); +#else // FEATURE_REDHAWK + sName.Clear(); + rgTypeParameters.Clear(); +#endif // FEATURE_REDHAWK +} + +//--------------------------------------------------------------------------------------- +// +// Fire an ETW event for all the types we batched so far, and then reset our state +// so we can start batching new types at the beginning of the array. +// +// + +void BulkTypeEventLogger::FireBulkTypeEvent() +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + LIMITED_METHOD_CONTRACT; + + if (m_nBulkTypeValueCount == 0) + { + // No types were batched up, so nothing to send + return; + } + + // Normally, we'd use the MC-generated FireEtwBulkType for all this gunk, but + // it's insufficient as the bulk type event is too complex (arrays of structs of + // varying size). So we directly log the event via EventDataDescCreate and + // EventWrite + + // We use one descriptor for the count + one for the ClrInstanceID + 4 + // per batched type (to include fixed-size data + name + param count + param + // array). But the system limit of 128 descriptors per event kicks in way + // before the 64K event size limit, and we already limit our batch size + // (m_nBulkTypeValueCount) to stay within the 128 descriptor limit. 
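A hypothetical back-of-the-envelope sketch of the descriptor budget described above, assuming one descriptor for the count, one for the ClrInstanceID, and four per batched type (the constant names here are illustrative only):

constexpr unsigned kMaxDescriptorsPerEventSketch = 128; // ETW descriptor limit per event
constexpr unsigned kDescriptorsPerTypeSketch     = 4;   // fixed data + name + param count + param array
constexpr unsigned kMaxTypesPerBatchSketch =
    (kMaxDescriptorsPerEventSketch - 2) / kDescriptorsPerTypeSketch;  // (128 - 2) / 4 == 31
static_assert(kMaxTypesPerBatchSketch == 31, "descriptor budget caps each BulkType batch");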
+ EVENT_DATA_DESCRIPTOR EventData[128]; + UINT16 nClrInstanceID = GetClrInstanceId(); + + UINT iDesc = 0; + + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate(&EventData[iDesc++], &m_nBulkTypeValueCount, sizeof(m_nBulkTypeValueCount)); + + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate(&EventData[iDesc++], &nClrInstanceID, sizeof(nClrInstanceID)); + + for (int iTypeData = 0; iTypeData < m_nBulkTypeValueCount; iTypeData++) + { + // Do fixed-size data as one bulk copy + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate( + &EventData[iDesc++], + &(m_rgBulkTypeValues[iTypeData].fixedSizedData), + sizeof(m_rgBulkTypeValues[iTypeData].fixedSizedData)); + + // Do var-sized data individually per field + + // Type name (nonexistent and thus empty on FEATURE_REDHAWK) + _ASSERTE(iDesc < _countof(EventData)); +#ifdef FEATURE_REDHAWK + EventDataDescCreate(&EventData[iDesc++], L"", sizeof(WCHAR)); +#else // FEATURE_REDHAWK + LPCWSTR wszName = m_rgBulkTypeValues[iTypeData].sName.GetUnicode(); + EventDataDescCreate( + &EventData[iDesc++], + (wszName == NULL) ? L"" : wszName, + (wszName == NULL) ? sizeof(WCHAR) : (m_rgBulkTypeValues[iTypeData].sName.GetCount() + 1) * sizeof(WCHAR)); +#endif // FEATURE_REDHAWK + + // Type parameter count +#ifndef FEATURE_REDHAWK + m_rgBulkTypeValues[iTypeData].cTypeParameters = m_rgBulkTypeValues[iTypeData].rgTypeParameters.GetCount(); +#endif // FEATURE_REDHAWK + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate( + &EventData[iDesc++], + &(m_rgBulkTypeValues[iTypeData].cTypeParameters), + sizeof(m_rgBulkTypeValues[iTypeData].cTypeParameters)); + + // Type parameter array + if (m_rgBulkTypeValues[iTypeData].cTypeParameters > 0) + { + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate( + &EventData[iDesc++], +#ifdef FEATURE_REDHAWK + ((m_rgBulkTypeValues[iTypeData].cTypeParameters == 1) ? + &(m_rgBulkTypeValues[iTypeData].ullSingleTypeParameter) : + (ULONGLONG *) (m_rgBulkTypeValues[iTypeData].rgTypeParameters)), +#else + m_rgBulkTypeValues[iTypeData].rgTypeParameters.GetElements(), +#endif + sizeof(ULONGLONG) * m_rgBulkTypeValues[iTypeData].cTypeParameters); + } + } + + Win32EventWrite(Microsoft_Windows_DotNETRuntimeHandle, &BulkType, iDesc, EventData); + + // Reset state + m_nBulkTypeValueCount = 0; + m_nBulkTypeValueByteCount = 0; +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + +#ifndef FEATURE_REDHAWK + +//--------------------------------------------------------------------------------------- +// +// Batches a single type into the array, flushing the array to ETW if it fills up. Most +// interaction with the type system (to analyze the type) is done here. This does not +// recursively batch up any parameter types (for arrays or generics), but does add their +// TypeHandles to the rgTypeParameters array. LogTypeAndParameters is responsible for +// initiating any recursive calls to deal with type parameters. +// +// Arguments: +// th - TypeHandle to batch +// +// Return Value: +// Index into array of where this type got batched. -1 if there was a failure. 
+// + +int BulkTypeEventLogger::LogSingleType(TypeHandle th) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; // some of the type system stuff can take locks + } + CONTRACTL_END; + + // If there's no room for another type, flush what we've got + if (m_nBulkTypeValueCount == _countof(m_rgBulkTypeValues)) + { + FireBulkTypeEvent(); + } + + _ASSERTE(m_nBulkTypeValueCount < _countof(m_rgBulkTypeValues)); + + if (!th.IsTypeDesc() && th.GetMethodTable()->IsArray()) + { + _ASSERTE(!"BulkTypeEventLogger::LogSingleType called with MethodTable array"); + return -1; + } + + BulkTypeValue * pVal = &m_rgBulkTypeValues[m_nBulkTypeValueCount]; + + // Clear out pVal before filling it out (array elements can get reused if there + // are enough types that we need to flush to multiple events). Clearing the + // contained SBuffer can throw, so deal with exceptions + BOOL fSucceeded = FALSE; + EX_TRY + { + pVal->Clear(); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + + pVal->fixedSizedData.TypeID = (ULONGLONG) th.AsTAddr(); + pVal->fixedSizedData.ModuleID = (ULONGLONG) (TADDR) th.GetModule(); + pVal->fixedSizedData.TypeNameID = (th.GetMethodTable() == NULL) ? 0 : th.GetCl(); + pVal->fixedSizedData.Flags = 0; + pVal->fixedSizedData.CorElementType = (BYTE) th.GetInternalCorElementType(); + + if (th.IsArray()) + { + // Normal typedesc array + pVal->fixedSizedData.Flags |= kEtwTypeFlagsArray; + + // Fetch TypeHandle of array elements + fSucceeded = FALSE; + EX_TRY + { + pVal->rgTypeParameters.Append((ULONGLONG) th.AsArray()->GetArrayElementTypeHandle().AsTAddr()); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + } + else if (th.IsTypeDesc()) + { + // Non-array Typedescs + PTR_TypeDesc pTypeDesc = th.AsTypeDesc(); + if (pTypeDesc->HasTypeParam()) + { + fSucceeded = FALSE; + EX_TRY + { + pVal->rgTypeParameters.Append((ULONGLONG) pTypeDesc->GetTypeParam().AsTAddr()); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + } + } + else + { + // Non-array MethodTable + + PTR_MethodTable pMT = th.AsMethodTable(); + + // Make CorElementType more specific if this is a string MT + if (pMT->IsString()) + { + pVal->fixedSizedData.CorElementType = ELEMENT_TYPE_STRING; + } + else if (pMT->IsObjectClass()) + { + pVal->fixedSizedData.CorElementType = ELEMENT_TYPE_OBJECT; + } + + // Generic arguments + DWORD cTypeParameters = pMT->GetNumGenericArgs(); + if (cTypeParameters > 0) + { + Instantiation inst = pMT->GetInstantiation(); + fSucceeded = FALSE; + EX_TRY + { + for (DWORD i=0; i < cTypeParameters; i++) + { + pVal->rgTypeParameters.Append((ULONGLONG) inst[i].AsTAddr()); + } + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + } + + if (pMT->HasFinalizer()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsFinalizable; + } + if (pMT->IsDelegate()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsDelegate; + } + if (pMT->IsComObjectType()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsExternallyImplementedCOMObject; + } + } + + // If the profiler wants it, construct a name. 
Always normalize the string (even if + // type names are not requested) so that calls to sName.GetCount() can't throw + EX_TRY + { + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPANDTYPENAMES_KEYWORD)) + { + th.GetName(pVal->sName); + } + pVal->sName.Normalize(); + } + EX_CATCH + { + // If this failed, the name remains empty, which is ok; the event just + // won't have a name in it. + pVal->sName.Clear(); + } + EX_END_CATCH(RethrowCorruptingExceptions); + + // Now that we know the full size of this type's data, see if it fits in our + // batch or whether we need to flush + + int cbVal = pVal->GetByteCountInEvent(); + if (cbVal > kMaxBytesTypeValues) + { + // This type is apparently so huge, it's too big to squeeze into an event, even + // if it were the only type batched in the whole event. Bail + _ASSERTE(!"Type too big to log via ETW"); + return -1; + } + + if (m_nBulkTypeValueByteCount + cbVal > kMaxBytesTypeValues) + { + // Although this type fits into the array, its size is so big that the entire + // array can't be logged via ETW. So flush the array, and start over by + // calling ourselves--this refetches the type info and puts it at the + // beginning of the array. Since we know this type is small enough to be + // batched into an event on its own, this recursive call will not try to + // call itself again. + FireBulkTypeEvent(); + return LogSingleType(th); + } + + // The type fits into the batch, so update our state + m_nBulkTypeValueCount++; + m_nBulkTypeValueByteCount += cbVal; + return m_nBulkTypeValueCount - 1; // Index of type we just added +} + +void BulkTypeEventLogger::Cleanup() {} + +//--------------------------------------------------------------------------------------- +// +// High-level method to batch a type and (recursively) its type parameters, flushing to +// ETW as needed. This is called by (static) +// ETW::TypeSystemLog::LogTypeAndParametersIfNecessary, which is what clients use to log +// type events +// +// Arguments: +// * thAsAddr - Type to batch +// * typeLogBehavior - Reminder of whether the type system log lock is held +// (useful if we need to recurively call back into TypeSystemLog), and whether +// we even care to check if the type was already logged +// + +void BulkTypeEventLogger::LogTypeAndParameters(ULONGLONG thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; // LogSingleType can take locks + } + CONTRACTL_END; + + TypeHandle th = TypeHandle::FromTAddr((TADDR) thAsAddr); + + // Batch up this type. 
This grabs useful info about the type, including any + // type parameters it may have, and sticks it in m_rgBulkTypeValues + int iBulkTypeEventData = LogSingleType(th); + if (iBulkTypeEventData == -1) + { + // There was a failure trying to log the type, so don't bother with its type + // parameters + return; + } + + // Look at the type info we just batched, so we can get the type parameters + BulkTypeValue * pVal = &m_rgBulkTypeValues[iBulkTypeEventData]; + + // We're about to recursively call ourselves for the type parameters, so make a + // local copy of their type handles first (else, as we log them we could flush + // and clear out m_rgBulkTypeValues, thus trashing pVal) + + StackSArray rgTypeParameters; + DWORD cParams = pVal->rgTypeParameters.GetCount(); + + BOOL fSucceeded = FALSE; + EX_TRY + { + for (COUNT_T i = 0; i < cParams; i++) + { + rgTypeParameters.Append(pVal->rgTypeParameters[i]); + } + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return; + + // Before we recurse, adjust the special-cased type-log behavior that allows a + // top-level type to be logged without lookup, but still requires lookups to avoid + // dupes of type parameters + if (typeLogBehavior == ETW::TypeSystemLog::kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType) + typeLogBehavior = ETW::TypeSystemLog::kTypeLogBehaviorAssumeLockAndLogIfFirstTime; + + // Recursively log any referenced parameter types + for (COUNT_T i=0; i < cParams; i++) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(this, rgTypeParameters[i], typeLogBehavior); + } +} + +#endif // FEATURE_REDHAWK + +// Holds state that batches of roots, nodes, edges, and types as the GC walks the heap +// at the end of a collection. +class EtwGcHeapDumpContext +{ +public: + // An instance of EtwGcHeapDumpContext is dynamically allocated and stored inside of + // ProfilingScanContext and ProfilerWalkHeapContext, which are context structures + // that the GC heap walker sends back to the callbacks. This method is passed a + // pointer to ProfilingScanContext::pvEtwContext or + // ProfilerWalkHeapContext::pvEtwContext; if non-NULL it gets returned; else, a new + // EtwGcHeapDumpContext is allocated, stored in that pointer, and then returned. + // Callers should test for NULL, which can be returned if out of memory + static EtwGcHeapDumpContext * GetOrCreateInGCContext(LPVOID * ppvEtwContext) + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(ppvEtwContext != NULL); + + EtwGcHeapDumpContext * pContext = (EtwGcHeapDumpContext *) *ppvEtwContext; + if (pContext == NULL) + { + pContext = new (nothrow) EtwGcHeapDumpContext; + *ppvEtwContext = pContext; + } + return pContext; + } + + EtwGcHeapDumpContext() : + iCurBulkRootEdge(0), + iCurBulkRootConditionalWeakTableElementEdge(0), + iCurBulkNodeEvent(0), + iCurBulkEdgeEvent(0), + bulkTypeEventLogger() + { + LIMITED_METHOD_CONTRACT; + ClearRootEdges(); + ClearRootConditionalWeakTableElementEdges(); + ClearNodes(); + ClearEdges(); + } + + // These helpers clear the individual buffers, for use after a flush and on + // construction. 
They intentionally leave the indices (iCur*) alone, since they + // persist across flushes within a GC + + void ClearRootEdges() + { + LIMITED_METHOD_CONTRACT; + cGcBulkRootEdges = 0; + ZeroMemory(rgGcBulkRootEdges, sizeof(rgGcBulkRootEdges)); + } + + void ClearRootConditionalWeakTableElementEdges() + { + LIMITED_METHOD_CONTRACT; + cGCBulkRootConditionalWeakTableElementEdges = 0; + ZeroMemory(rgGCBulkRootConditionalWeakTableElementEdges, sizeof(rgGCBulkRootConditionalWeakTableElementEdges)); + } + + void ClearNodes() + { + LIMITED_METHOD_CONTRACT; + cGcBulkNodeValues = 0; + ZeroMemory(rgGcBulkNodeValues, sizeof(rgGcBulkNodeValues)); + } + + void ClearEdges() + { + LIMITED_METHOD_CONTRACT; + cGcBulkEdgeValues = 0; + ZeroMemory(rgGcBulkEdgeValues, sizeof(rgGcBulkEdgeValues)); + } + + //--------------------------------------------------------------------------------------- + // GCBulkRootEdge + // + // A "root edge" is the relationship between a source "GCRootID" (i.e., stack + // variable, handle, static, etc.) and the target "RootedNodeAddress" (the managed + // object that gets rooted). + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkRootEdge event + UINT iCurBulkRootEdge; + + // Number of root edges currently filled out in rgGcBulkRootEdges array + UINT cGcBulkRootEdges; + + // Struct array containing the primary data for each GCBulkRootEdge event. Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the root edge data) + EventStructGCBulkRootEdgeValue rgGcBulkRootEdges[(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkRootEdgeValue)]; + + + //--------------------------------------------------------------------------------------- + // GCBulkRootConditionalWeakTableElementEdge + // + // These describe dependent handles, which simulate an edge connecting a key NodeID + // to a value NodeID. + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkRootConditionalWeakTableElementEdge event + UINT iCurBulkRootConditionalWeakTableElementEdge; + + // Number of root edges currently filled out in rgGCBulkRootConditionalWeakTableElementEdges array + UINT cGCBulkRootConditionalWeakTableElementEdges; + + // Struct array containing the primary data for each GCBulkRootConditionalWeakTableElementEdge event. Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the root edge data) + EventStructGCBulkRootConditionalWeakTableElementEdgeValue rgGCBulkRootConditionalWeakTableElementEdges + [(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkRootConditionalWeakTableElementEdgeValue)]; + + //--------------------------------------------------------------------------------------- + // GCBulkNode + // + // A "node" is ANY managed object sitting on the heap, including RootedNodeAddresses + // as well as leaf nodes. + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkNode event + UINT iCurBulkNodeEvent; + + // Number of nodes currently filled out in rgGcBulkNodeValues array + UINT cGcBulkNodeValues; + + // Struct array containing the primary data for each GCBulkNode event. 
Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the node data) + EventStructGCBulkNodeValue rgGcBulkNodeValues[(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkNodeValue)]; + + //--------------------------------------------------------------------------------------- + // GCBulkEdge + // + // An "edge" is the relationship between a source node and its referenced target + // node. Edges are reported in bulk, separately from Nodes, but it is expected that + // the consumer read the Node and Edge streams together. One takes the first node + // from the Node stream, and then reads EdgeCount entries in the Edge stream, telling + // you all of that Node's targets. Then, one takes the next node in the Node stream, + // and reads the next entries in the Edge stream (using this Node's EdgeCount to + // determine how many) to find all of its targets. This continues on until the Node + // and Edge streams have been fully read. + // + // GCBulkRootEdges are not duplicated in the GCBulkEdge events. GCBulkEdge events + // begin at the GCBulkRootEdge.RootedNodeAddress and move forward. + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkEdge event + UINT iCurBulkEdgeEvent; + + // Number of nodes currently filled out in rgGcBulkEdgeValues array + UINT cGcBulkEdgeValues; + + // Struct array containing the primary data for each GCBulkEdge event. Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the edge data) + EventStructGCBulkEdgeValue rgGcBulkEdgeValues[(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkEdgeValue)]; + + + //--------------------------------------------------------------------------------------- + // BulkType + // + // Types are a bit more complicated to batch up, since their data is of varying + // size. BulkTypeEventLogger takes care of the pesky details for us + //--------------------------------------------------------------------------------------- + + BulkTypeEventLogger bulkTypeEventLogger; +}; + + + +//--------------------------------------------------------------------------------------- +// +// Called during a heap walk for each root reference encountered. Batches up the root in +// the ETW context +// +// Arguments: +// * pvHandle - If the root is a handle, this points to the handle +// * pRootedNode - Points to object that is rooted +// * pSecondaryNodeForDependentHandle - For dependent handles, this is the +// secondary object +// * fDependentHandle - nonzero iff this is for a dependent handle +// * profilingScanContext - The shared profapi/etw context built up during the heap walk. 
+// * dwGCFlags - Bitmask of "GC_"-style flags set by GC +// * rootFlags - Bitmask of EtwGCRootFlags describing the root +// + +// static +void ETW::GCLog::RootReference( + LPVOID pvHandle, + Object * pRootedNode, + Object * pSecondaryNodeForDependentHandle, + BOOL fDependentHandle, + ProfilingScanContext * profilingScanContext, + DWORD dwGCFlags, + DWORD rootFlags) +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + LIMITED_METHOD_CONTRACT; + + if (pRootedNode == NULL) + return; + + EtwGcHeapDumpContext * pContext = + EtwGcHeapDumpContext::GetOrCreateInGCContext(&profilingScanContext->pvEtwContext); + if (pContext == NULL) + return; + + // Determine root kind, root ID, and handle-specific flags + LPVOID pvRootID = NULL; + BYTE nRootKind = (BYTE) profilingScanContext->dwEtwRootKind; + switch (nRootKind) + { + case kEtwGCRootKindStack: +#ifndef FEATURE_REDHAWK + pvRootID = profilingScanContext->pMD; +#endif // !FEATURE_REDHAWK + break; + + case kEtwGCRootKindHandle: + pvRootID = pvHandle; + break; + + case kEtwGCRootKindFinalizer: + _ASSERTE(pvRootID == NULL); + break; + + case kEtwGCRootKindOther: + default: + _ASSERTE(nRootKind == kEtwGCRootKindOther); + _ASSERTE(pvRootID == NULL); + break; + } + + // Convert GC root flags to ETW root flags + if (dwGCFlags & GC_CALL_INTERIOR) + rootFlags |= kEtwGCRootFlagsInterior; + if (dwGCFlags & GC_CALL_PINNED) + rootFlags |= kEtwGCRootFlagsPinning; + + // Add root edge to appropriate buffer + if (fDependentHandle) + { + _ASSERTE(pContext->cGCBulkRootConditionalWeakTableElementEdges < + _countof(pContext->rgGCBulkRootConditionalWeakTableElementEdges)); + EventStructGCBulkRootConditionalWeakTableElementEdgeValue * pRCWTEEdgeValue = + &pContext->rgGCBulkRootConditionalWeakTableElementEdges[pContext->cGCBulkRootConditionalWeakTableElementEdges]; + pRCWTEEdgeValue->GCKeyNodeID = pRootedNode; + pRCWTEEdgeValue->GCValueNodeID = pSecondaryNodeForDependentHandle; + pRCWTEEdgeValue->GCRootID = pvRootID; + pContext->cGCBulkRootConditionalWeakTableElementEdges++; + + // If RCWTE edge buffer is now full, empty it into ETW + if (pContext->cGCBulkRootConditionalWeakTableElementEdges == + _countof(pContext->rgGCBulkRootConditionalWeakTableElementEdges)) + { + FireEtwGCBulkRootConditionalWeakTableElementEdge( + pContext->iCurBulkRootConditionalWeakTableElementEdge, + pContext->cGCBulkRootConditionalWeakTableElementEdges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]), + &pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]); + + pContext->iCurBulkRootConditionalWeakTableElementEdge++; + pContext->ClearRootConditionalWeakTableElementEdges(); + } + } + else + { + _ASSERTE(pContext->cGcBulkRootEdges < _countof(pContext->rgGcBulkRootEdges)); + EventStructGCBulkRootEdgeValue * pBulkRootEdgeValue = &pContext->rgGcBulkRootEdges[pContext->cGcBulkRootEdges]; + pBulkRootEdgeValue->RootedNodeAddress = pRootedNode; + pBulkRootEdgeValue->GCRootKind = nRootKind; + pBulkRootEdgeValue->GCRootFlag = rootFlags; + pBulkRootEdgeValue->GCRootID = pvRootID; + pContext->cGcBulkRootEdges++; + + // If root edge buffer is now full, empty it into ETW + if (pContext->cGcBulkRootEdges == _countof(pContext->rgGcBulkRootEdges)) + { + FireEtwGCBulkRootEdge( + pContext->iCurBulkRootEdge, + pContext->cGcBulkRootEdges, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkRootEdges[0]), + &pContext->rgGcBulkRootEdges[0]); + + pContext->iCurBulkRootEdge++; + pContext->ClearRootEdges(); + } + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + + 
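+//---------------------------------------------------------------------------------------
+//
+// All of the bulk buffers above and below (root edges, conditional weak table element
+// edges, nodes, edges) follow the same batch-and-flush pattern: a fixed-size array whose
+// capacity keeps a full event under the 64K ETW payload limit, a count of filled
+// entries, and a per-event sequence number. A minimal sketch of that pattern (the names
+// here are hypothetical stand-ins, not the real event structs or Fire* functions):
+//
+//     struct BulkValue { void * Address; };                        // stand-in payload
+//     const UINT kMaxPerEvent = (cbMaxEtwEvent - 0x100) / sizeof(BulkValue);
+//
+//     UINT      iCurEvent = 0;         // sequence number; persists across flushes
+//     UINT      cValues   = 0;         // entries batched so far
+//     BulkValue rgValues[kMaxPerEvent];
+//
+//     void AddValue(const BulkValue & val)
+//     {
+//         rgValues[cValues++] = val;
+//         if (cValues == kMaxPerEvent)
+//         {
+//             // Buffer is full: fire one bulk event carrying the whole array, then
+//             // start the next batch.
+//             FireBulkEvent(iCurEvent, cValues, sizeof(rgValues[0]), &rgValues[0]);
+//             iCurEvent++;
+//             cValues = 0;
+//         }
+//     }
+//
+// EndHeapDump (below) flushes whatever partial batch remains when the heap walk ends.
+//---------------------------------------------------------------------------------------
+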
+//--------------------------------------------------------------------------------------- +// +// Called during a heap walk for each object reference encountered. Batches up the +// corresponding node, edges, and type data for the ETW events. +// +// Arguments: +// * profilerWalkHeapContext - The shared profapi/etw context built up during the heap walk. +// * pObjReferenceSource - Object doing the pointing +// * typeID - Type of pObjReferenceSource +// * fDependentHandle - nonzero iff this is for a dependent handle +// * cRefs - Count of objects being pointed to +// * rgObjReferenceTargets - Array of objects being pointed to +// + +// static +void ETW::GCLog::ObjectReference( + ProfilerWalkHeapContext * profilerWalkHeapContext, + Object * pObjReferenceSource, + ULONGLONG typeID, + ULONGLONG cRefs, + Object ** rgObjReferenceTargets) +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + + // LogTypeAndParametersIfNecessary can take a lock + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + EtwGcHeapDumpContext * pContext = + EtwGcHeapDumpContext::GetOrCreateInGCContext(&profilerWalkHeapContext->pvEtwContext); + if (pContext == NULL) + return; + + //--------------------------------------------------------------------------------------- + // GCBulkNode events + //--------------------------------------------------------------------------------------- + + // Add Node (pObjReferenceSource) to buffer + _ASSERTE(pContext->cGcBulkNodeValues < _countof(pContext->rgGcBulkNodeValues)); + EventStructGCBulkNodeValue * pBulkNodeValue = &pContext->rgGcBulkNodeValues[pContext->cGcBulkNodeValues]; + pBulkNodeValue->Address = pObjReferenceSource; + pBulkNodeValue->Size = pObjReferenceSource->GetSize(); + pBulkNodeValue->TypeID = typeID; + pBulkNodeValue->EdgeCount = cRefs; + pContext->cGcBulkNodeValues++; + + // If Node buffer is now full, empty it into ETW + if (pContext->cGcBulkNodeValues == _countof(pContext->rgGcBulkNodeValues)) + { + FireEtwGCBulkNode( + pContext->iCurBulkNodeEvent, + pContext->cGcBulkNodeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkNodeValues[0]), + &pContext->rgGcBulkNodeValues[0]); + + pContext->iCurBulkNodeEvent++; + pContext->ClearNodes(); + } + + //--------------------------------------------------------------------------------------- + // BulkType events + //--------------------------------------------------------------------------------------- + + // We send type information as necessary--only for nodes, and only for nodes that we + // haven't already sent type info for + if (typeID != 0) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary( + &pContext->bulkTypeEventLogger, // Batch up this type with others to minimize events + typeID, + + // During heap walk, GC holds the lock for us, so we can directly enter the + // hash to see if the type has already been logged + ETW::TypeSystemLog::kTypeLogBehaviorAssumeLockAndLogIfFirstTime + ); + } + + //--------------------------------------------------------------------------------------- + // GCBulkEdge events + //--------------------------------------------------------------------------------------- + + // Add Edges (rgObjReferenceTargets) to buffer. Buffer could fill up before all edges + // are added (it could even fill up multiple times during this one call if there are + // a lot of edges), so empty Edge buffer into ETW as we go along, as many times as we + // need. 
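+    // (For example, an object with three references contributes one GCBulkNode entry
+    // with EdgeCount = 3 above, followed by three GCBulkEdge entries added here;
+    // consumers re-associate them by reading EdgeCount entries from the Edge stream
+    // per node, as described for GCBulkEdge above.)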
+ + for (ULONGLONG i=0; i < cRefs; i++) + { + _ASSERTE(pContext->cGcBulkEdgeValues < _countof(pContext->rgGcBulkEdgeValues)); + EventStructGCBulkEdgeValue * pBulkEdgeValue = &pContext->rgGcBulkEdgeValues[pContext->cGcBulkEdgeValues]; + pBulkEdgeValue->Value = rgObjReferenceTargets[i]; + // FUTURE: ReferencingFieldID + pBulkEdgeValue->ReferencingFieldID = 0; + pContext->cGcBulkEdgeValues++; + + // If Edge buffer is now full, empty it into ETW + if (pContext->cGcBulkEdgeValues == _countof(pContext->rgGcBulkEdgeValues)) + { + FireEtwGCBulkEdge( + pContext->iCurBulkEdgeEvent, + pContext->cGcBulkEdgeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkEdgeValues[0]), + &pContext->rgGcBulkEdgeValues[0]); + + pContext->iCurBulkEdgeEvent++; + pContext->ClearEdges(); + } + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + +//--------------------------------------------------------------------------------------- +// +// Called by GC at end of heap dump to give us a convenient time to flush any remaining +// buffers of data to ETW +// +// Arguments: +// profilerWalkHeapContext - Context containing data we've batched up +// + +// static +void ETW::GCLog::EndHeapDump(ProfilerWalkHeapContext * profilerWalkHeapContext) +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + LIMITED_METHOD_CONTRACT; + + // If context isn't already set up for us, then we haven't been collecting any data + // for ETW events. + EtwGcHeapDumpContext * pContext = (EtwGcHeapDumpContext *) profilerWalkHeapContext->pvEtwContext; + if (pContext == NULL) + return; + + // If the GC events are enabled, flush any remaining root, node, and / or edge data + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPDUMP_KEYWORD)) + { + if (pContext->cGcBulkRootEdges > 0) + { + FireEtwGCBulkRootEdge( + pContext->iCurBulkRootEdge, + pContext->cGcBulkRootEdges, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkRootEdges[0]), + &pContext->rgGcBulkRootEdges[0]); + } + + if (pContext->cGCBulkRootConditionalWeakTableElementEdges > 0) + { + FireEtwGCBulkRootConditionalWeakTableElementEdge( + pContext->iCurBulkRootConditionalWeakTableElementEdge, + pContext->cGCBulkRootConditionalWeakTableElementEdges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]), + &pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]); + } + + if (pContext->cGcBulkNodeValues > 0) + { + FireEtwGCBulkNode( + pContext->iCurBulkNodeEvent, + pContext->cGcBulkNodeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkNodeValues[0]), + &pContext->rgGcBulkNodeValues[0]); + } + + if (pContext->cGcBulkEdgeValues > 0) + { + FireEtwGCBulkEdge( + pContext->iCurBulkEdgeEvent, + pContext->cGcBulkEdgeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkEdgeValues[0]), + &pContext->rgGcBulkEdgeValues[0]); + } + } + + // Ditto for type events + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + pContext->bulkTypeEventLogger.FireBulkTypeEvent(); + pContext->bulkTypeEventLogger.Cleanup(); + } + + // Delete any GC state built up in the context + profilerWalkHeapContext->pvEtwContext = NULL; + delete pContext; +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + + +#ifndef FEATURE_REDHAWK + +//--------------------------------------------------------------------------------------- +// +// Helper to send public finalize object & type events, and private finalize object +// event. 
If Type events are enabled, this will send the Type event for the finalized +// objects. It will not be batched with other types (except type parameters, if any), +// and will not check if the Type has already been logged (may thus result in dupe +// logging of the Type). +// +// Arguments: +// pMT - MT of object getting finalized +// pObj - object getting finalized +// + +// static +void ETW::GCLog::SendFinalizeObjectEvent(MethodTable * pMT, Object * pObj) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + + // LogTypeAndParameters locks, and we take our own lock if typeLogBehavior says to + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + // Send public finalize object event, if it's enabled + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, FinalizeObject)) + { + FireEtwFinalizeObject(pMT, pObj, GetClrInstanceId()); + + // This function checks if type events are enabled; if so, it sends event for + // finalized object's type (and parameter types, if any) + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary( + NULL, // Not batching this type with others + (TADDR) pMT, + + // Don't spend the time entering the lock and checking the hash table to see + // if we've already logged the type; just log it (if type events are enabled). + ETW::TypeSystemLog::kTypeLogBehaviorAlwaysLog + ); + } + + // Send private finalize object event, if it's enabled + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, PrvFinalizeObject)) + { + EX_TRY + { + DefineFullyQualifiedNameForClassWOnStack(); + FireEtwPrvFinalizeObject(pMT, pObj, GetClrInstanceId(), GetFullyQualifiedNameForClassNestedAwareW(pMT)); + } + EX_CATCH + { + } + EX_END_CATCH(RethrowCorruptingExceptions); + } +} + +DWORD ETW::ThreadLog::GetEtwThreadFlags(Thread * pThread) +{ + LIMITED_METHOD_CONTRACT; + + DWORD dwEtwThreadFlags = 0; + + if (pThread->IsThreadPoolThread()) + { + dwEtwThreadFlags |= kEtwThreadFlagThreadPoolWorker; + } + if (pThread->IsGCSpecial()) + { + dwEtwThreadFlags |= kEtwThreadFlagGCSpecial; + } + if (IsGarbageCollectorFullyInitialized() && + (pThread == GCHeapUtilities::GetGCHeap()->GetFinalizerThread())) + { + dwEtwThreadFlags |= kEtwThreadFlagFinalizer; + } + + return dwEtwThreadFlags; +} + +void ETW::ThreadLog::FireThreadCreated(Thread * pThread) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwThreadCreated( + (ULONGLONG)pThread, + (ULONGLONG)pThread->GetDomain(), + GetEtwThreadFlags(pThread), + pThread->GetThreadId(), + pThread->GetOSThreadId(), + GetClrInstanceId()); +} + +void ETW::ThreadLog::FireThreadDC(Thread * pThread) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwThreadDC( + (ULONGLONG)pThread, + (ULONGLONG)pThread->GetDomain(), + GetEtwThreadFlags(pThread), + pThread->GetThreadId(), + pThread->GetOSThreadId(), + GetClrInstanceId()); +} + + + +// TypeSystemLog implementation +// +// We keep track of which TypeHandles have been logged, and stats on instances of these +// TypeHandles that have been allocated, by a hash table of hash tables. The outer hash +// table maps Module*'s to an inner hash table that contains all the TypeLoggingInfos for that +// Module*. Arranging things this way makes it easy to deal with Module unloads, as we +// can simply remove the corresponding inner hash table from the outer hash table. + +// The following help define the "inner" hash table: a hash table of TypeLoggingInfos +// from a particular Module (key = TypeHandle, value = TypeLoggingInfo. 
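+//
+// Conceptually, a lookup walks the two levels like this (sketch only; the real code in
+// LookupOrCreateTypeLoggingInfo below goes through the SHash types defined next, handles
+// the not-yet-created cases, and must hold AllLoggedTypes::s_cs while doing so):
+//
+//     Module * pLoaderModule = th.GetLoaderModule();
+//     LoggedTypesFromModule * pInner =
+//         s_pAllLoggedTypes->allLoggedTypesHash.Lookup(pLoaderModule);
+//     TypeLoggingInfo info = pInner->loggedTypesFromModuleHash.Lookup(th);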
+ +class LoggedTypesFromModuleTraits : public NoRemoveSHashTraits< DefaultSHashTraits > +{ +public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef NoRemoveSHashTraits< DefaultSHashTraits > PARENT; + typedef PARENT::element_t element_t; + typedef PARENT::count_t count_t; + + typedef TypeHandle key_t; + + static key_t GetKey(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return e.th; + } + + static BOOL Equals(key_t k1, key_t k2) + { + LIMITED_METHOD_CONTRACT; + return (k1 == k2); + } + + static count_t Hash(key_t k) + { + LIMITED_METHOD_CONTRACT; + return (count_t) k.AsTAddr(); + } + + static bool IsNull(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return (e.th.AsTAddr() == NULL); + } + + static const element_t Null() + { + LIMITED_METHOD_CONTRACT; + return ETW::TypeLoggingInfo(NULL); + } +}; +typedef SHash LoggedTypesFromModuleHash; + +// The inner hash table is housed inside this class, which acts as an entry in the outer +// hash table. +class ETW::LoggedTypesFromModule +{ +public: + Module * pModule; + LoggedTypesFromModuleHash loggedTypesFromModuleHash; + + // These are used by the outer hash table (mapping Module*'s to instances of + // LoggedTypesFromModule). + static COUNT_T Hash(Module * pModule) + { + LIMITED_METHOD_CONTRACT; + return (COUNT_T) (SIZE_T) pModule; + } + Module * GetKey() + { + LIMITED_METHOD_CONTRACT; + return pModule; + } + + LoggedTypesFromModule(Module * pModuleParam) : loggedTypesFromModuleHash() + { + LIMITED_METHOD_CONTRACT; + pModule = pModuleParam; + } + + ~LoggedTypesFromModule() + { + LIMITED_METHOD_CONTRACT; + } +}; + +// The following define the outer hash table (mapping Module*'s to instances of +// LoggedTypesFromModule). + +class AllLoggedTypesTraits : public DefaultSHashTraits +{ +public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef DefaultSHashTraits PARENT; + typedef PARENT::element_t element_t; + typedef PARENT::count_t count_t; + + typedef Module * key_t; + + static key_t GetKey(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return e->pModule; + } + + static BOOL Equals(key_t k1, key_t k2) + { + LIMITED_METHOD_CONTRACT; + return (k1 == k2); + } + + static count_t Hash(key_t k) + { + LIMITED_METHOD_CONTRACT; + return (count_t) (size_t) k; + } + + static bool IsNull(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return (e == NULL); + } + + static const element_t Null() + { + LIMITED_METHOD_CONTRACT; + return NULL; + } +}; + +typedef SHash AllLoggedTypesHash; + +// The outer hash table (mapping Module*'s to instances of LoggedTypesFromModule) is +// housed in this struct, which is dynamically allocated the first time we decide we need +// it. +struct AllLoggedTypes +{ +public: + // This Crst protects the entire outer & inner hash tables. On a GC heap walk, it + // is entered once for the duration of the walk, so that we can freely access the + // hash tables during the walk. On each object allocation, this Crst must be + // entered individually each time. 
+ static CrstStatic s_cs; + + // The outer hash table (mapping Module*'s to instances of LoggedTypesFromModule) + AllLoggedTypesHash allLoggedTypesHash; +}; + + +CrstStatic AllLoggedTypes::s_cs; +AllLoggedTypes * ETW::TypeSystemLog::s_pAllLoggedTypes = NULL; +BOOL ETW::TypeSystemLog::s_fHeapAllocEventEnabledOnStartup = FALSE; +BOOL ETW::TypeSystemLog::s_fHeapAllocHighEventEnabledNow = FALSE; +BOOL ETW::TypeSystemLog::s_fHeapAllocLowEventEnabledNow = FALSE; +int ETW::TypeSystemLog::s_nCustomMsBetweenEvents = 0; + + +//--------------------------------------------------------------------------------------- +// +// Initializes TypeSystemLog (specifically its crst). Called just before ETW providers +// are registered with the OS +// +// Return Value: +// HRESULT indicating success or failure +// + +// static +HRESULT ETW::TypeSystemLog::PreRegistrationInit() +{ + LIMITED_METHOD_CONTRACT; + + if (!AllLoggedTypes::s_cs.InitNoThrow( + CrstEtwTypeLogHash, + CRST_UNSAFE_ANYMODE)) // This lock is taken during a GC while walking the heap + { + return E_FAIL; + } + + return S_OK; +} + +//--------------------------------------------------------------------------------------- +// +// Initializes TypeSystemLog (specifically its crst). Called just after ETW providers +// are registered with the OS +// +// Return Value: +// HRESULT indicating success or failure +// + +// static +void ETW::TypeSystemLog::PostRegistrationInit() +{ + LIMITED_METHOD_CONTRACT; + + // Initialize our "current state" BOOLs that remember if low or high allocation + // sampling is turned on + BOOL s_fHeapAllocLowEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCLOW_KEYWORD); + BOOL s_fHeapAllocHighEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCHIGH_KEYWORD); + + // Snapshot the current state of the object allocated keyword (on startup), and rely + // on this snapshot for the rest of the process run. Since these events require the + // slow alloc JIT helper to be enabled, and that can only be done on startup, we + // remember in this BOOL that we did so, so that we can prevent the object allocated + // event from being fired if the fast allocation helper were enabled but had to + // degrade down to the slow helper (e.g., thread ran over its allocation limit). This + // keeps things consistent. + s_fHeapAllocEventEnabledOnStartup = (s_fHeapAllocLowEventEnabledNow || s_fHeapAllocHighEventEnabledNow); + + if (s_fHeapAllocEventEnabledOnStartup) + { + // Determine if a COMPLUS env var is overriding the frequency for the sampled + // object allocated events + + // Config value intentionally typed as string, b/c DWORD intepretation is hard-coded + // to hex, which is not what the user would expect. This way I can force the + // conversion to use decimal. 
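+        //
+        // For example, a configured value of 10 (desired events per type per second)
+        // yields s_nCustomMsBetweenEvents = 1000 / 10 = 100 ms between sampled
+        // allocation events for any given type.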
+ NewArrayHolder wszCustomObjectAllocationEventsPerTypePerSec(NULL); + if (FAILED(CLRConfig::GetConfigValue( + CLRConfig::UNSUPPORTED_ETW_ObjectAllocationEventsPerTypePerSec, + &wszCustomObjectAllocationEventsPerTypePerSec)) || + (wszCustomObjectAllocationEventsPerTypePerSec == NULL)) + { + return; + } + LPWSTR endPtr; + DWORD dwCustomObjectAllocationEventsPerTypePerSec = wcstoul( + wszCustomObjectAllocationEventsPerTypePerSec, + &endPtr, + 10 // Base 10 conversion + ); + + if (dwCustomObjectAllocationEventsPerTypePerSec == ULONG_MAX) + dwCustomObjectAllocationEventsPerTypePerSec = 0; + if (dwCustomObjectAllocationEventsPerTypePerSec != 0) + { + // MsBetweenEvents = (1000 ms/sec) / (custom desired events/sec) + s_nCustomMsBetweenEvents = 1000 / dwCustomObjectAllocationEventsPerTypePerSec; + } + } +} + + +//--------------------------------------------------------------------------------------- +// +// Update object allocation sampling frequency and / or Type hash table contents based +// on what keywords were changed. +// + +// static +void ETW::TypeSystemLog::OnKeywordsChanged() +{ + LIMITED_METHOD_CONTRACT; + + // If the desired frequencey for the GCSampledObjectAllocation events has changed, + // update our state. + s_fHeapAllocLowEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCLOW_KEYWORD); + s_fHeapAllocHighEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCHIGH_KEYWORD); + + // FUTURE: Would be nice here to log an error event if (s_fHeapAllocLowEventEnabledNow || + // s_fHeapAllocHighEventEnabledNow), but !s_fHeapAllocEventEnabledOnStartup + + // If the type events should be turned off, eliminate the hash tables that tracked + // which types were logged. (If type events are turned back on later, we'll re-log + // them all as we encounter them.) Note that all we can really test for is that the + // Types keyword on the runtime provider is off. Not necessarily that it was on and + // was just turned off with this request. But either way, TypeSystemLog can handle it + // because it is extremely smart. + if (!ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_TYPE_KEYWORD)) + OnTypesKeywordTurnedOff(); +} + + +//--------------------------------------------------------------------------------------- +// +// Based on keywords alone, determine the what the default sampling rate should be for +// object allocation events. (This function does not consider any COMPLUS overrides for +// the sampling rate.) +// + +// static +int ETW::TypeSystemLog::GetDefaultMsBetweenEvents() +{ + LIMITED_METHOD_CONTRACT; + + // We should only get here if the allocation event is enabled. In spirit, this assert + // is correct, but a race could cause the assert to fire (if someone toggled the + // event off after we decided that the event was on and we started down the path of + // calculating statistics to fire the event). In such a case we'll end up returning + // k_nDefaultMsBetweenEventsLow below, but next time we won't get here as we'll know + // early enough not to fire the event. 
+ //_ASSERTE(IsHeapAllocEventEnabled()); + + // MsBetweenEvents = (1000 ms/sec) / (desired events/sec) + const int k_nDefaultMsBetweenEventsHigh = 1000 / 100; // 100 events per type per sec + const int k_nDefaultMsBetweenEventsLow = 1000 / 5; // 5 events per type per sec + + // If both are set, High takes precedence + if (s_fHeapAllocHighEventEnabledNow) + { + return k_nDefaultMsBetweenEventsHigh; + } + return k_nDefaultMsBetweenEventsLow; +} + +//--------------------------------------------------------------------------------------- +// +// Use this to decide whether to fire the object allocation event +// +// Return Value: +// nonzero iff we should fire the event. +// + +// static +BOOL ETW::TypeSystemLog::IsHeapAllocEventEnabled() +{ + LIMITED_METHOD_CONTRACT; + + return + // Only fire the event if it was enabled at startup (and thus the slow-JIT new + // helper is used in all cases) + s_fHeapAllocEventEnabledOnStartup && + + // AND a keyword is still enabled. (Thus people can turn off the event + // whenever they want; but they cannot turn it on unless it was also on at startup.) + (s_fHeapAllocHighEventEnabledNow || s_fHeapAllocLowEventEnabledNow); +} + +//--------------------------------------------------------------------------------------- +// +// Helper that adds (or updates) the TypeLoggingInfo inside the inner hash table passed +// in. +// +// Arguments: +// * pLoggedTypesFromModule - Inner hash table to update +// * pTypeLoggingInfo - TypeLoggingInfo to store +// +// Return Value: +// nonzero iff the add/replace was successful. +// +// Assumptions: +// Caller must be holding the hash crst +// + +// static +BOOL ETW::TypeSystemLog::AddOrReplaceTypeLoggingInfo(ETW::LoggedTypesFromModule * pLoggedTypesFromModule, const ETW::TypeLoggingInfo * pTypeLoggingInfo) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE(GetHashCrst()->OwnedByCurrentThread()); + _ASSERTE(pLoggedTypesFromModule != NULL); + + BOOL fSucceeded = FALSE; + EX_TRY + { + pLoggedTypesFromModule->loggedTypesFromModuleHash.AddOrReplace(*pTypeLoggingInfo); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + + return fSucceeded; +} + +//--------------------------------------------------------------------------------------- +// +// Records stats about the object's allocation, and determines based on those stats whether +// to fires the high / low frequency GCSampledObjectAllocation ETW event +// +// Arguments: +// * pObject - Allocated object to log +// * th - TypeHandle for the object +// + +// static +void ETW::TypeSystemLog::SendObjectAllocatedEvent(Object * pObject) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + } + CONTRACTL_END; + + // No-op if the appropriate keywords were not enabled on startup (or we're not yet + // started up) + if (!s_fHeapAllocEventEnabledOnStartup || !g_fEEStarted) + return; + + TypeHandle th = pObject->GetTypeHandle(); + + SIZE_T size = pObject->GetSize(); + if(size < MIN_OBJECT_SIZE) + { + size = PtrAlign(size); + } + + SIZE_T nTotalSizeForTypeSample = size; + DWORD dwTickNow = GetTickCount(); + DWORD dwObjectCountForTypeSample = 0; + + // BLOCK: Hold the crst around the type stats hash table while we read and update + // the type's stats + { + CrstHolder _crst(GetHashCrst()); + + // Get stats for type + TypeLoggingInfo typeLoggingInfo(NULL); + LoggedTypesFromModule * pLoggedTypesFromModule = NULL; + BOOL fCreatedNew = FALSE; + typeLoggingInfo = LookupOrCreateTypeLoggingInfo(th, &fCreatedNew, &pLoggedTypesFromModule); 
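+        // A null TypeHandle here means the lookup/creation failed (out of memory or a
+        // failed hash insertion), so skip ETW logging for this allocation.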
+ if (typeLoggingInfo.th.IsNull()) + return; + + // Update stats with current allocation + typeLoggingInfo.dwAllocsSkippedForSample++; + typeLoggingInfo.cbIgnoredSizeForSample += size; + + // This is our filter. If we should ignore this alloc, then record our updated + // our stats, and bail without sending the event. Note that we always log objects + // over 10K in size. + if (size < 10000 && typeLoggingInfo.dwAllocsSkippedForSample < typeLoggingInfo.dwAllocsToSkipPerSample) + { + // Update hash table's copy of type logging info with these values. Sucks that + // we're doing another hash table lookup here. Could instead have used LookupPtr() + // if it gave us back a non-const pointer, and then we could have updated in-place + AddOrReplaceTypeLoggingInfo(pLoggedTypesFromModule, &typeLoggingInfo); + if (fCreatedNew) + { + // Although we're skipping logging the allocation, we still need to log + // the type (so it's available for resolving future allocation events to + // their types). + // + // (See other call to LogTypeAndParametersIfNecessary further down for + // more comments.) + LogTypeAndParametersIfNecessary( + NULL, + th.AsTAddr(), + kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType); + } + return; + } + + // Based on observed allocation stats, adjust our sampling rate for this type + + typeLoggingInfo.dwAllocCountInCurrentBucket += typeLoggingInfo.dwAllocsSkippedForSample; + int delta = (dwTickNow - typeLoggingInfo.dwTickOfCurrentTimeBucket) & 0x7FFFFFFF; // make wrap around work. + + int nMinAllocPerMSec = typeLoggingInfo.dwAllocCountInCurrentBucket / 16; // This is an underestimation of the true rate. + if (delta >= 16 || (nMinAllocPerMSec > 2 && nMinAllocPerMSec > typeLoggingInfo.flAllocPerMSec * 1.5F)) + { + float flNewAllocPerMSec = 0; + if (delta >= 16) + { + // This is the normal case, our allocation rate is under control with the current throttling. + flNewAllocPerMSec = ((float) typeLoggingInfo.dwAllocCountInCurrentBucket) / delta; + // Do a exponential decay window that is 5 * max(16, AllocationInterval) + typeLoggingInfo.flAllocPerMSec = 0.8F * typeLoggingInfo.flAllocPerMSec + 0.2F * flNewAllocPerMSec; + typeLoggingInfo.dwTickOfCurrentTimeBucket = dwTickNow; + typeLoggingInfo.dwAllocCountInCurrentBucket = 0; + } + else + { + flNewAllocPerMSec = (float) nMinAllocPerMSec; + // This means the second clause above is true, which means our sampling rate is too low + // so we need to throttle quickly. + typeLoggingInfo.flAllocPerMSec = flNewAllocPerMSec; + } + + + // Obey the desired sampling rate, but don't ignore > 1000 allocations per second + // per type + int nDesiredMsBetweenEvents = (s_nCustomMsBetweenEvents == 0) ? GetDefaultMsBetweenEvents() : s_nCustomMsBetweenEvents; + typeLoggingInfo.dwAllocsToSkipPerSample = min((int) (typeLoggingInfo.flAllocPerMSec * nDesiredMsBetweenEvents), 1000); + if (typeLoggingInfo.dwAllocsToSkipPerSample == 1) + typeLoggingInfo.dwAllocsToSkipPerSample = 0; + } + + // We're logging this sample, so save the values we need into locals, and reset + // our counts for the next sample. 
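+        // (For example, a type measured at 2 allocs/ms with a desired 100 ms between
+        // events skips min(2 * 100, 1000) = 200 allocations before its next sample;
+        // the locals saved here feed the GCSampledObjectAllocation event fired below,
+        // after the crst is released.)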
+ nTotalSizeForTypeSample = typeLoggingInfo.cbIgnoredSizeForSample; + dwObjectCountForTypeSample = typeLoggingInfo.dwAllocsSkippedForSample; + typeLoggingInfo.cbIgnoredSizeForSample = 0; + typeLoggingInfo.dwAllocsSkippedForSample = 0; + + // Save updated stats into hash table + if (!AddOrReplaceTypeLoggingInfo(pLoggedTypesFromModule, &typeLoggingInfo)) + { + return; + } + + // While we're still holding the crst, optionally log any relevant Types now (we may need + // to reconsult the hash in here if there are any type parameters, though we can + // optimize and NOT consult the hash for th itself). + if (fCreatedNew) + { + // We were the ones to add the Type to the hash. So it wasn't there before, + // which means it hasn't been logged yet. + LogTypeAndParametersIfNecessary( + + // No BulkTypeEventLogger, as we're not batching during a GC heap walk + NULL, + + th.AsTAddr(), + + // We've determined the type is not yet logged, so no need to check + kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType); + } + } // RELEASE: CrstHolder _crst(GetHashCrst()); + + // Now log the allocation + if (s_fHeapAllocHighEventEnabledNow) + { + FireEtwGCSampledObjectAllocationHigh(pObject, (LPVOID) th.AsTAddr(), dwObjectCountForTypeSample, nTotalSizeForTypeSample, GetClrInstanceId()); + } + else + { + FireEtwGCSampledObjectAllocationLow(pObject, (LPVOID) th.AsTAddr(), dwObjectCountForTypeSample, nTotalSizeForTypeSample, GetClrInstanceId()); + } +} + +//--------------------------------------------------------------------------------------- +// +// Accessor for hash table crst +// +// Return Value: +// hash table crst +// + +// static +CrstBase * ETW::TypeSystemLog::GetHashCrst() +{ + LIMITED_METHOD_CONTRACT; + return &AllLoggedTypes::s_cs; +} + +//--------------------------------------------------------------------------------------- +// +// Outermost level of ETW-type-logging. Clients outside eventtrace.cpp call this to log +// a TypeHandle and (recursively) its type parameters when present. This guy then calls +// into the appropriate BulkTypeEventLogger to do the batching and logging +// +// Arguments: +// * pBulkTypeEventLogger - If our caller is keeping track of batched types, it +// passes this to us so we can use it to batch the current type (GC heap walk +// does this). If this is NULL, no batching is going on (e.g., we're called on +// object allocation, not a GC heal walk), in which case we create our own +// temporary BulkTypeEventLogger. +// * thAsAddr - TypeHandle to batch +// * typeLogBehavior - Optimization to tell us we don't need to enter the +// TypeSystemLog's crst, as the TypeSystemLog's hash table is already protected +// by a prior acquisition of the crst by our caller. (Or that we don't even +// need to check the hash in the first place.) +// + +// static +void ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pLogger, ULONGLONG thAsAddr, TypeLogBehavior typeLogBehavior) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + + // LogTypeAndParameters locks, and we take our own lock if typeLogBehavior says to + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + TypeHandle th = TypeHandle::FromTAddr((TADDR) thAsAddr); + if (!th.IsRestored()) + { + return; + } + + // Check to see if we've already logged this type. If so, bail immediately. 
+ // Otherwise, mark that it's getting logged (by adding it to the hash), and fall + // through to the logging code below. If caller doesn't care, then don't even + // check; just log the type + BOOL fShouldLogType = ((typeLogBehavior == kTypeLogBehaviorAlwaysLog) || + (typeLogBehavior == kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType)) ? + TRUE : + ((typeLogBehavior == kTypeLogBehaviorTakeLockAndLogIfFirstTime) ? + ShouldLogType(th) : + ShouldLogTypeNoLock(th)); + if (!fShouldLogType) + return; + + if (pLogger == NULL) + { + // We're not batching this type against previous types (e.g., we're being called + // on object allocate instead of a GC heap walk). So create a temporary logger + // on the stack. If there are generic parameters that need to be logged, then + // at least they'll get batched together with the type + BulkTypeEventLogger logger; + logger.LogTypeAndParameters(thAsAddr, typeLogBehavior); + + // Since this logger isn't being used to batch anything else, flush what we have + logger.FireBulkTypeEvent(); + } + else + { + // We are batching this type with others (e.g., we're being called at the end of + // a GC on a heap walk). So use the logger our caller set up for us. + pLogger->LogTypeAndParameters(thAsAddr, typeLogBehavior); + } +} + + +//--------------------------------------------------------------------------------------- +// +// Same as code:ETW::TypeSystemLog::ShouldLogTypeNoLock but acquires the lock first. + +// static +BOOL ETW::TypeSystemLog::ShouldLogType(TypeHandle th) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + CrstHolder _crst(GetHashCrst()); + return ShouldLogTypeNoLock(th); +} + + +//--------------------------------------------------------------------------------------- +// +// Ask hash table if we've already logged the type, without first acquiring the lock +// (our caller already did this). As a side-effect, a TypeLoggingInfo will be created +// for this type (so future calls to this function will return FALSE to avoid dupe type +// logging). +// +// Arguments: +// pth - TypeHandle to query +// +// Return Value: +// nonzero iff type should be logged (i.e., not previously logged) +// +// Assumptions: +// Caller must own the hash table's crst +// + +// static +BOOL ETW::TypeSystemLog::ShouldLogTypeNoLock(TypeHandle th) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE(GetHashCrst()->OwnedByCurrentThread()); + + // Check to see if TypeLoggingInfo exists yet for *pth. If not, creates one and + // adds it to the hash. + BOOL fCreatedNew = FALSE; + LookupOrCreateTypeLoggingInfo(th, &fCreatedNew); + + // Return whether we had to create the TypeLoggingInfo (indicating it was not yet in + // the hash, and thus that we hadn't yet logged the type). + return fCreatedNew; +} + + +//--------------------------------------------------------------------------------------- +// +// Helper that returns (creating if necessary) the TypeLoggingInfo in the hash table +// corresponding with the specified TypeHandle +// +// Arguments: +// * th - Key to lookup the TypeLoggingInfo +// * pfCreatedNew - [out] Points to nonzero iff a new TypeLoggingInfo was created +// (i.e., none existed yet in the hash for th). +// * ppLoggedTypesFromModule - [out] Points to the inner hash that was used to do +// the lookup. (An otpimization so the caller doesn't have to find this again, +// if it needs to do further operations on it.) +// +// Return Value: +// TypeLoggingInfo found or created. 
+// +// Assumptions: +// Hash crst must be held by caller +// + +// static +ETW::TypeLoggingInfo ETW::TypeSystemLog::LookupOrCreateTypeLoggingInfo(TypeHandle th, BOOL * pfCreatedNew, LoggedTypesFromModule ** ppLoggedTypesFromModule /* = NULL */) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE(pfCreatedNew != NULL); + _ASSERTE(GetHashCrst()->OwnedByCurrentThread()); + + if (ppLoggedTypesFromModule != NULL) + { + *ppLoggedTypesFromModule = NULL; + } + + BOOL fSucceeded = FALSE; + + if (s_pAllLoggedTypes == NULL) + { + s_pAllLoggedTypes = new (nothrow) AllLoggedTypes; + if (s_pAllLoggedTypes == NULL) + { + // out of memory. Bail on ETW stuff + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + } + + // Step 1: go from LoaderModule to hash of types. + + Module * pLoaderModule = th.GetLoaderModule(); + _ASSERTE(pLoaderModule != NULL); + LoggedTypesFromModule * pLoggedTypesFromModule = s_pAllLoggedTypes->allLoggedTypesHash.Lookup(pLoaderModule); + if (pLoggedTypesFromModule == NULL) + { + pLoggedTypesFromModule = new (nothrow) LoggedTypesFromModule(pLoaderModule); + if (pLoggedTypesFromModule == NULL) + { + // out of memory. Bail on ETW stuff + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + + fSucceeded = FALSE; + EX_TRY + { + s_pAllLoggedTypes->allLoggedTypesHash.Add(pLoggedTypesFromModule); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + { + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + } + + if (ppLoggedTypesFromModule != NULL) + { + *ppLoggedTypesFromModule = pLoggedTypesFromModule; + } + + // Step 2: From hash of types, see if our TypeHandle is there already + TypeLoggingInfo typeLoggingInfoPreexisting = pLoggedTypesFromModule->loggedTypesFromModuleHash.Lookup(th); + if (!typeLoggingInfoPreexisting.th.IsNull()) + { + // Type is already hashed, so it's already logged, so we don't need to + // log it again. + *pfCreatedNew = FALSE; + return typeLoggingInfoPreexisting; + } + + // We haven't logged this type, so we need to continue with this function to + // log it below. Add it to the hash table first so any recursive calls will + // see that this type is already being taken care of + fSucceeded = FALSE; + TypeLoggingInfo typeLoggingInfoNew(th); + EX_TRY + { + pLoggedTypesFromModule->loggedTypesFromModuleHash.Add(typeLoggingInfoNew); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + { + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + + *pfCreatedNew = TRUE; + return typeLoggingInfoNew; +} + + +//--------------------------------------------------------------------------------------- +// +// Called when we determine if a module was unloaded, so we can clear out that module's +// set of types from our hash table +// +// Arguments: +// pModule - Module getting unloaded +// + +// static +void ETW::TypeSystemLog::OnModuleUnload(Module * pModule) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + LoggedTypesFromModule * pLoggedTypesFromModule = NULL; + + { + CrstHolder _crst(GetHashCrst()); + + if (s_pAllLoggedTypes == NULL) + return; + + // Is there a TypesHash for this module? 
+ pLoggedTypesFromModule = s_pAllLoggedTypes->allLoggedTypesHash.Lookup(pModule); + if (pLoggedTypesFromModule == NULL) + return; + + // Remove TypesHash from master hash mapping modules to their TypesHash + s_pAllLoggedTypes->allLoggedTypesHash.Remove(pModule); + } + + // Destruct this TypesHash we just removed + delete pLoggedTypesFromModule; + pLoggedTypesFromModule = NULL; +} + +//--------------------------------------------------------------------------------------- +// +// Whenever we detect that the Types keyword is off, this gets called. This eliminates the +// hash tables that tracked which types were logged (if the hash tables had been created +// previously). If type events are turned back on later, we'll re-log them all as we +// encounter them. +// + +// static +void ETW::TypeSystemLog::OnTypesKeywordTurnedOff() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + CrstHolder _crst(GetHashCrst()); + + if (s_pAllLoggedTypes == NULL) + return; + + // Destruct each of the per-module TypesHashes + AllLoggedTypesHash * pLoggedTypesHash = &s_pAllLoggedTypes->allLoggedTypesHash; + for (AllLoggedTypesHash::Iterator iter = pLoggedTypesHash->Begin(); + iter != pLoggedTypesHash->End(); + ++iter) + { + LoggedTypesFromModule * pLoggedTypesFromModule = *iter; + delete pLoggedTypesFromModule; + } + + // This causes the default ~AllLoggedTypes() to be called, and thus + // ~AllLoggedTypesHash() to be called + delete s_pAllLoggedTypes; + s_pAllLoggedTypes = NULL; +} + + +/****************************************************************************/ +/* Called when ETW is turned ON on an existing process and ModuleRange events are to + be fired */ +/****************************************************************************/ +void ETW::EnumerationLog::ModuleRangeRundown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_PRIVATE_KEYWORD)) + { + ETW::EnumerationLog::EnumerationHelper(NULL, NULL, ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* Called when ETW is turned ON on an existing process */ +/****************************************************************************/ +void ETW::EnumerationLog::StartRundown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + BOOL bIsArmRundownEnabled = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNAPPDOMAINRESOURCEMANAGEMENT_KEYWORD); + BOOL bIsPerfTrackRundownEnabled = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNPERFTRACK_KEYWORD); + BOOL bIsThreadingRundownEnabled = ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNTHREADING_KEYWORD); + + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD) + || + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD) + || + IsRundownNgenKeywordEnabledAndNotSuppressed() + || + 
ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD) + || + bIsArmRundownEnabled + || + bIsPerfTrackRundownEnabled + || + bIsThreadingRundownEnabled) + { + // begin marker event will go to the rundown provider + FireEtwDCStartInit_V1(GetClrInstanceId()); + + // The rundown flag is expected to be checked in the caller, so no need to check here again + DWORD enumerationOptions=ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart; + } + if(IsRundownNgenKeywordEnabledAndNotSuppressed()) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap; + } + if(bIsPerfTrackRundownEnabled) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart; + } + + ETW::EnumerationLog::EnumerationHelper(NULL, NULL, enumerationOptions); + + if (bIsArmRundownEnabled) + { + // When an ETW event consumer asks for ARM rundown, that not only enables + // the ETW events, but also causes some minor behavioral changes in the + // CLR, such as gathering CPU usage baselines for each thread right now, + // and also gathering resource usage information later on (keyed off of + // g_fEnableARM, which we'll set right now). 
+ EnableARM(); + } + + if (bIsArmRundownEnabled || bIsThreadingRundownEnabled) + { + SendThreadRundownEvent(); + } + + // end marker event will go to the rundown provider + FireEtwDCStartComplete_V1(GetClrInstanceId()); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +//--------------------------------------------------------------------------------------- +// +// Simple helper to convert the currently active keywords on the runtime provider into a +// bitmask of enumeration options as defined in ETW::EnumerationLog::EnumerationStructs +// +// Return Value: +// ETW::EnumerationLog::EnumerationStructs bitmask corresponding to the currently +// active keywords on the runtime provider +// + +// static +DWORD ETW::EnumerationLog::GetEnumerationOptionsFromRuntimeKeywords() +{ + LIMITED_METHOD_CONTRACT; + + DWORD enumerationOptions=ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_LOADER_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD) && + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_ENDENUMERATION_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::JitMethodUnload; + } + if(IsRuntimeNgenKeywordEnabledAndNotSuppressed() && + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_ENDENUMERATION_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload; + } + + return enumerationOptions; +} + +//--------------------------------------------------------------------------------------- +// +// Executes a flavor of rundown initiated by a CAPTURE_STATE request to +// code:#EtwCallback. CAPTURE_STATE is the "ETW-sanctioned" way of performing a +// rundown, whereas the CLR's rundown provider was *our* version of this, implemented +// before CAPTURE_STATE was standardized. +// +// When doing a CAPTURE_STATE, the CLR rundown provider is completely unused. Instead, +// we pay attention to the runtime keywords active at the time the CAPTURE_STATE was +// requested, and enumerate through the appropriate objects (AppDomains, assemblies, +// modules, types, methods, threads) and send runtime events for each of them. +// +// CAPTURE_STATE is intended to be used primarily by PerfTrack. Implementing this form +// of rundown allows PerfTrack to be blissfully unaware of the CLR's rundown provider. +// + +// static +void ETW::EnumerationLog::EnumerateForCaptureState() +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + } + CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, KEYWORDZERO)) + { + DWORD enumerationOptions = GetEnumerationOptionsFromRuntimeKeywords(); + + // Send unload events for all remaining domains, including shared domain and + // default domain. 
+ ETW::EnumerationLog::EnumerationHelper(NULL /* module filter */, NULL /* domain filter */, enumerationOptions); + + // Send thread created events for all currently active threads, if requested + if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_THREADING_KEYWORD)) + { + SendThreadRundownEvent(); + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/**************************************************************************************/ +/* Called when ETW is turned OFF on an existing process .Will be used by the controller for end rundown*/ +/**************************************************************************************/ +void ETW::EnumerationLog::EndRundown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + BOOL bIsPerfTrackRundownEnabled = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNPERFTRACK_KEYWORD); + BOOL bIsThreadingRundownEnabled = ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNTHREADING_KEYWORD); + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD) + || + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD) + || + IsRundownNgenKeywordEnabledAndNotSuppressed() + || + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD) + || + bIsPerfTrackRundownEnabled + || + bIsThreadingRundownEnabled + ) + { + // begin marker event will go to the rundown provider + FireEtwDCEndInit_V1(GetClrInstanceId()); + + // The rundown flag is expected to be checked in the caller, so no need to check here again + DWORD enumerationOptions=ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd; + } + if(IsRundownNgenKeywordEnabledAndNotSuppressed()) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap; + } + if(bIsPerfTrackRundownEnabled) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd; + } + + ETW::EnumerationLog::EnumerationHelper(NULL, NULL, enumerationOptions); + + if (bIsThreadingRundownEnabled) + { + SendThreadRundownEvent(); + } + + // end marker event will go to the rundown provider + FireEtwDCEndComplete_V1(GetClrInstanceId()); + } + } EX_CATCH { + STRESS_LOG1(LF_ALWAYS, LL_ERROR, "Exception during Rundown Enumeration, EIP of last AV = %p", g_LastAccessViolationEIP); + } EX_END_CATCH(SwallowAllExceptions); +} + +// #Registration +/*++ + 
+Routine Description: + + Registers provider with ETW tracing framework. + This function should not be called more than once, on + Dll Process attach only. + Not thread safe. + +Arguments: + none + +Return Value: + Returns the return value from RegisterTraceGuids or EventRegister. + +--*/ + +void InitializeEventTracing() +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_ANY; + } + CONTRACTL_END; + + // Do startup-only initialization of any state required by the ETW classes before + // events can be fired + HRESULT hr = ETW::TypeSystemLog::PreRegistrationInit(); + if (FAILED(hr)) + return; + + // Register CLR providers with the OS + if (g_pEtwTracer == NULL) + { + NewHolder tempEtwTracer (new (nothrow) ETW::CEtwTracer()); + if (tempEtwTracer != NULL && tempEtwTracer->Register () == ERROR_SUCCESS) + g_pEtwTracer = tempEtwTracer.Extract (); + } + + g_nClrInstanceId = GetRuntimeId() & 0x0000FFFF; // This will give us duplicate ClrInstanceId after UINT16_MAX + + // Any classes that need some initialization to happen after we've registered the + // providers can do so now + ETW::TypeSystemLog::PostRegistrationInit(); +} + +HRESULT ETW::CEtwTracer::Register() +{ + WRAPPER_NO_CONTRACT; + + OSVERSIONINFO osVer; + osVer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + + if (GetOSVersion(&osVer) == FALSE) { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + else if (osVer.dwMajorVersion < ETW_SUPPORTED_MAJORVER) { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + // if running on OS < Longhorn, skip registration unless reg key is set + // since ETW reg is expensive (in both time and working set) on older OSes + if (osVer.dwMajorVersion < ETW_ENABLED_MAJORVER && !g_fEnableETW && !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PreVistaETWEnabled)) + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + // If running on OS >= Longhorn, skip registration if ETW is not enabled + if (osVer.dwMajorVersion >= ETW_ENABLED_MAJORVER && !g_fEnableETW && !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_VistaAndAboveETWEnabled)) + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + EventRegisterMicrosoft_Windows_DotNETRuntime(); + EventRegisterMicrosoft_Windows_DotNETRuntimePrivate(); + EventRegisterMicrosoft_Windows_DotNETRuntimeRundown(); + + // Stress Log ETW events are available only on the desktop version of the runtime +#ifndef FEATURE_CORECLR + EventRegisterMicrosoft_Windows_DotNETRuntimeStress(); +#endif // !FEATURE_CORECLR + + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimeHandle; + MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimePrivateHandle; + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimeRundownHandle; +#ifndef FEATURE_CORECLR + MICROSOFT_WINDOWS_DOTNETRUNTIME_STRESS_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimeStressHandle; +#endif // !FEATURE_CORECLR + + return S_OK; +} + +// #Unregistration +/*++ + +Routine Description: + Unregisters the provider from ETW. This function + should only be called once from DllMain Detach process. + Not thread safe. 
+ +Arguments: + none + +Return Value: + Returns ERROR_SUCCESS + +--*/ +HRESULT ETW::CEtwTracer::UnRegister() +{ + LIMITED_METHOD_CONTRACT; + + EventUnregisterMicrosoft_Windows_DotNETRuntime(); + EventUnregisterMicrosoft_Windows_DotNETRuntimePrivate(); + EventUnregisterMicrosoft_Windows_DotNETRuntimeRundown(); +#ifndef FEATURE_CORECLR + EventUnregisterMicrosoft_Windows_DotNETRuntimeStress(); +#endif // !FEATURE_CORECLR + return S_OK; +} + +extern "C" +{ + ETW_INLINE + void EtwCallout(REGHANDLE RegHandle, + PCEVENT_DESCRIPTOR Descriptor, + ULONG ArgumentCount, + PEVENT_DATA_DESCRIPTOR EventData) + { + WRAPPER_NO_CONTRACT; + UINT8 providerIndex = 0; + if(RegHandle == Microsoft_Windows_DotNETRuntimeHandle) { + providerIndex = 0; + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeRundownHandle) { + providerIndex = 1; + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeStressHandle) { + providerIndex = 2; + } else if(RegHandle == Microsoft_Windows_DotNETRuntimePrivateHandle) { + providerIndex = 3; + } else { + _ASSERTE(!"Provider not one of Runtime, Rundown, Private and Stress"); + return; + } + + // stacks are supposed to be fired for only the events with a bit set in the etwStackSupportedEvents bitmap + if(((etwStackSupportedEvents[providerIndex][Descriptor->Id/8]) & + (1<<(Descriptor->Id%8))) != 0) + { + if(RegHandle == Microsoft_Windows_DotNETRuntimeHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, &CLRStackWalk, &CLRStackId); + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeRundownHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, &CLRStackWalkDCStart, &CLRStackRundownId); + } else if(RegHandle == Microsoft_Windows_DotNETRuntimePrivateHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, &CLRStackWalkPrivate, &CLRStackPrivateId); + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeStressHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_STRESS_PROVIDER_Context, &CLRStackWalkStress, &CLRStackStressId); + } + } + } +} + +extern "C" +{ + + // #EtwCallback: + // During the build, MC generates the code to register our provider, and to register + // our ETW callback. (This is buried under Intermediates, in a path like + // Intermediate\clr\corguids.nativeproj_1723354836\obj1c\x86\ClrEtwAll.h.) The ETW + // callback is also generated for us by MC. But we can hook into this generated + // callback by #defining MCGEN_PRIVATE_ENABLE_CALLBACK_V2 to be a call to this + // function (EtwCallback), thus causing EtwCallback to get called after the + // MC-generated code executes. + // + // This callback function is called whenever an ETW session is enabled or disabled. A + // callback function needs to be specified when the provider is registered. C style + // callback wrappers are needed during event registration. To handle the callback + // action in this class, we pass "this" during provider registration and modify the + // context to the relevant context in the C callback later. 
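+    // As a rough illustration of that hook (a sketch only; the macro name comes from the
+    // MC-generated header, and the parameter list shown here simply mirrors EtwCallback's
+    // parameters below):
+    //
+    //     #define MCGEN_PRIVATE_ENABLE_CALLBACK_V2(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext) \
+    //         EtwCallback(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext)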
+    ETW_INLINE
+    void EtwCallback(
+        _In_ LPCGUID SourceId,
+        _In_ ULONG ControlCode,
+        _In_ UCHAR Level,
+        _In_ ULONGLONG MatchAnyKeyword,
+        _In_ ULONGLONG MatchAllKeyword,
+        _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData,
+        _Inout_opt_ PVOID CallbackContext)
+    {
+        CONTRACTL {
+            NOTHROW;
+            if(g_fEEStarted) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);};
+            MODE_ANY;
+            CAN_TAKE_LOCK;
+            STATIC_CONTRACT_FAULT;
+            SO_NOT_MAINLINE;
+        } CONTRACTL_END;
+
+        // Mark that we are the special ETWRundown thread. Currently all this does
+        // is ensure that AVs thrown in this thread are treated as normal exceptions.
+        // This allows us to catch and swallow them. We can do this because we have
+        // a reasonably strong belief that doing ETW Rundown does not change runtime state,
+        // and thus, if an AV happens, it is better to simply give up logging ETW than
+        // to terminate the process (which is what we would do normally).
+        ClrFlsThreadTypeSwitch etwRundownThreadHolder(ThreadType_ETWRundownThread);
+        PMCGEN_TRACE_CONTEXT context = (PMCGEN_TRACE_CONTEXT)CallbackContext;
+
+        BOOLEAN bIsPublicTraceHandle =
+#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT
+            McGenPreVista ? ((ULONGLONG)Microsoft_Windows_DotNETRuntimeHandle==(ULONGLONG)context) :
+#endif
+            (context->RegistrationHandle==Microsoft_Windows_DotNETRuntimeHandle);
+
+        BOOLEAN bIsPrivateTraceHandle =
+#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT
+            McGenPreVista ? ((ULONGLONG)Microsoft_Windows_DotNETRuntimePrivateHandle==(ULONGLONG)context) :
+#endif
+            (context->RegistrationHandle==Microsoft_Windows_DotNETRuntimePrivateHandle);
+
+        BOOLEAN bIsRundownTraceHandle =
+#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT
+            McGenPreVista ? ((ULONGLONG)Microsoft_Windows_DotNETRuntimeRundownHandle==(ULONGLONG)context) :
+#endif
+            (context->RegistrationHandle==Microsoft_Windows_DotNETRuntimeRundownHandle);
+
+
+        // A manifest-based provider can be enabled to multiple event tracing sessions.
+        // As long as there is at least 1 enabled session, IsEnabled will be TRUE.
+        // Since classic providers can be enabled to only a single session,
+        // IsEnabled will be TRUE when it is enabled and FALSE when disabled.
+        BOOL bEnabled =
+            ((ControlCode == EVENT_CONTROL_CODE_ENABLE_PROVIDER) ||
+             (ControlCode == EVENT_CONTROL_CODE_CAPTURE_STATE));
+        if(bEnabled)
+        {
+            // TypeSystemLog needs a notification when certain keywords are modified, so
+            // give it a hook here.
+ if (g_fEEStarted && !g_fEEShutDown && bIsPublicTraceHandle) + { + ETW::TypeSystemLog::OnKeywordsChanged(); + } + + if (bIsPrivateTraceHandle) + { + ETW::GCLog::GCSettingsEvent(); + if(g_fEEStarted && !g_fEEShutDown) + { + ETW::EnumerationLog::ModuleRangeRundown(); + } + } + +#ifdef _WIN64 // We only do this on 64 bit (NOT ARM, because ARM uses frame based stack crawling) + // If we have turned on the JIT keyword to the VERBOSE setting (needed to get JIT names) then + // we assume that we also want good stack traces so we need to publish unwind information so + // ETW can get at it + if(bIsPublicTraceHandle && ETW_CATEGORY_ENABLED((*context), TRACE_LEVEL_VERBOSE, CLR_RUNDOWNJIT_KEYWORD)) + UnwindInfoTable::PublishUnwindInfo(g_fEEStarted != FALSE); +#endif + if(g_fEEStarted && !g_fEEShutDown && bIsRundownTraceHandle) + { + // Fire the runtime information event + ETW::InfoLog::RuntimeInformation(ETW::InfoLog::InfoStructs::Callback); + + // Start and End Method/Module Rundowns + // Used to fire events that we missed since we started the controller after the process started + // flags for immediate start rundown + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNSTART_KEYWORD)) + ETW::EnumerationLog::StartRundown(); + + // flags delayed end rundown + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNEND_KEYWORD)) + ETW::EnumerationLog::EndRundown(); + } + + if (g_fEEStarted && !g_fEEShutDown && (ControlCode == EVENT_CONTROL_CODE_CAPTURE_STATE)) + { + ETW::EnumerationLog::EnumerateForCaptureState(); + } + + // Special check for the runtime provider's GCHeapCollectKeyword. Profilers + // flick this to force a full GC. + if (g_fEEStarted && !g_fEEShutDown && bIsPublicTraceHandle && + ((MatchAnyKeyword & CLR_GCHEAPCOLLECT_KEYWORD) != 0)) + { + // Profilers may (optionally) specify extra data in the filter parameter + // to log with the GCStart event. + LONGLONG l64ClientSequenceNumber = 0; + if ((FilterData != NULL) && + (FilterData->Type == 1) && + (FilterData->Size == sizeof(l64ClientSequenceNumber))) + { + l64ClientSequenceNumber = *(LONGLONG *) (FilterData->Ptr); + } + ETW::GCLog::ForceGC(l64ClientSequenceNumber); + } + } +#ifdef FEATURE_COMINTEROP + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, CCWRefCountChange)) + g_pConfig->SetLogCCWRefCountChangeEnabled(bEnabled != 0); +#endif // FEATURE_COMINTEROP + + } +} + +#endif // FEATURE_REDHAWK +#else // !FEATURE_DTRACE + +/**************************************************************************************/ +/* Helper data structure for supporting string in Dtrace probes. Since Dtrace does not support Unicode */ +/* in its printf API, we cast the unicode string to UFT8 string and then output them. 
*/ +/**************************************************************************************/ +#define DTRACE_OUTPUT_STRING_LEN 512 +const CHAR szDtraceOutputNULL[]="NULL"; +INT32 WideCharToMultiByte(LPCWSTR wszSrcStr, LPSTR szDstStr); + +#include + +// The possible value of COMPlus_ETWEnabled should be '0' or '1' +#define SIZE_ETWEnabled 2 +// The possible value of COMPlus_EventInfo should be a string in the following format: +// GUID:HexNumfer:Level +// GUID: For example e13c0d23-ccbc-4e12-931b-d9cc2eee27e4 (36 bytes) +// HewNumber: 0xffffffff (10 bytes) +// Level: 0~9 (1 bytes) +// Therefore the length of it should be 36 + 1 + 10 + 1 + 1 + 1 = 50 +#define SIZE_EventInfo 50 + +ULONG ETW::CEtwTracer::Register() +{ + // Get Env Var COMPlus_ETWEnabled + char szETWEnabled[SIZE_ETWEnabled]; + DWORD newLen = GetEnvironmentVariableA("COMPlus_ETWEnabled", szETWEnabled, SIZE_ETWEnabled); + if (newLen == 0 || newLen >= SIZE_ETWEnabled || strcmp(szETWEnabled, "1") != 0) + return 0; + + // Get Env Var COMPlus_EventInfo + char szEventInfo[SIZE_EventInfo]; + newLen = GetEnvironmentVariableA("COMPlus_EventInfo", szEventInfo, SIZE_EventInfo); + if (newLen == 0 || newLen >= SIZE_EventInfo || strchr(szEventInfo, ' ') != NULL) + return 0; + + // Get Env Var COMPlus_EventLogFileName + char szEventLogFN[_MAX_FNAME]; + newLen = GetEnvironmentVariableA("COMPlus_EventLogFileName", szEventLogFN, _MAX_FNAME); + if (newLen == 0 || newLen >= _MAX_FNAME || strchr(szEventLogFN, '|') != NULL) + return 0; + char szEventLogFullPath[_MAX_PATH]; + newLen = GetFullPathNameA(szEventLogFN, _MAX_PATH, szEventLogFullPath, NULL); + if (newLen == 0 || newLen > _MAX_PATH || strchr(szEventLogFN, '|') != NULL) + return 0; + + // Get the process id which is ued in dtrace to fire the probes of the process + int nProcessId = GetCurrentProcessId(); + + // Start the log (By calling an PAL API to connect to a Unix Domain Server) + PAL_StartLog(szEventInfo, szEventLogFullPath, nProcessId); + + return 0; +} + +INT32 WideCharToMultiByte(LPCWSTR wszSrcStr, LPSTR szDstStr) +{ + INT32 nSize = WideCharToMultiByte(CP_UTF8, 0, wszSrcStr, -1, NULL, 0, NULL, NULL); + if (0 == nSize) + { + return 0; + } + if (nSize > DTRACE_OUTPUT_STRING_LEN-1) + { + nSize = DTRACE_OUTPUT_STRING_LEN-1; + } + INT32 nSize2 = WideCharToMultiByte(CP_UTF8, 0, wszSrcStr, -1, szDstStr, nSize, NULL, NULL); + if(nSize2 != nSize || nSize2 <=0 ) + { + return 0; + } + return nSize; +} + +void EEConfigSetup_V1() +{ + FireEtwEEConfigSetup_V1(GetClrInstanceId()); +} + +void EEConfigSetupEnd_V1() +{ + FireEtwEEConfigSetupEnd_V1(GetClrInstanceId()); +} + +void LdSysBases_V1() +{ + FireEtwLdSysBases_V1(GetClrInstanceId()); +} + +void LdSysBasesEnd_V1() +{ + FireEtwLdSysBasesEnd_V1(GetClrInstanceId()); +} + +void ExecExe_V1() +{ + FireEtwExecExe_V1(GetClrInstanceId()); +} + +void ExecExeEnd_V1() +{ + FireEtwExecExeEnd_V1(GetClrInstanceId()); +} + +void Main_V1() +{ + FireEtwMain_V1(GetClrInstanceId()); +} + +void MainEnd_V1() +{ + FireEtwMainEnd_V1(GetClrInstanceId()); +} + + +void ApplyPolicyStart_V1() +{ + FireEtwApplyPolicyStart_V1(GetClrInstanceId()); +} + +void ApplyPolicyEnd_V1() +{ + FireEtwApplyPolicyEnd_V1(GetClrInstanceId()); +} + +void PrestubWorker_V1() +{ + FireEtwPrestubWorker_V1(GetClrInstanceId()); +} + +void PrestubWorkerEnd_V1() +{ + FireEtwPrestubWorkerEnd_V1(GetClrInstanceId()); +} + +void ExplicitBindStart_V1() +{ + FireEtwExplicitBindStart_V1(GetClrInstanceId()); +} + +void ExplicitBindEnd_V1() +{ + FireEtwExplicitBindEnd_V1(GetClrInstanceId()); +} + +void 
ParseXml_V1() +{ + FireEtwParseXml_V1(GetClrInstanceId()); +} + +void ParseXmlEnd_V1() +{ + FireEtwParseXmlEnd_V1(GetClrInstanceId()); +} + +void InitDefaultDomain_V1() +{ + FireEtwInitDefaultDomain_V1(GetClrInstanceId()); +} + +void InitDefaultDomainEnd_V1() +{ + FireEtwInitDefaultDomainEnd_V1(GetClrInstanceId()); +} +void AllowBindingRedirs_V1() +{ + FireEtwAllowBindingRedirs_V1(GetClrInstanceId()); +} + +void AllowBindingRedirsEnd_V1() +{ + FireEtwAllowBindingRedirsEnd_V1(GetClrInstanceId()); +} + +void EEConfigSync_V1() +{ + FireEtwEEConfigSync_V1(GetClrInstanceId()); +} + +void EEConfigSyncEnd_V1() +{ + FireEtwEEConfigSyncEnd_V1(GetClrInstanceId()); +} + +void FusionBinding_V1() +{ + FireEtwFusionBinding_V1(GetClrInstanceId()); +} + +void FusionBindingEnd_V1() +{ + FireEtwFusionBindingEnd_V1(GetClrInstanceId()); +} + +void LoaderCatchCall_V1() +{ + FireEtwLoaderCatchCall_V1(GetClrInstanceId()); +} + +void LoaderCatchCallEnd_V1() +{ + FireEtwLoaderCatchCallEnd_V1(GetClrInstanceId()); +} + +void FusionInit_V1() +{ + FireEtwFusionInit_V1(GetClrInstanceId()); +} + +void FusionInitEnd_V1() +{ + FireEtwFusionInitEnd_V1(GetClrInstanceId()); +} + +void FusionAppCtx_V1() +{ + FireEtwFusionAppCtx_V1(GetClrInstanceId()); +} + +void FusionAppCtxEnd_V1() +{ + FireEtwFusionAppCtxEnd_V1(GetClrInstanceId()); +} + +void SecurityCatchCall_V1() +{ + FireEtwSecurityCatchCall_V1(GetClrInstanceId()); +} + +void SecurityCatchCallEnd_V1() +{ + FireEtwSecurityCatchCallEnd_V1(GetClrInstanceId()); +} + + +#endif // !FEATURE_DTRACE + +#ifndef FEATURE_REDHAWK + +/****************************************************************************/ +/* This is called by the runtime when an exception is thrown */ +/****************************************************************************/ +void ETW::ExceptionLog::ExceptionThrown(CrawlFrame *pCf, BOOL bIsReThrownException, BOOL bIsNewException) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + PRECONDITION(GetThread() != NULL); + PRECONDITION(GetThread()->GetThrowable() != NULL); + } CONTRACTL_END; + + if(!(bIsReThrownException || bIsNewException)) + { + return; + } + if(!ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, ExceptionThrown_V1)) + { + return; + } + EX_TRY + { + SString exceptionType(L""); + LPWSTR exceptionMessage = NULL; + BOOL bIsCLSCompliant=FALSE, bIsCSE=FALSE, bIsNestedException=FALSE, bHasInnerException=FALSE; + UINT16 exceptionFlags=0; + PVOID exceptionEIP=0; + + Thread *pThread = GetThread(); + + struct + { + OBJECTREF exceptionObj; + OBJECTREF innerExceptionObj; + STRINGREF exceptionMessageRef; + } gc; + ZeroMemory(&gc, sizeof(gc)); + GCPROTECT_BEGIN(gc); + + gc.exceptionObj = pThread->GetThrowable(); + gc.innerExceptionObj = ((EXCEPTIONREF)gc.exceptionObj)->GetInnerException(); + + ThreadExceptionState *pExState = pThread->GetExceptionState(); +#ifndef WIN64EXCEPTIONS + PTR_ExInfo pExInfo = NULL; +#else + PTR_ExceptionTracker pExInfo = NULL; +#endif //!WIN64EXCEPTIONS + pExInfo = pExState->GetCurrentExceptionTracker(); + _ASSERTE(pExInfo != NULL); + bIsNestedException = (pExInfo->GetPreviousExceptionTracker() != NULL); + bIsCSE = (pExInfo->GetCorruptionSeverity() == ProcessCorrupting); + bIsCLSCompliant = IsException((gc.exceptionObj)->GetMethodTable()) && + ((gc.exceptionObj)->GetMethodTable() != MscorlibBinder::GetException(kRuntimeWrappedException)); + + // A rethrown exception is also a nested exception + // but since we have a separate flag for it, lets unset the nested flag + if(bIsReThrownException) + { + bIsNestedException = 
FALSE; + } + bHasInnerException = (gc.innerExceptionObj) != NULL; + + exceptionFlags = ((bHasInnerException ? ETW::ExceptionLog::ExceptionStructs::HasInnerException : 0) | + (bIsNestedException ? ETW::ExceptionLog::ExceptionStructs::IsNestedException : 0) | + (bIsReThrownException ? ETW::ExceptionLog::ExceptionStructs::IsReThrownException : 0) | + (bIsCSE ? ETW::ExceptionLog::ExceptionStructs::IsCSE : 0) | + (bIsCLSCompliant ? ETW::ExceptionLog::ExceptionStructs::IsCLSCompliant : 0)); + + if (pCf->IsFrameless()) + { +#ifndef _WIN64 + exceptionEIP = (PVOID)pCf->GetRegisterSet()->ControlPC; +#else + exceptionEIP = (PVOID)GetIP(pCf->GetRegisterSet()->pContext); +#endif //!_WIN64 + } + else + { + exceptionEIP = (PVOID)(pCf->GetFrame()->GetIP()); + } + + // On platforms other than IA64, we are at the instruction after the faulting instruction + // This check has been copied from StackTraceInfo::AppendElement + if (!(pCf->HasFaulted() || pCf->IsIPadjusted()) && exceptionEIP != 0) + { + exceptionEIP = (PVOID)((UINT_PTR)exceptionEIP - 1); + } + + gc.exceptionMessageRef = ((EXCEPTIONREF)gc.exceptionObj)->GetMessage(); + TypeHandle exceptionTypeHandle = (gc.exceptionObj)->GetTypeHandle(); + exceptionTypeHandle.GetName(exceptionType); + WCHAR *exceptionTypeName = (WCHAR *)exceptionType.GetUnicode(); + + if(gc.exceptionMessageRef != NULL) + { + exceptionMessage = (gc.exceptionMessageRef)->GetBuffer(); + } + + HRESULT exceptionHRESULT = ((EXCEPTIONREF)gc.exceptionObj)->GetHResult(); + + FireEtwExceptionThrown_V1(exceptionTypeName, + exceptionMessage, + exceptionEIP, + exceptionHRESULT, + exceptionFlags, + GetClrInstanceId()); + GCPROTECT_END(); + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when a domain is loaded */ +/****************************************************************************/ +void ETW::LoaderLog::DomainLoadReal(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_LOADER_KEYWORD)) + { + DWORD dwEventOptions = ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad; + ETW::LoaderLog::SendDomainEvent(pDomain, dwEventOptions, wszFriendlyName); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when an AppDomain is unloaded */ +/****************************************************************************/ +void ETW::LoaderLog::DomainUnload(AppDomain *pDomain) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + KEYWORDZERO)) + { + if(!pDomain->NoAccessToHandleTable()) + { + DWORD enumerationOptions = ETW::EnumerationLog::GetEnumerationOptionsFromRuntimeKeywords(); + + // Domain unload also causes type unload events + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::TypeUnload; + } + + ETW::EnumerationLog::EnumerationHelper(NULL, pDomain, enumerationOptions); + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + 
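+// The "exceptionEIP - 1" adjustment in ExceptionThrown above follows the usual stack-walking
+// convention: for a frame that did not fault, the recorded address is a return address (the
+// instruction after the call), so backing it up by one byte keeps the address inside the call
+// instruction and attributes the exception to the correct call site. A minimal sketch of that
+// rule, using a hypothetical helper name (illustration only, not part of this change):
+//
+//     static const void* AdjustIPForAttribution(const void* ip, bool hasFaulted, bool isIPAdjusted)
+//     {
+//         if (hasFaulted || isIPAdjusted || ip == NULL)
+//             return ip;                                // already points at the faulting instruction
+//         return reinterpret_cast<const char*>(ip) - 1; // step back into the call instruction
+//     }
+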
+/****************************************************************************/ +/* This is called by the runtime when a LoaderAllocator is unloaded */ +/****************************************************************************/ +void ETW::LoaderLog::CollectibleLoaderAllocatorUnload(AssemblyLoaderAllocator *pLoaderAllocator) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + KEYWORDZERO)) + { + DWORD enumerationOptions = ETW::EnumerationLog::GetEnumerationOptionsFromRuntimeKeywords(); + + // Collectible Loader Allocator unload also causes type unload events + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::TypeUnload; + } + + ETW::EnumerationLog::IterateCollectibleLoaderAllocator(pLoaderAllocator, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when the runtime is loaded + Function gets called by both the Callback mechanism and regular ETW events. + Type is used to differentiate whether its a callback or a normal call*/ +/****************************************************************************/ +void ETW::InfoLog::RuntimeInformation(INT32 type) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY { + if((type == ETW::InfoLog::InfoStructs::Normal && ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, RuntimeInformationStart)) +#ifndef FEATURE_PAL + || + (type == ETW::InfoLog::InfoStructs::Callback && ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, RuntimeInformationDCStart)) +#endif //!FEATURE_PAL + ) + { +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + UINT8 startupMode = 0; + UINT startupFlags = 0; + WCHAR dllPath[MAX_PATH+1] = {0}; + UINT8 Sku = 0; + _ASSERTE(g_fEEManagedEXEStartup || //CLR started due to a managed exe + g_fEEIJWStartup || //CLR started as a mixed mode Assembly + CLRHosted() || g_fEEHostedStartup || //CLR started through one of the Hosting API CLRHosted() returns true if CLR started through the V2 Interface while + // g_fEEHostedStartup is true if CLR is hosted through the V1 API. 
+ g_fEEComActivatedStartup || //CLR started as a COM object + g_fEEOtherStartup ); //In case none of the 4 above mentioned cases are true for example ngen, ildasm then we asssume its a "other" startup + +#ifdef FEATURE_CORECLR + Sku = ETW::InfoLog::InfoStructs::CoreCLR; +#else + Sku = ETW::InfoLog::InfoStructs::DesktopCLR; +#endif //FEATURE_CORECLR + + //version info for clr.dll + USHORT vmMajorVersion = VER_MAJORVERSION; + USHORT vmMinorVersion = VER_MINORVERSION; + USHORT vmBuildVersion = VER_PRODUCTBUILD; + USHORT vmQfeVersion = VER_PRODUCTBUILD_QFE; + + //version info for mscorlib.dll + USHORT bclMajorVersion = VER_ASSEMBLYMAJORVERSION; + USHORT bclMinorVersion = VER_ASSEMBLYMINORVERSION; + USHORT bclBuildVersion = VER_ASSEMBLYBUILD; + USHORT bclQfeVersion = VER_ASSEMBLYBUILD_QFE; + +#ifndef FEATURE_PAL + LPCGUID comGUID=g_fEEComObjectGuid; +#else + unsigned int comGUID=0; +#endif //!FEATURE_PAL + +#ifndef FEATURE_DTRACE + LPWSTR lpwszCommandLine = L""; + LPWSTR lpwszRuntimeDllPath = (LPWSTR)dllPath; +#else + SIZE_T lpwszCommandLine = (SIZE_T)szDtraceOutput1; + SIZE_T lpwszRuntimeDllPath = (SIZE_T)szDtraceOutput2; +#endif //!FEATURE_DTRACE + +#ifndef FEATURE_CORECLR + startupFlags = CorHost2::GetStartupFlags(); +#endif //!FEATURE_CORECLR + + // Determine the startupmode + if(g_fEEIJWStartup) + { + //IJW Mode + startupMode = ETW::InfoLog::InfoStructs::IJW; + } + else if(g_fEEManagedEXEStartup) + { + //managed exe + startupMode = ETW::InfoLog::InfoStructs::ManagedExe; +#ifndef FEATURE_DTRACE + lpwszCommandLine = WszGetCommandLine(); +#else + INT32 nSize = WideCharToMultiByte(WszGetCommandLine(), szDtraceOutput1); + if(nSize > 0) { + lpwszCommandLine = (SIZE_T)szDtraceOutput1; + } +#endif //!FEATURE_DTRACE + } + else if (CLRHosted() || g_fEEHostedStartup) + { + //Hosted CLR + startupMode = ETW::InfoLog::InfoStructs::HostedCLR; + } + else if(g_fEEComActivatedStartup) + { + //com activated + startupMode = ETW::InfoLog::InfoStructs::COMActivated; + } + else if(g_fEEOtherStartup) + { + //startup type is other + startupMode = ETW::InfoLog::InfoStructs::Other; + } + + _ASSERTE (NumItems(dllPath) > MAX_PATH); + // if WszGetModuleFileName fails, we return an empty string + if (!WszGetModuleFileName(GetCLRModule(), dllPath, MAX_PATH)) { + dllPath[0] = 0; + } + dllPath[MAX_PATH] = 0; +#ifdef FEATURE_DTRACE + _ASSERTE (NumItems(szDtraceOutput2) >= NumItems(dllPath)); + INT32 nSize = WideCharToMultiByte(dllPath, szDtraceOutput2); + if(nSize > 0) { + lpwszRuntimeDllPath = (SIZE_T)szDtraceOutput2; + } +#endif // FEATURE_DTRACE + + if(type == ETW::InfoLog::InfoStructs::Callback) + { + FireEtwRuntimeInformationDCStart( GetClrInstanceId(), + Sku, + bclMajorVersion, + bclMinorVersion, + bclBuildVersion, + bclQfeVersion, + vmMajorVersion, + vmMinorVersion, + vmBuildVersion, + vmQfeVersion, + startupFlags, + startupMode, + lpwszCommandLine, + comGUID, + lpwszRuntimeDllPath ); + } + else + { + FireEtwRuntimeInformationStart( GetClrInstanceId(), + Sku, + bclMajorVersion, + bclMinorVersion, + bclBuildVersion, + bclQfeVersion, + vmMajorVersion, + vmMinorVersion, + vmBuildVersion, + vmQfeVersion, + startupFlags, + startupMode, + lpwszCommandLine, + comGUID, + lpwszRuntimeDllPath ); + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/*******************************************************/ +/* This is called by the runtime when a method is jitted completely */ +/*******************************************************/ +void ETW::MethodLog::MethodJitted(MethodDesc *pMethodDesc, SString 
*namespaceOrClassName, SString *methodName, SString *methodSignature, SIZE_T pCode, ReJITID rejitID) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD)) + { + ETW::MethodLog::SendMethodEvent(pMethodDesc, ETW::EnumerationLog::EnumerationStructs::JitMethodLoad, TRUE, namespaceOrClassName, methodName, methodSignature, pCode, rejitID); + } +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JITTEDMETHODILTONATIVEMAP_KEYWORD)) + { + // The call to SendMethodILToNativeMapEvent assumes that the debugger's lazy + // data has already been initialized. + + // g_pDebugInterface is initialized on startup on desktop CLR, regardless of whether a debugger + // or profiler is loaded. So it should always be available. + _ASSERTE(g_pDebugInterface != NULL); + g_pDebugInterface->InitializeLazyDataIfNecessary(); + + ETW::MethodLog::SendMethodILToNativeMapEvent(pMethodDesc, ETW::EnumerationLog::EnumerationStructs::JitMethodILToNativeMap, rejitID); + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/*************************************************/ +/* This is called by the runtime when method jitting started */ +/*************************************************/ +void ETW::MethodLog::MethodJitting(MethodDesc *pMethodDesc, SString *namespaceOrClassName, SString *methodName, SString *methodSignature) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + PRECONDITION(pMethodDesc != NULL); + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_VERBOSE, + CLR_JIT_KEYWORD)) + { + pMethodDesc->GetMethodInfo(*namespaceOrClassName, *methodName, *methodSignature); + ETW::MethodLog::SendMethodJitStartEvent(pMethodDesc, namespaceOrClassName, methodName, methodSignature); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/**********************************************************************/ +/* This is called by the runtime when a single jit helper method with stub is initialized */ +/**********************************************************************/ +void ETW::MethodLog::StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + PRECONDITION(ullHelperStartAddress != 0); + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD)) + { + DWORD dwHelperSize=0; + Stub::RecoverStubAndSize((TADDR)ullHelperStartAddress, &dwHelperSize); + ETW::MethodLog::SendHelperEvent(ullHelperStartAddress, dwHelperSize, pHelperName); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/**********************************************************/ +/* This is called by the runtime when helpers with stubs are initialized */ +/**********************************************************/ +void ETW::MethodLog::StubsInitialized(PVOID *pHelperStartAddresss, PVOID *pHelperNames, LONG lNoOfHelpers) +{ + WRAPPER_NO_CONTRACT; + + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD)) + { + for(int i=0; iIsThunking()) +#endif + { + MethodTable::MethodIterator iter(pMethodTable); + for (; iter.IsValid(); iter.Next()) + { 
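+                // Walk the methods introduced by this MethodTable and send an NGen method-load
+                // event for each one that is restored; the GetMethodTable_NoLogging check below
+                // skips slots that belong to a base type.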
+ MethodDesc *pMD = (MethodDesc *)(iter.GetMethodDesc()); + if(pMD && pMD->IsRestored() && pMD->GetMethodTable_NoLogging() == pMethodTable) + ETW::MethodLog::SendMethodEvent(pMD, ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad, FALSE); + } + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + + +/****************************************************************************/ +/* This is called by the runtime when a Strong Name Verification Starts */ +/****************************************************************************/ +void ETW::SecurityLog::StrongNameVerificationStart(DWORD dwInFlags, __in LPWSTR strFullyQualifiedAssemblyName) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_CORECLR +#ifndef FEATURE_DTRACE + FireEtwStrongNameVerificationStart_V1(dwInFlags, 0, strFullyQualifiedAssemblyName, GetClrInstanceId()); +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(strFullyQualifiedAssemblyName, szDtraceOutput1); + if (nSize != 0) + FireEtwStrongNameVerificationStart_V1(dwInFlags, 0, szDtraceOutput1, GetClrInstanceId()); +#endif +#endif // !FEATURE_CORECLR +} + + +/****************************************************************************/ +/* This is called by the runtime when a Strong Name Verification Ends */ +/****************************************************************************/ +void ETW::SecurityLog::StrongNameVerificationStop(DWORD dwInFlags,ULONG result, __in LPWSTR strFullyQualifiedAssemblyName) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_CORECLR +#ifndef FEATURE_DTRACE + FireEtwStrongNameVerificationStop_V1(dwInFlags, result, strFullyQualifiedAssemblyName, GetClrInstanceId()); +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(strFullyQualifiedAssemblyName, szDtraceOutput1); + if (nSize != 0) + FireEtwStrongNameVerificationStop_V1(dwInFlags, result, szDtraceOutput1, GetClrInstanceId()); +#endif +#endif // !FEATURE_CORECLR +} + +/****************************************************************************/ +/* This is called by the runtime when field transparency calculations begin */ +/****************************************************************************/ +void ETW::SecurityLog::FireFieldTransparencyComputationStart(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwFieldTransparencyComputationStart(wszFieldName, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeField = WideCharToMultiByte(wszFieldName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeField != 0 && nSizeModule != 0) + FireEtwFieldTransparencyComputationStart(szDtraceOutput1, szDtraceOutput2, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when field transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireFieldTransparencyComputationEnd(LPCWSTR 
wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwFieldTransparencyComputationEnd(wszFieldName, wszModuleName, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeField = WideCharToMultiByte(wszFieldName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeField != 0 && nSizeModule != 0) + FireEtwFieldTransparencyComputationEnd(szDtraceOutput1, szDtraceOutput2, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when method transparency calculations begin */ +/*****************************************************************************/ +void ETW::SecurityLog::FireMethodTransparencyComputationStart(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwMethodTransparencyComputationStart(wszMethodName, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeMethod = WideCharToMultiByte(wszMethodName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeMethod != 0 && nSizeModule != 0) + FireEtwMethodTransparencyComputationStart(szDtraceOutput1, szDtraceOutput2, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when method transparency calculations end */ +/********************************************(********************************/ +void ETW::SecurityLog::FireMethodTransparencyComputationEnd(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwMethodTransparencyComputationEnd(wszMethodName, wszModuleName, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeMethod = WideCharToMultiByte(wszMethodName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeMethod != 0 && nSizeModule != 0) + FireEtwMethodTransparencyComputationEnd(szDtraceOutput1, szDtraceOutput2, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when module transparency calculations begin */ +/*****************************************************************************/ +void ETW::SecurityLog::FireModuleTransparencyComputationStart(LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + 
FireEtwModuleTransparencyComputationStart(wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwModuleTransparencyComputationStart(szDtraceOutput1, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when module transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireModuleTransparencyComputationEnd(LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsTreatAsSafe, + BOOL fIsOpportunisticallyCritical, + DWORD dwSecurityRuleSet) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwModuleTransparencyComputationEnd(wszModuleName, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsTreatAsSafe, fIsOpportunisticallyCritical, dwSecurityRuleSet, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwModuleTransparencyComputationEnd(szDtraceOutput1, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsTreatAsSafe, fIsOpportunisticallyCritical, dwSecurityRuleSet, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when token transparency calculations begin */ +/****************************************************************************/ +void ETW::SecurityLog::FireTokenTransparencyComputationStart(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTokenTransparencyComputationStart(dwToken, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwTokenTransparencyComputationStart(dwToken, szDtraceOutput1, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when token transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireTokenTransparencyComputationEnd(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTokenTransparencyComputationEnd(dwToken, wszModuleName, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwTokenTransparencyComputationEnd(dwToken, szDtraceOutput1, dwAppDomain, 
fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when type transparency calculations begin */ +/*****************************************************************************/ +void ETW::SecurityLog::FireTypeTransparencyComputationStart(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTypeTransparencyComputationStart(wszTypeName, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeType = WideCharToMultiByte(wszTypeName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeType != 0 && nSizeModule != 0) + FireEtwTypeTransparencyComputationStart(szDtraceOutput1, szDtraceOutput2, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when type transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireTypeTransparencyComputationEnd(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTypeTransparencyComputationEnd(wszTypeName, wszModuleName, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeType = WideCharToMultiByte(wszTypeName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeType != 0 && nSizeModule != 0) + FireEtwTypeTransparencyComputationEnd(szDtraceOutput1, szDtraceOutput2, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/**********************************************************************************/ +/* This is called by the runtime when a module is loaded */ +/* liReportedSharedModule will be 0 when this module is reported for the 1st time */ +/**********************************************************************************/ +void ETW::LoaderLog::ModuleLoad(Module *pModule, LONG liReportedSharedModule) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + DWORD enumerationOptions = ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + KEYWORDZERO)) + { + BOOL bTraceFlagLoaderSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_LOADER_KEYWORD); + BOOL bTraceFlagNgenMethodSet = IsRuntimeNgenKeywordEnabledAndNotSuppressed(); + BOOL bTraceFlagStartRundownSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_STARTENUMERATION_KEYWORD); + BOOL 
bTraceFlagPerfTrackSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_KEYWORD); + + if(liReportedSharedModule == 0) + { + + if(bTraceFlagLoaderSet) + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad; + if (bTraceFlagPerfTrackSet) + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad; + if(bTraceFlagNgenMethodSet && bTraceFlagStartRundownSet) + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad; + + if(pModule->IsManifest() && bTraceFlagLoaderSet) + ETW::LoaderLog::SendAssemblyEvent(pModule->GetAssembly(), enumerationOptions); + + if(bTraceFlagLoaderSet || bTraceFlagPerfTrackSet) + ETW::LoaderLog::SendModuleEvent(pModule, ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad | ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad); + + ETW::EnumerationLog::EnumerationHelper(pModule, NULL, enumerationOptions); + } + + // we want to report domainmodule events whenever they are loaded in any AppDomain + if(bTraceFlagLoaderSet) + ETW::LoaderLog::SendModuleEvent(pModule, ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad, TRUE); + } + +#if !defined(FEATURE_PAL) + { + BOOL bTraceFlagPerfTrackPrivateSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_PRIVATE_KEYWORD); + if (liReportedSharedModule == 0 && bTraceFlagPerfTrackPrivateSet) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate; + ETW::LoaderLog::SendModuleRange(pModule, enumerationOptions); + } + } +#endif + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when the process is being shutdown */ +/****************************************************************************/ +void ETW::EnumerationLog::ProcessShutdown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, KEYWORDZERO)) + { + DWORD enumerationOptions = GetEnumerationOptionsFromRuntimeKeywords(); + + // Send unload events for all remaining domains, including shared domain and + // default domain. 
+ ETW::EnumerationLog::EnumerationHelper(NULL /* module filter */, NULL /* domain filter */, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/****************************************************************************/ +/* Begining of helper functions */ +/****************************************************************************/ +/****************************************************************************/ + +/****************************************************************************/ +/* This routine is used to send a domain load/unload or rundown event */ +/****************************************************************************/ +void ETW::LoaderLog::SendDomainEvent(BaseDomain *pBaseDomain, DWORD dwEventOptions, LPCWSTR wszFriendlyName) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + if(!pBaseDomain) + return; + +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + BOOL bIsDefaultDomain = pBaseDomain->IsDefaultDomain(); + BOOL bIsAppDomain = pBaseDomain->IsAppDomain(); + BOOL bIsExecutable = bIsAppDomain ? !(pBaseDomain->AsAppDomain()->IsPassiveDomain()) : FALSE; + BOOL bIsSharedDomain = pBaseDomain->IsSharedDomain(); + UINT32 uSharingPolicy = bIsAppDomain?(pBaseDomain->AsAppDomain()->GetSharePolicy()):0; + + ULONGLONG ullDomainId = (ULONGLONG)pBaseDomain; + ULONG ulDomainFlags = ((bIsDefaultDomain ? ETW::LoaderLog::LoaderStructs::DefaultDomain : 0) | + (bIsExecutable ? ETW::LoaderLog::LoaderStructs::ExecutableDomain : 0) | + (bIsSharedDomain ? ETW::LoaderLog::LoaderStructs::SharedDomain : 0) | + (uSharingPolicy<<28)); + + LPCWSTR wsEmptyString = L""; + LPCWSTR wsSharedString = L"SharedDomain"; + + LPWSTR lpswzDomainName = (LPWSTR)wsEmptyString; + + if(bIsAppDomain) + { + if(wszFriendlyName) + lpswzDomainName = (PWCHAR)wszFriendlyName; + else + lpswzDomainName = (PWCHAR)pBaseDomain->AsAppDomain()->GetFriendlyName(); + } + else + lpswzDomainName = (LPWSTR)wsSharedString; + + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)lpswzDomainName; +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(lpswzDomainName, szDtraceOutput1); + if (nSize == 0) + return; +#endif // !FEATURE_DTRACE + + if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) + { + FireEtwAppDomainLoad_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + FireEtwAppDomainUnload_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + FireEtwAppDomainDCStart_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) + { + FireEtwAppDomainDCEnd_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & 
ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd)); + } +} + +/********************************************************/ +/* This routine is used to send thread rundown events when ARM is enabled */ +/********************************************************/ +void ETW::EnumerationLog::SendThreadRundownEvent() +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + +#ifndef DACCESS_COMPILE + Thread *pThread = NULL; + + // Take the thread store lock while we enumerate threads. + ThreadStoreLockHolder tsl; + while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL) + { + if (pThread->IsUnstarted() || pThread->IsDead()) + continue; + + // Send thread rundown provider events and thread created runtime provider + // events (depending on which are enabled) + ThreadLog::FireThreadDC(pThread); + ThreadLog::FireThreadCreated(pThread); + } +#endif // !DACCESS_COMPILE +} + +/****************************************************************************/ +/* This routine is used to send an assembly load/unload or rundown event ****/ +/****************************************************************************/ +void ETW::LoaderLog::SendAssemblyEvent(Assembly *pAssembly, DWORD dwEventOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + if(!pAssembly) + return; + +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + BOOL bIsDynamicAssembly = pAssembly->IsDynamic(); + BOOL bIsCollectibleAssembly = pAssembly->IsCollectible(); + BOOL bIsDomainNeutral = pAssembly->IsDomainNeutral() ; + BOOL bHasNativeImage = pAssembly->GetManifestFile()->HasNativeImage(); + + ULONGLONG ullAssemblyId = (ULONGLONG)pAssembly; + ULONGLONG ullDomainId = (ULONGLONG)pAssembly->GetDomain(); + ULONGLONG ullBindingID = 0; +#if (defined FEATURE_PREJIT) && (defined FEATURE_FUSION_DEPRECATE) + ullBindingID = pAssembly->GetManifestFile()->GetBindingID(); +#endif + ULONG ulAssemblyFlags = ((bIsDomainNeutral ? ETW::LoaderLog::LoaderStructs::DomainNeutralAssembly : 0) | + (bIsDynamicAssembly ? ETW::LoaderLog::LoaderStructs::DynamicAssembly : 0) | + (bHasNativeImage ? ETW::LoaderLog::LoaderStructs::NativeAssembly : 0) | + (bIsCollectibleAssembly ? 
ETW::LoaderLog::LoaderStructs::CollectibleAssembly : 0)); + + SString sAssemblyPath; + pAssembly->GetDisplayName(sAssemblyPath); + LPWSTR lpszAssemblyPath = (LPWSTR)sAssemblyPath.GetUnicode(); + +/* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)lpszAssemblyPath; +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(lpszAssemblyPath, szDtraceOutput1); + if (nSize == 0) + return; +#endif // !FEATURE_DTRACE + + if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) + { + FireEtwAssemblyLoad_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + FireEtwAssemblyUnload_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + FireEtwAssemblyDCStart_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) + { + FireEtwAssemblyDCEnd_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd)); + } +} + +#if !defined(FEATURE_PAL) +ETW_INLINE + ULONG + ETW::LoaderLog::SendModuleRange( + __in Module *pModule, + __in DWORD dwEventOptions) + +{ + ULONG Result = ERROR_SUCCESS; + + + // do not fire the ETW event when: + // 1. We did not load the native image + // 2. 
We do not have IBC data for the native image + if( !pModule || !pModule->HasNativeImage() || !pModule->IsIbcOptimized() ) + { + return Result; + } + + // get information about the hot sections from the native image that has been loaded + COUNT_T cbSizeOfSectionTable; + CORCOMPILE_VIRTUAL_SECTION_INFO* pVirtualSectionsTable = (CORCOMPILE_VIRTUAL_SECTION_INFO* )pModule->GetNativeImage()->GetVirtualSectionsTable(&cbSizeOfSectionTable); + + COUNT_T RangeCount = cbSizeOfSectionTable/sizeof(CORCOMPILE_VIRTUAL_SECTION_INFO); + + // if we do not have any hot ranges, we do not fire the ETW event + + // Figure out the rest of the event data + UINT16 ClrInstanceId = GetClrInstanceId(); + UINT64 ModuleID = (ULONGLONG)(TADDR) pModule; + + for (COUNT_T i = 0; i < RangeCount; ++i) + { + DWORD rangeBegin = pVirtualSectionsTable[i].VirtualAddress; + DWORD rangeSize = pVirtualSectionsTable[i].Size; + DWORD sectionType = pVirtualSectionsTable[i].SectionType; + + UINT8 ibcType = VirtualSectionData::IBCType(sectionType); + UINT8 rangeType = VirtualSectionData::RangeType(sectionType); + UINT16 virtualSectionType = VirtualSectionData::VirtualSectionType(sectionType); + BOOL isIBCProfiledColdSection = VirtualSectionData::IsIBCProfiledColdSection(sectionType); + if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad) + { + if (isIBCProfiledColdSection) + Result &= FireEtwModuleRangeLoad(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType); + } + else if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart) + { + if (isIBCProfiledColdSection) + Result &= FireEtwModuleRangeDCStart(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType); + } + else if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd) + { + if (isIBCProfiledColdSection) + Result &= FireEtwModuleRangeDCEnd(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType); + } + // Fire private events if they are requested. + if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate) + { + Result &= FireEtwModuleRangeLoadPrivate(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType, ibcType, virtualSectionType); + } + } + return Result; +} +#endif // !FEATURE_PAL + +#ifndef FEATURE_DTRACE +//--------------------------------------------------------------------------------------- +// +// Helper that takes a module, and returns the managed and native PDB information +// corresponding to that module. Used by the routine that fires the module load / unload +// events. +// +// Arguments: +// * pModule - Module to examine +// * pCvInfoIL - [out] CV_INFO_PDB70 corresponding to managed PDB for this module +// (the last debug directory entry in the PE File), if it exists. If it doesn't +// exist, this is zeroed out. +// * pCvInfoNative - [out] CV_INFO_PDB70 corresponding to native NGEN PDB for this +// module (the next-to-last debug directory entry in the PE File), if it exists. +// If it doesn't exist, this is zeroed out. +// +// Notes: +// * This method only understands the CV_INFO_PDB70 / RSDS format. If the format +// changes, this function will act as if there are no debug directory entries. +// Module load / unload events will still be fired, but all PDB info will be +// zeroed out. +// * The raw data in the PE file's debug directory entries are assumed to be +// untrusted, and reported sizes of buffers are verified against their data. 
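+//     * The parsing below assumes the standard RSDS record layout for CV_INFO_PDB70: a DWORD
+//       magic (the bytes "RSDS"), a GUID signature, a DWORD age, and a variable-length,
+//       NUL-terminated path.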
+// + +static void GetCodeViewInfo(Module * pModule, CV_INFO_PDB70 * pCvInfoIL, CV_INFO_PDB70 * pCvInfoNative) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE (pModule != NULL); + _ASSERTE (pCvInfoIL != NULL); + _ASSERTE (pCvInfoNative != NULL); + + ZeroMemory(pCvInfoIL, sizeof(*pCvInfoIL)); + ZeroMemory(pCvInfoNative, sizeof(*pCvInfoNative)); + + PTR_PEFile pPEFile = pModule->GetFile(); + _ASSERTE(pPEFile != NULL); + + PTR_PEImageLayout pLayout = NULL; + if (pPEFile->HasNativeImage()) + { + pLayout = pPEFile->GetLoadedNative(); + } + else if (pPEFile->HasOpenedILimage()) + { + pLayout = pPEFile->GetLoadedIL(); + } + + if (pLayout == NULL) + { + // This can happen for reflection-loaded modules + return; + } + + if (!pLayout->HasNTHeaders()) + { + // Without NT headers, we'll have a tough time finding the debug directory + // entries. This can happen for nlp files. + return; + } + + if (!pLayout->HasDirectoryEntry(IMAGE_DIRECTORY_ENTRY_DEBUG)) + return; + + COUNT_T cbDebugEntries; + IMAGE_DEBUG_DIRECTORY * rgDebugEntries = + (IMAGE_DEBUG_DIRECTORY *) pLayout->GetDirectoryEntryData(IMAGE_DIRECTORY_ENTRY_DEBUG, &cbDebugEntries); + + if (cbDebugEntries < sizeof(IMAGE_DEBUG_DIRECTORY)) + return; + + // Since rgDebugEntries is an array of IMAGE_DEBUG_DIRECTORYs, cbDebugEntries + // should be a multiple of sizeof(IMAGE_DEBUG_DIRECTORY). + if (cbDebugEntries % sizeof(IMAGE_DEBUG_DIRECTORY) != 0) + return; + + // Temporary storage for a CV_INFO_PDB70 and its size (which could be less than + // sizeof(CV_INFO_PDB70); see below). + struct PdbInfo + { + CV_INFO_PDB70 * m_pPdb70; + ULONG m_cbPdb70; + }; + + // Iterate through all debug directory entries. The very last one will be the + // managed PDB entry. The next to last one (if it exists) will be the (native) NGEN + // PDB entry. Treat raw bytes we read as untrusted. + PdbInfo pdbInfoLast = {0}; + PdbInfo pdbInfoNextToLast = {0}; + int cEntries = cbDebugEntries / sizeof(IMAGE_DEBUG_DIRECTORY); + for (int i = 0; i < cEntries; i++) + { + if (rgDebugEntries[i].Type != IMAGE_DEBUG_TYPE_CODEVIEW) + continue; + + // Get raw data pointed to by this IMAGE_DEBUG_DIRECTORY + + // Some compilers set PointerToRawData but not AddressOfRawData as they put the + // data at the end of the file in an unmapped part of the file + RVA rvaOfRawData = (rgDebugEntries[i].AddressOfRawData != NULL) ? + rgDebugEntries[i].AddressOfRawData : + pLayout->OffsetToRva(rgDebugEntries[i].PointerToRawData); + + ULONG cbDebugData = rgDebugEntries[i].SizeOfData; + if (cbDebugData < (offsetof(CV_INFO_PDB70, magic) + sizeof(((CV_INFO_PDB70*)0)->magic))) + { + // raw data too small to contain magic number at expected spot, so its format + // is not recognizeable. Skip + continue; + } + + if (!pLayout->CheckRva(rvaOfRawData, cbDebugData)) + { + // Memory claimed to belong to the raw data does not fit. + // IMAGE_DEBUG_DIRECTORY is outright corrupt. Do not include PDB info in + // event at all. + return; + } + + // Verify the magic number is as expected + CV_INFO_PDB70 * pPdb70 = (CV_INFO_PDB70 *) pLayout->GetRvaData(rvaOfRawData); + if (pPdb70->magic != CV_SIGNATURE_RSDS) + { + // Unrecognized magic number. Skip + continue; + } + + // From this point forward, the format should adhere to the expected layout of + // CV_INFO_PDB70. If we find otherwise, then assume the IMAGE_DEBUG_DIRECTORY is + // outright corrupt, and do not include PDB info in event at all. The caller will + // still fire the module event, but have zeroed-out / empty PDB fields. 
+ + // Verify sane size of raw data + if (cbDebugData > sizeof(CV_INFO_PDB70)) + return; + + // cbDebugData actually can be < sizeof(CV_INFO_PDB70), since the "path" field + // can be truncated to its actual data length (i.e., fewer than MAX_PATH chars + // may be present in the PE file). In some cases, though, cbDebugData will + // include all MAX_PATH chars even though path gets null-terminated well before + // the MAX_PATH limit. + + // Gotta have at least one byte of the path + if (cbDebugData < offsetof(CV_INFO_PDB70, path) + sizeof(char)) + return; + + // How much space is available for the path? + size_t cchPathMaxIncludingNullTerminator = (cbDebugData - offsetof(CV_INFO_PDB70, path)) / sizeof(char); + _ASSERTE(cchPathMaxIncludingNullTerminator >= 1); // Guaranteed above + + // Verify path string fits inside the declared size + size_t cchPathActualExcludingNullTerminator = strnlen(pPdb70->path, cchPathMaxIncludingNullTerminator); + if (cchPathActualExcludingNullTerminator == cchPathMaxIncludingNullTerminator) + { + // This is how strnlen indicates failure--it couldn't find the null + // terminator within the buffer size specified + return; + } + + // Looks valid. Remember it. + pdbInfoNextToLast = pdbInfoLast; + pdbInfoLast.m_pPdb70 = pPdb70; + pdbInfoLast.m_cbPdb70 = cbDebugData; + } + + // Return whatever we found + + if (pdbInfoLast.m_pPdb70 != NULL) + { + // The last guy is the IL (managed) PDB info + _ASSERTE(pdbInfoLast.m_cbPdb70 <= sizeof(*pCvInfoIL)); // Guaranteed by checks above + memcpy(pCvInfoIL, pdbInfoLast.m_pPdb70, pdbInfoLast.m_cbPdb70); + } + + if (pdbInfoNextToLast.m_pPdb70 != NULL) + { + // The next-to-last guy is the NGEN (native) PDB info + _ASSERTE(pdbInfoNextToLast.m_cbPdb70 <= sizeof(*pCvInfoNative)); // Guaranteed by checks above + memcpy(pCvInfoNative, pdbInfoNextToLast.m_pPdb70, pdbInfoNextToLast.m_cbPdb70); + } +} +#endif // FEATURE_DTRACE + + + +//--------------------------------------------------------------------------------------- +// +// send a module load/unload or rundown event and domainmodule load and rundown event +// +// Arguments: +// * pModule - Module loading or unloading +// * dwEventOptions - Bitmask of which events to fire +// * bFireDomainModuleEvents - nonzero if we are to fire DomainModule events; zero +// if we are to fire Module events +// +void ETW::LoaderLog::SendModuleEvent(Module *pModule, DWORD dwEventOptions, BOOL bFireDomainModuleEvents) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + if(!pModule) + return; + +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + BOOL bIsDynamicAssembly = pModule->GetAssembly()->IsDynamic(); + BOOL bHasNativeImage = FALSE; +#ifdef FEATURE_PREJIT + bHasNativeImage = pModule->HasNativeImage(); +#endif // FEATURE_PREJIT + BOOL bIsManifestModule = pModule->IsManifest(); + ULONGLONG ullAppDomainId = 0; // This is used only with DomainModule events + ULONGLONG ullModuleId = (ULONGLONG)(TADDR) pModule; + ULONGLONG ullAssemblyId = (ULONGLONG)pModule->GetAssembly(); + BOOL bIsDomainNeutral = pModule->GetAssembly()->IsDomainNeutral(); + BOOL bIsIbcOptimized = FALSE; + if(bHasNativeImage) + { + bIsIbcOptimized = pModule->IsIbcOptimized(); + } + ULONG ulReservedFlags = 0; + ULONG ulFlags = ((bIsDomainNeutral ? ETW::LoaderLog::LoaderStructs::DomainNeutralModule : 0) | + (bHasNativeImage ? 
ETW::LoaderLog::LoaderStructs::NativeModule : 0) | + (bIsDynamicAssembly ? ETW::LoaderLog::LoaderStructs::DynamicModule : 0) | + (bIsManifestModule ? ETW::LoaderLog::LoaderStructs::ManifestModule : 0) | + (bIsIbcOptimized ? ETW::LoaderLog::LoaderStructs::IbcOptimized : 0)); + +#ifndef FEATURE_DTRACE + // Grab PDB path, guid, and age for managed PDB and native (NGEN) PDB when + // available. Any failures are not fatal. The corresponding PDB info will remain + // zeroed out, and that's what we'll include in the event. + CV_INFO_PDB70 cvInfoIL = {0}; + CV_INFO_PDB70 cvInfoNative = {0}; + GetCodeViewInfo(pModule, &cvInfoIL, &cvInfoNative); +#endif // FEATURE_DTRACE + + PWCHAR ModuleILPath=L"", ModuleNativePath=L""; + + if(bFireDomainModuleEvents) + { + if(pModule->GetDomain()->IsSharedDomain()) // for shared domains, we do not fire domainmodule event + return; + ullAppDomainId = (ULONGLONG)pModule->FindDomainAssembly(pModule->GetDomain()->AsAppDomain())->GetAppDomain(); + } + + LPCWSTR pEmptyString = L""; +#ifndef FEATURE_PAL + SString moduleName = L""; +#else // !FEATURE_PAL + SString moduleName; +#endif // !FEATURE_PAL + if(!bIsDynamicAssembly) + { + ModuleILPath = (PWCHAR)pModule->GetAssembly()->GetManifestFile()->GetILimage()->GetPath().GetUnicode(); + ModuleNativePath = (PWCHAR)pEmptyString; + +#ifdef FEATURE_PREJIT + if(bHasNativeImage) + ModuleNativePath = (PWCHAR)pModule->GetNativeImage()->GetPath().GetUnicode(); +#endif // FEATURE_PREJIT + } + + // if we do not have a module path yet, we put the module name + if(bIsDynamicAssembly || ModuleILPath==NULL || wcslen(ModuleILPath) <= 2) + { + moduleName.SetUTF8(pModule->GetSimpleName()); + ModuleILPath = (PWCHAR)moduleName.GetUnicode(); + ModuleNativePath = (PWCHAR)pEmptyString; + } + + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)ModuleILPath; + szDtraceOutput2 = (PCWSTR)ModuleNativePath; + + // Convert PDB paths to UNICODE + StackSString managedPdbPath(SString::Utf8, cvInfoIL.path); + StackSString nativePdbPath(SString::Utf8, cvInfoNative.path); +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeOfILPath = WideCharToMultiByte(ModuleILPath, szDtraceOutput1); + if (nSizeOfILPath == 0) + return; + INT32 nSizeOfNativePath = WideCharToMultiByte(ModuleNativePath, szDtraceOutput2); + if (nSizeOfNativePath == 0) + return; +#endif // !FEATURE_DTRACE + + if(bFireDomainModuleEvents) + { + if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) + { + FireEtwDomainModuleLoad_V1(ullModuleId, ullAssemblyId, ullAppDomainId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + FireEtwDomainModuleDCStart_V1(ullModuleId, ullAssemblyId, ullAppDomainId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) + { + FireEtwDomainModuleDCEnd_V1(ullModuleId, ullAssemblyId, ullAppDomainId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId()); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & 
ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd)); + } + } + else + { + if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad)) + { + FireEtwModuleLoad_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + FireEtwModuleUnload_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart)) + { + FireEtwModuleDCStart_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd)) + { + FireEtwModuleDCEnd_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeEnabledAny)); + + } +#if !defined(FEATURE_PAL) + if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeEnabledAny) + { + // Fire ModuleRangeLoad, ModuleRangeDCStart, ModuleRangeDCEnd or ModuleRangeLoadPrivate event for this Module + SendModuleRange(pModule, dwEventOptions); + } +#endif + } +} + +/*****************************************************************/ +/* This routine is used to send an ETW event just before a method starts jitting*/ +/*****************************************************************/ +void ETW::MethodLog::SendMethodJitStartEvent(MethodDesc *pMethodDesc, SString *namespaceOrClassName, SString *methodName, SString *methodSignature) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + Module *pModule = NULL; + Module *pLoaderModule = NULL; // This must not be used except for getting the ModuleID + + ULONGLONG ullMethodIdentifier=0; + ULONGLONG ullModuleID=0; + ULONG ulMethodToken=0; + ULONG ulMethodILSize=0; +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L"",szDtraceOutput3=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput3[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + + if(pMethodDesc) { + pModule = pMethodDesc->GetModule_NoLogging(); + + 
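+        // Unrestored NGEN methods are skipped here; as noted in SendMethodEvent, forcibly
+        // restoring them can cause deadlocks and contract violations.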
if(!pMethodDesc->IsRestored()) { + return; + } + + bool bIsDynamicMethod = pMethodDesc->IsDynamicMethod(); + BOOL bIsGenericMethod = FALSE; + if(pMethodDesc->GetMethodTable_NoLogging()) + bIsGenericMethod = pMethodDesc->HasClassOrMethodInstantiation_NoLogging(); + + ullModuleID = (ULONGLONG)(TADDR) pModule; + ullMethodIdentifier = (ULONGLONG)pMethodDesc; + + // Use MethodDesc if Dynamic or Generic methods + if( bIsDynamicMethod || bIsGenericMethod) + { + if(bIsGenericMethod) + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + if(bIsDynamicMethod) // if its a generic and a dynamic method, we would set the methodtoken to 0 + ulMethodToken = (ULONG)0; + } + else + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + + if(pMethodDesc->IsIL()) + { + COR_ILMETHOD_DECODER::DecoderStatus decoderstatus = COR_ILMETHOD_DECODER::FORMAT_ERROR; + COR_ILMETHOD_DECODER ILHeader(pMethodDesc->GetILHeader(), pMethodDesc->GetMDImport(), &decoderstatus); + ulMethodILSize = (ULONG)ILHeader.GetCodeSize(); + } + + SString tNamespace, tMethodName, tMethodSignature; + if(!namespaceOrClassName|| !methodName|| !methodSignature || (methodName->IsEmpty() && namespaceOrClassName->IsEmpty() && methodSignature->IsEmpty())) + { + pMethodDesc->GetMethodInfo(tNamespace, tMethodName, tMethodSignature); + namespaceOrClassName = &tNamespace; + methodName = &tMethodName; + methodSignature = &tMethodSignature; + } + + // fire method information + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)namespaceOrClassName->GetUnicode(); + szDtraceOutput2 = (PCWSTR)methodName->GetUnicode(); + szDtraceOutput3 = (PCWSTR)methodSignature->GetUnicode(); +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeOfNamespaceOrClassName = WideCharToMultiByte((PCWSTR)namespaceOrClassName->GetUnicode(), szDtraceOutput1); + if (nSizeOfNamespaceOrClassName == 0) + return; + INT32 nSizeOfMethodName = WideCharToMultiByte((PCWSTR)methodName->GetUnicode(), szDtraceOutput2); + if (nSizeOfMethodName == 0) + return; + INT32 nSizeMethodsignature = WideCharToMultiByte((PCWSTR)methodSignature->GetUnicode(), szDtraceOutput3); + if (nSizeMethodsignature == 0) + return; +#endif // !FEATURE_DTRACE + + FireEtwMethodJittingStarted_V1(ullMethodIdentifier, + ullModuleID, + ulMethodToken, + ulMethodILSize, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId()); + } +} + +/****************************************************************************/ +/* This routine is used to send a method load/unload or rundown event */ +/****************************************************************************/ +void ETW::MethodLog::SendMethodEvent(MethodDesc *pMethodDesc, DWORD dwEventOptions, BOOL bIsJit, SString *namespaceOrClassName, SString *methodName, SString *methodSignature, SIZE_T pCode, ReJITID rejitID) +{ + CONTRACTL { + THROWS; + GC_NOTRIGGER; + SO_NOT_MAINLINE; + } CONTRACTL_END; + + Module *pModule = NULL; + Module *pLoaderModule = NULL; // This must not be used except for getting the ModuleID + ULONGLONG ullMethodStartAddress=0, ullColdMethodStartAddress=0, ullModuleID=0, ullMethodIdentifier=0; + ULONG ulMethodSize=0, ulColdMethodSize=0, ulMethodToken=0, ulMethodFlags=0, ulColdMethodFlags=0; + PWCHAR pMethodName=NULL, pNamespaceName=NULL, pMethodSignature=NULL; + BOOL bHasNativeImage = FALSE, bShowVerboseOutput = FALSE, bIsDynamicMethod = FALSE, bHasSharedGenericCode = FALSE, bIsGenericMethod = FALSE; 
+#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L"",szDtraceOutput3=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput3[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + + BOOL bIsRundownProvider = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)); + + BOOL bIsRuntimeProvider = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload)); + + if (pMethodDesc == NULL) + return; + + if(!pMethodDesc->IsRestored()) + { + // Forcibly restoring ngen methods can cause all sorts of deadlocks and contract violations + // These events are therefore put under the private provider + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PRIVATENGENFORCERESTORE_KEYWORD)) + { + PERMANENT_CONTRACT_VIOLATION(GCViolation, ReasonNonShippingCode); + pMethodDesc->CheckRestore(); + } + else + { + return; + } + } + + + if(bIsRundownProvider) + { + bShowVerboseOutput = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_VERBOSE, + KEYWORDZERO); + } + else if(bIsRuntimeProvider) + { + bShowVerboseOutput = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_VERBOSE, + KEYWORDZERO); + } + + pModule = pMethodDesc->GetModule_NoLogging(); +#ifdef FEATURE_PREJIT + bHasNativeImage = pModule->HasNativeImage(); +#endif // FEATURE_PREJIT + bIsDynamicMethod = (BOOL)pMethodDesc->IsDynamicMethod(); + bHasSharedGenericCode = pMethodDesc->IsSharedByGenericInstantiations(); + + if(pMethodDesc->GetMethodTable_NoLogging()) + bIsGenericMethod = pMethodDesc->HasClassOrMethodInstantiation_NoLogging(); + + ulMethodFlags = ((ulMethodFlags | + (bHasSharedGenericCode ? ETW::MethodLog::MethodStructs::SharedGenericCode : 0) | + (bIsGenericMethod ? ETW::MethodLog::MethodStructs::GenericMethod : 0) | + (bIsDynamicMethod ? ETW::MethodLog::MethodStructs::DynamicMethod : 0) | + (bIsJit ? ETW::MethodLog::MethodStructs::JittedMethod : 0))); + + // Intentionally set the extent flags (cold vs. hot) only after all the other common + // flags (above) have been set. + ulColdMethodFlags = ulMethodFlags | ETW::MethodLog::MethodStructs::ColdSection; // Method Extent (bits 28, 29, 30, 31) + ulMethodFlags = ulMethodFlags | ETW::MethodLog::MethodStructs::HotSection; // Method Extent (bits 28, 29, 30, 31) + + // MethodDesc ==> Code Address ==>JitMananger + TADDR start = pCode ? 
pCode : PCODEToPINSTR(pMethodDesc->GetNativeCode()); + if(start == 0) { + // this method hasn't been jitted + return; + } + + // EECodeInfo is technically initialized by a "PCODE", but it can also be initialized + // by a TADDR (i.e., w/out thumb bit set on ARM) + EECodeInfo codeInfo(start); + + // MethodToken ==> MethodRegionInfo + IJitManager::MethodRegionInfo methodRegionInfo; + codeInfo.GetMethodRegionInfo(&methodRegionInfo); + + ullMethodStartAddress = (ULONGLONG)methodRegionInfo.hotStartAddress; + ulMethodSize = (ULONG)methodRegionInfo.hotSize; + + ullModuleID = (ULONGLONG)(TADDR) pModule; + ullMethodIdentifier = (ULONGLONG)pMethodDesc; + + // Use MethodDesc if Dynamic or Generic methods + if( bIsDynamicMethod || bIsGenericMethod) + { + bShowVerboseOutput = TRUE; + if(bIsGenericMethod) + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + if(bIsDynamicMethod) // if its a generic and a dynamic method, we would set the methodtoken to 0 + ulMethodToken = (ULONG)0; + } + else + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + + if(bHasNativeImage) + { + ullColdMethodStartAddress = (ULONGLONG)methodRegionInfo.coldStartAddress; + ulColdMethodSize = (ULONG)methodRegionInfo.coldSize; // methodRegionInfo.coldSize is size_t and info.MethodLoadInfo.MethodSize is 32 bit; will give incorrect values on a 64-bit machine + } + + SString tNamespace, tMethodName, tMethodSignature; + + // if verbose method load info needed, only then + // find method name and signature and fire verbose method load info + if(bShowVerboseOutput) + { + if(!namespaceOrClassName|| !methodName|| !methodSignature || (methodName->IsEmpty() && namespaceOrClassName->IsEmpty() && methodSignature->IsEmpty())) + { + pMethodDesc->GetMethodInfo(tNamespace, tMethodName, tMethodSignature); + namespaceOrClassName = &tNamespace; + methodName = &tMethodName; + methodSignature = &tMethodSignature; + } + pNamespaceName = (PWCHAR)namespaceOrClassName->GetUnicode(); + pMethodName = (PWCHAR)methodName->GetUnicode(); + pMethodSignature = (PWCHAR)methodSignature->GetUnicode(); + } + + BOOL bFireEventForColdSection = (bHasNativeImage && ullColdMethodStartAddress && ulColdMethodSize); + + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)pNamespaceName; + szDtraceOutput2 = (PCWSTR)pMethodName; + szDtraceOutput3 = (PCWSTR)pMethodSignature; +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeTempNamespaceName = WideCharToMultiByte(pNamespaceName, szDtraceOutput1); + if (nSizeTempNamespaceName == 0) + return; + INT32 nSizeTempMethodName = WideCharToMultiByte(pMethodName, szDtraceOutput2); + if (nSizeTempMethodName == 0) + return; + INT32 nSizeMothodSignature = WideCharToMultiByte(pMethodSignature, szDtraceOutput3); + if (nSizeMothodSignature == 0) + return; +#endif // !FEATURE_DTRACE + + if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad)) + { + if(bShowVerboseOutput) + { + FireEtwMethodLoadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodLoad_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + 
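+            // NGEN methods split by IBC may also have a cold region; when one exists,
+            // fire a second event below describing the cold extent (ColdSection flag set).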
if(bFireEventForColdSection) + { + if(bShowVerboseOutput) + { + FireEtwMethodLoadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodLoad_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload)) + { + if(bShowVerboseOutput) + { + FireEtwMethodUnloadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodUnload_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + if(bFireEventForColdSection) + { + if(bShowVerboseOutput) + { + FireEtwMethodUnloadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodUnload_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart)) + { + if(bShowVerboseOutput) + { + FireEtwMethodDCStartVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCStart_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + if(bFireEventForColdSection) + { + if(bShowVerboseOutput) + { + FireEtwMethodDCStartVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCStart_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)) + { + if(bShowVerboseOutput) + { + FireEtwMethodDCEndVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCEnd_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + if(bFireEventForColdSection) + { + 
if(bShowVerboseOutput) + { + FireEtwMethodDCEndVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCEnd_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)); + } +} + +// This event cannot be supported yet on coreclr, since Silverlight needs to support +// XP, and this event uses a format (dynamic-sized arrays) only supported by the +// Vista+ Crimson event format. So stub out the whole function to a no-op on pre-Vista +// platforms. +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT +//--------------------------------------------------------------------------------------- +// +// Fires the IL-to-native map event for JITted methods. This is used for the runtime, +// rundown start, and rundown end events that include the il-to-native map information +// +// Arguments: +// pMethodDesc - MethodDesc for which we'll fire the map event +// dwEventOptions - Options that tells us, in the rundown case, whether we're +// supposed to fire the start or end rundown events. +// + +// static +void ETW::MethodLog::SendMethodILToNativeMapEvent(MethodDesc * pMethodDesc, DWORD dwEventOptions, ReJITID rejitID) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + SO_NOT_MAINLINE; + } + CONTRACTL_END; + + // This is the limit on how big the il-to-native map can get, as measured by number + // of entries in each parallel array (IL offset array and native offset array). + // This number was chosen to ensure the overall event stays under the Windows limit + // of 64K + const USHORT kMapEntriesMax = 7000; + + if (pMethodDesc == NULL) + return; + + if (pMethodDesc->HasClassOrMethodInstantiation() && pMethodDesc->IsTypicalMethodDefinition()) + return; + + // g_pDebugInterface is initialized on startup on desktop CLR, regardless of whether a debugger + // or profiler is loaded. So it should always be available. + _ASSERTE(g_pDebugInterface != NULL); + + ULONGLONG ullMethodIdentifier = (ULONGLONG)pMethodDesc; + + USHORT cMap; + NewArrayHolder rguiILOffset; + NewArrayHolder rguiNativeOffset; + + HRESULT hr = g_pDebugInterface->GetILToNativeMappingIntoArrays( + pMethodDesc, + kMapEntriesMax, + &cMap, + &rguiILOffset, + &rguiNativeOffset); + if (FAILED(hr)) + return; + + // Runtime provider. 
+ // + // This macro already checks for the JittedMethodILToNativeMapKeyword before + // choosing to fire the event + if ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodILToNativeMap) != 0) + { + FireEtwMethodILToNativeMap( + ullMethodIdentifier, + rejitID, + 0, // Extent: This event is only sent for JITted (not NGENd) methods, and + // currently there is only one extent (hot) for JITted methods. + cMap, + rguiILOffset, + rguiNativeOffset, + GetClrInstanceId()); + } + + // Rundown provider + // + // These macros already check for the JittedMethodILToNativeMapRundownKeyword + // before choosing to fire the event--we further check our options to see if we + // should fire the Start and / or End flavor of the event (since the keyword alone + // is insufficient to distinguish these). + // + // (for an explanation of the parameters see the FireEtwMethodILToNativeMap call above) + if ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap) != 0) + FireEtwMethodDCStartILToNativeMap(ullMethodIdentifier, 0, 0, cMap, rguiILOffset, rguiNativeOffset, GetClrInstanceId()); + if ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap) != 0) + FireEtwMethodDCEndILToNativeMap(ullMethodIdentifier, 0, 0, cMap, rguiILOffset, rguiNativeOffset, GetClrInstanceId()); +} +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + +void ETW::MethodLog::SendHelperEvent(ULONGLONG ullHelperStartAddress, ULONG ulHelperSize, LPCWSTR pHelperName) +{ + WRAPPER_NO_CONTRACT; + if(pHelperName) + { +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + ULONG methodFlags = ETW::MethodLog::MethodStructs::JitHelperMethod; // helper flag set +#ifndef FEATURE_DTRACE + FireEtwMethodLoadVerbose_V1(ullHelperStartAddress, + 0, + ullHelperStartAddress, + ulHelperSize, + 0, + methodFlags, + NULL, + pHelperName, + NULL, + GetClrInstanceId()); +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nTempHelperName = WideCharToMultiByte(pHelperName, szDtraceOutput1); + if (nTempHelperName == 0) + return; + // in the action, printf, of DTtrace, it cannot print an arg with value NULL when the format is set %s. + // Dtrace does not provide the condition statement so that we give a string "NULL" to it. 
+ FireEtwMethodLoadVerbose_V1(ullHelperStartAddress, + 0, + ullHelperStartAddress, + ulHelperSize, + 0, + methodFlags, + szDtraceOutputNULL, + szDtraceOutput1, + szDtraceOutputNULL, + GetClrInstanceId()); +#endif // !FEATURE_DTRACE + } +} + + +/****************************************************************************/ +/* This routine sends back method events of type 'dwEventOptions', for all + NGEN methods in pModule */ +/****************************************************************************/ +void ETW::MethodLog::SendEventsForNgenMethods(Module *pModule, DWORD dwEventOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + +#ifdef FEATURE_PREJIT + if(!pModule || !pModule->HasNativeImage()) + return; + + MethodIterator mi(pModule); + + while(mi.Next()) + { + MethodDesc *hotDesc = (MethodDesc *)mi.GetMethodDesc(); + ETW::MethodLog::SendMethodEvent(hotDesc, dwEventOptions, FALSE); + } +#endif // FEATURE_PREJIT +} + +/****************************************************************************/ +/* This routine sends back method events of type 'dwEventOptions', for all + JITed methods in either a given LoaderAllocator (if pLoaderAllocatorFilter is non NULL) + or in a given Domain (if pDomainFilter is non NULL) or for + all methods (if both filters are null) */ +/****************************************************************************/ +void ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + +#if !defined(FEATURE_PAL) && !defined(DACCESS_COMPILE) + + // This is only called for JITted methods loading xor unloading + BOOL fLoadOrDCStart = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoadOrDCStartAny) != 0); + BOOL fUnloadOrDCEnd = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny) != 0); + _ASSERTE((fLoadOrDCStart || fUnloadOrDCEnd) && !(fLoadOrDCStart && fUnloadOrDCEnd)); + + BOOL fSendMethodEvent = + (dwEventOptions & + (ETW::EnumerationLog::EnumerationStructs::JitMethodLoad | + ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart | + ETW::EnumerationLog::EnumerationStructs::JitMethodUnload | + ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd)) != 0; + + BOOL fSendILToNativeMapEvent = + (dwEventOptions & + (ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap | + ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap)) != 0; + + BOOL fCollectibleLoaderAllocatorFilter = + ((pLoaderAllocatorFilter != NULL) && (pLoaderAllocatorFilter->IsCollectible())); +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + if (fSendILToNativeMapEvent) + { + // The call to SendMethodILToNativeMapEvent assumes that the debugger's lazy + // data has already been initialized, to ensure we don't try to do the lazy init + // while under the implicit, notrigger CodeHeapIterator lock below. + + // g_pDebugInterface is initialized on startup on desktop CLR, regardless of whether a debugger + // or profiler is loaded. So it should always be available. 
+ _ASSERTE(g_pDebugInterface != NULL); + g_pDebugInterface->InitializeLazyDataIfNecessary(); + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + EEJitManager::CodeHeapIterator heapIterator(pDomainFilter, pLoaderAllocatorFilter); + while(heapIterator.Next()) + { + MethodDesc * pMD = heapIterator.GetMethod(); + if (pMD == NULL) + continue; + + TADDR codeStart = heapIterator.GetMethodCode(); + + // Grab rejitID from the rejit manager. Short-circuit the call if we're filtering + // by a collectible loader allocator, since rejit is not supported on RefEmit + // assemblies. This also allows us to avoid having to pre-enter the rejit + // manager locks (which we have to do when filtering by domain; see + // code:#TableLockHolder). + ReJITID rejitID = + fCollectibleLoaderAllocatorFilter ? + 0 : + pMD->GetReJitManager()->GetReJitIdNoLock(pMD, codeStart); + + // There are small windows of time where the heap iterator may come across a + // codeStart that is not yet published to the MethodDesc. This may happen if + // we're JITting the method right now on another thread, and have not completed + // yet. Detect the race, and skip the method if appropriate. (If rejitID is + // nonzero, there is no race, as GetReJitIdNoLock will not return a nonzero + // rejitID if the codeStart has not yet been published for that rejitted version + // of the method.) This check also catches recompilations due to EnC, which we do + // not want to issue events for, in order to ensure xperf's assumption that + // MethodDesc* + ReJITID + extent (hot vs. cold) form a unique key for code + // ranges of methods + if ((rejitID == 0) && (codeStart != PCODEToPINSTR(pMD->GetNativeCode()))) + continue; + + // When we're called to announce loads, then the methodload event itself must + // precede any supplemental events, so that the method load or method jitting + // event is the first event the profiler sees for that MethodID (and not, say, + // the MethodILToNativeMap event.) + if (fLoadOrDCStart) + { + if (fSendMethodEvent) + { + ETW::MethodLog::SendMethodEvent( + pMD, + dwEventOptions, + TRUE, // bIsJit + NULL, // namespaceOrClassName + NULL, // methodName + NULL, // methodSignature + codeStart, + rejitID); + } + } + + // Send any supplemental events requested for this MethodID +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + if (fSendILToNativeMapEvent) + ETW::MethodLog::SendMethodILToNativeMapEvent(pMD, dwEventOptions, rejitID); +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + // When we're called to announce unloads, then the methodunload event itself must + // come after any supplemental events, so that the method unload event is the + // last event the profiler sees for this MethodID + if (fUnloadOrDCEnd) + { + if (fSendMethodEvent) + { + ETW::MethodLog::SendMethodEvent( + pMD, + dwEventOptions, + TRUE, // bIsJit + NULL, // namespaceOrClassName + NULL, // methodName + NULL, // methodSignature + codeStart, + rejitID); + } + } + } +#endif // !FEATURE_PAL && !DACCESS_COMPILE +} + +//--------------------------------------------------------------------------------------- +// +// Wrapper around IterateDomain, which locks the AppDomain to be < +// STAGE_FINALIZED until the iteration is complete. +// +// Arguments: +// pAppDomain - AppDomain to iterate +// enumerationOptions - Flags indicating what to enumerate. 
Just passed +// straight through to IterateDomain +// +void ETW::EnumerationLog::IterateAppDomain(AppDomain * pAppDomain, DWORD enumerationOptions) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + PRECONDITION(pAppDomain != NULL); + } + CONTRACTL_END; + + // Hold the system domain lock during the entire iteration, so we can + // ensure the App Domain does not get finalized until we're all done + SystemDomain::LockHolder lh; + + if (pAppDomain->IsFinalized()) + { + return; + } + + // Since we're not FINALIZED yet, the handle table should remain intact, + // as should all type information in this AppDomain + _ASSERTE(!pAppDomain->NoAccessToHandleTable()); + + // Now it's safe to do the iteration + IterateDomain(pAppDomain, enumerationOptions); + + // Since we're holding the system domain lock, the AD type info should be + // there throughout the entire iteration we just did + _ASSERTE(!pAppDomain->NoAccessToHandleTable()); +} + +/********************************************************************************/ +/* This routine fires ETW events for + Domain, + Assemblies in them, + DomainModule's in them, + Modules in them, + JIT methods in them, + and the NGEN methods in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateDomain(BaseDomain *pDomain, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pDomain != NULL); + } CONTRACTL_END; + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + // Do not call IterateDomain() directly with an AppDomain. Use + // IterateAppDomain(), whch wraps this function with a hold on the + // SystemDomain lock, which ensures pDomain's type data doesn't disappear + // on us. + if (pDomain->IsAppDomain()) + { + _ASSERTE(SystemDomain::IsUnderDomainLock()); + } +#endif // defined(_DEBUG) && !defined(DACCESS_COMPILE) + + EX_TRY + { + // DC Start events for Domain + if(enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::LoaderLog::SendDomainEvent(pDomain, enumerationOptions); + } + + // DC End or Unload Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny) + { + ETW::MethodLog::SendEventsForJitMethods(pDomain, NULL, enumerationOptions); + } + + if (pDomain->IsAppDomain()) + { + AppDomain::AssemblyIterator assemblyIterator = pDomain->AsAppDomain()->IterateAssembliesEx( + (AssemblyIterationFlags)(kIncludeLoaded | kIncludeExecution)); + CollectibleAssemblyHolder pDomainAssembly; + while (assemblyIterator.Next(pDomainAssembly.This())) + { + CollectibleAssemblyHolder pAssembly = pDomainAssembly->GetLoadedAssembly(); + BOOL bIsDomainNeutral = pAssembly->IsDomainNeutral(); + if (bIsDomainNeutral) + continue; + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + + DomainModuleIterator domainModuleIterator = pDomainAssembly->IterateModules(kModIterIncludeLoaded); + while (domainModuleIterator.Next()) + { + Module * pModule = domainModuleIterator.GetModule(); + ETW::EnumerationLog::IterateModule(pModule, enumerationOptions); + } + + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + } + } + else + { + 
SharedDomain::SharedAssemblyIterator sharedDomainIterator; + while (sharedDomainIterator.Next()) + { + Assembly * pAssembly = sharedDomainIterator.GetAssembly(); + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + + ModuleIterator domainModuleIterator = pAssembly->IterateModules(); + while (domainModuleIterator.Next()) + { + Module * pModule = domainModuleIterator.GetModule(); + ETW::EnumerationLog::IterateModule(pModule, enumerationOptions); + } + + if ((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + } + } + + // DC Start or Load Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoadOrDCStartAny) + { + ETW::MethodLog::SendEventsForJitMethods(pDomain, NULL, enumerationOptions); + } + + // DC End or Unload events for Domain + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::LoaderLog::SendDomainEvent(pDomain, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + + +/********************************************************************************/ +/* This routine fires ETW events for + Assembly in LoaderAllocator, + DomainModule's in them, + Modules in them, + JIT methods in them, + and the NGEN methods in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateCollectibleLoaderAllocator(AssemblyLoaderAllocator *pLoaderAllocator, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pLoaderAllocator != NULL); + } CONTRACTL_END; + + EX_TRY + { + // Unload Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, pLoaderAllocator, enumerationOptions); + } + + Assembly *pAssembly = pLoaderAllocator->Id()->GetDomainAssembly()->GetAssembly(); + _ASSERTE(!pAssembly->IsDomainNeutral()); // Collectible Assemblies are not domain neutral. 
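+        // Fire module-level events for each module of the collectible assembly first,
+        // then the assembly-level unload event below.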
+ + DomainModuleIterator domainModuleIterator = pLoaderAllocator->Id()->GetDomainAssembly()->IterateModules(kModIterIncludeLoaded); + while (domainModuleIterator.Next()) + { + Module *pModule = domainModuleIterator.GetModule(); + ETW::EnumerationLog::IterateModule(pModule, enumerationOptions); + } + + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + + // Load Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, pLoaderAllocator, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/********************************************************************************/ +/* This routine fires ETW events for Assembly and the DomainModule's in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateAssembly(Assembly *pAssembly, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pAssembly != NULL); + } CONTRACTL_END; + + EX_TRY + { + // DC Start events for Assembly + if(enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::LoaderLog::SendAssemblyEvent(pAssembly, enumerationOptions); + } + + // DC Start, DCEnd, events for DomainModule + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart)) + { + if(pAssembly->GetDomain()->IsAppDomain()) + { + DomainModuleIterator dmIterator = pAssembly->FindDomainAssembly(pAssembly->GetDomain()->AsAppDomain())->IterateModules(kModIterIncludeLoaded); + while (dmIterator.Next()) + { + ETW::LoaderLog::SendModuleEvent(dmIterator.GetModule(), enumerationOptions, TRUE); + } + } + } + + // DC End or Unload events for Assembly + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::LoaderLog::SendAssemblyEvent(pAssembly, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/********************************************************************************/ +/* This routine fires ETW events for Module, their range information and the NGEN methods in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateModule(Module *pModule, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pModule != NULL); + } CONTRACTL_END; + + EX_TRY + { + // DC Start events for Module + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart)) + { + ETW::LoaderLog::SendModuleEvent(pModule, enumerationOptions); + } + + // DC Start or Load or DC End or Unload Ngen Method events + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)) 
+ { + ETW::MethodLog::SendEventsForNgenMethods(pModule, enumerationOptions); + } + + // DC End or Unload events for Module + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd)) + { + ETW::LoaderLog::SendModuleEvent(pModule, enumerationOptions); + } + + // If we're logging types, then update the internal Type hash table to account + // for the module's unloading + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::TypeUnload) + { + ETW::TypeSystemLog::OnModuleUnload(pModule); + } + + // ModuleRangeLoadPrivate events for module range information from attach/detach scenarios + if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_PRIVATE_KEYWORD) && + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate)) + { + ETW::LoaderLog::SendModuleEvent(pModule, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +//--------------------------------------------------------------------------------------- +// +// This routine sends back domain, assembly, module and method events based on +// enumerationOptions. +// +// Arguments: +// * moduleFilter - if non-NULL, events from only moduleFilter module are reported +// * domainFilter - if non-NULL, events from only domainFilter domain are reported +// * enumerationOptions - Flags from ETW::EnumerationLog::EnumerationStructs which +// describe which events should be sent. +// +// Notes: +// * if all filter args are NULL, events from all domains are reported +// +// #TableLockHolder: +// +// A word about ReJitManager::TableLockHolder... As we enumerate through the functions, +// we may need to grab their ReJITIDs. The ReJitManager grabs its table Crst in order to +// fetch these. However, several other kinds of locks are being taken during this +// enumeration, such as the SystemDomain lock and the EEJitManager::CodeHeapIterator's +// lock. In order to avoid lock-leveling issues, we grab the appropriate ReJitManager +// table locks up front. In particular, we need to grab the SharedDomain's ReJitManager +// table lock as well as the specific AppDomain's ReJitManager table lock for the current +// AppDomain we're iterating. Why the SharedDomain's ReJitManager lock? For any given +// AppDomain we're iterating over, the MethodDescs we find may be managed by that +// AppDomain's ReJitManger OR the SharedDomain's ReJitManager. (This is due to generics +// and whether given instantiations may be shared based on their arguments.) Therefore, +// we proactively take the SharedDomain's ReJitManager's table lock up front, and then +// individually take the appropriate AppDomain's ReJitManager's table lock that +// corresponds to the domain or module we're currently iterating over. 
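+// In EnumerationHelper below this ordering shows up directly: the SharedDomain's
+// ReJitManager table lock is taken first, and the per-module or per-AppDomain
+// ReJitManager table lock is taken inside the corresponding branch before iteration starts.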
+// + +// static +void ETW::EnumerationLog::EnumerationHelper(Module *moduleFilter, BaseDomain *domainFilter, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + // Disable IBC logging during ETW enumeration since we call a lot of functionality + // that does logging and causes problems in the shutdown path due to critical + // section access for IBC logging + IBCLoggingDisabler disableLogging; + + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrSharedDomain(SharedDomain::GetDomain()->GetReJitManager()); + + if(moduleFilter) + { + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrModule(moduleFilter->GetReJitManager()); + + + // DC End or Unload Jit Method events from all Domains + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, NULL, enumerationOptions); + } + + ETW::EnumerationLog::IterateModule(moduleFilter, enumerationOptions); + + // DC Start or Load Jit Method events from all Domains + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoadOrDCStartAny) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, NULL, enumerationOptions); + } + } + else + { + if(domainFilter) + { + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrAD(domainFilter->GetReJitManager()); + + if(domainFilter->IsAppDomain()) + { + ETW::EnumerationLog::IterateAppDomain(domainFilter->AsAppDomain(), enumerationOptions); + } + else + { + ETW::EnumerationLog::IterateDomain(domainFilter, enumerationOptions); + } + } + else + { + AppDomainIterator appDomainIterator(FALSE); + while(appDomainIterator.Next()) + { + AppDomain *pDomain = appDomainIterator.GetDomain(); + if (pDomain != NULL) + { + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrAD(pDomain->GetReJitManager()); + + ETW::EnumerationLog::IterateAppDomain(pDomain, enumerationOptions); + } + } + ETW::EnumerationLog::IterateDomain(SharedDomain::GetDomain(), enumerationOptions); + } + } +} + +#endif // !FEATURE_REDHAWK +#endif // defined(FEATURE_REDHAWK) || !defined(FEATURE_PAL) || defined(FEATURE_DTRACE) diff --git a/src/coreclr/src/nativeaot/Runtime/eventtrace.h b/src/coreclr/src/nativeaot/Runtime/eventtrace.h new file mode 100644 index 0000000000000..03744b76ea4ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtrace.h @@ -0,0 +1,343 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// File: eventtrace.h +// Abstract: This module implements Event Tracing support. This includes +// eventtracebase.h, and adds VM-specific ETW helpers to support features like type +// logging, allocation logging, and gc heap walk logging. +// +// #EventTracing +// Windows +// ETW (Event Tracing for Windows) is a high-performance, low overhead and highly scalable +// tracing facility provided by the Windows Operating System. ETW is available on Win2K and above. There are +// four main types of components in ETW: event providers, controllers, consumers, and event trace sessions. +// An event provider is a logical entity that writes events to ETW sessions. The event provider must register +// a provider ID with ETW through the registration API. A provider first registers with ETW and writes events +// from various points in the code by invoking the ETW logging API. 
When a provider is enabled dynamically by +// the ETW controller application, calls to the logging API sends events to a specific trace session +// designated by the controller. Each event sent by the event provider to the trace session consists of a +// fixed header that includes event metadata and additional variable user-context data. CLR is an event +// provider. + +// Mac +// DTrace is similar to ETW and has been made to look like ETW at most of the places. +// For convenience, it is called ETM (Event Tracing for Mac) and exists only on the Mac Leopard OS +// ============================================================================ + +#ifndef _VMEVENTTRACE_H_ +#define _VMEVENTTRACE_H_ + +#include "eventtracebase.h" +#include "gcinterface.h" + +#ifdef FEATURE_EVENT_TRACE +struct ProfilingScanContext : ScanContext +{ + BOOL fProfilerPinned; + void * pvEtwContext; + void *pHeapId; + + ProfilingScanContext(BOOL fProfilerPinnedParam); +}; +#endif // defined(FEATURE_EVENT_TRACE) + +namespace ETW +{ +#ifndef FEATURE_REDHAWK + + class LoggedTypesFromModule; + + // We keep a hash of these to keep track of: + // * Which types have been logged through ETW (so we can avoid logging dupe Type + // events), and + // * GCSampledObjectAllocation stats to help with "smart sampling" which + // dynamically adjusts sampling rate of objects by type. + // See code:LoggedTypesFromModuleTraits + struct TypeLoggingInfo + { + public: + TypeLoggingInfo(TypeHandle thParam) + { + Init(thParam); + } + + TypeLoggingInfo() + { + Init(TypeHandle()); + } + + void Init(TypeHandle thParam) + { + th = thParam; + dwTickOfCurrentTimeBucket = 0; + dwAllocCountInCurrentBucket = 0; + flAllocPerMSec = 0; + + dwAllocsToSkipPerSample = 0; + dwAllocsSkippedForSample = 0; + cbIgnoredSizeForSample = 0; + }; + + // The type this TypeLoggingInfo represents + TypeHandle th; + + // Smart sampling + + // These bucket values remember stats of a particular time slice that are used to + // help adjust the sampling rate + DWORD dwTickOfCurrentTimeBucket; + DWORD dwAllocCountInCurrentBucket; + float flAllocPerMSec; + + // The number of data points to ignore before taking a "sample" (i.e., logging a + // GCSampledObjectAllocation ETW event for this type) + DWORD dwAllocsToSkipPerSample; + + // The current number of data points actually ignored for the current sample + DWORD dwAllocsSkippedForSample; + + // The current count of bytes of objects of this type actually allocated (and + // ignored) for the current sample + SIZE_T cbIgnoredSizeForSample; + }; + + // Class to wrap all type system logic for ETW + class TypeSystemLog + { + private: + static AllLoggedTypes * s_pAllLoggedTypes; + + // See code:ETW::TypeSystemLog::PostRegistrationInit + static BOOL s_fHeapAllocEventEnabledOnStartup; + static BOOL s_fHeapAllocHighEventEnabledNow; + static BOOL s_fHeapAllocLowEventEnabledNow; + + // If COMPLUS_UNSUPPORTED_ETW_ObjectAllocationEventsPerTypePerSec is set, then + // this is used to determine the event frequency, overriding + // s_nDefaultMsBetweenEvents above (regardless of which + // GCSampledObjectAllocation*Keyword was used) + static int s_nCustomMsBetweenEvents; + + public: + // This customizes the type logging behavior in LogTypeAndParametersIfNecessary + enum TypeLogBehavior + { + // Take lock, and consult hash table to see if this is the first time we've + // encountered the type, in which case, log it + kTypeLogBehaviorTakeLockAndLogIfFirstTime, + + // Caller has already taken lock, so just directly consult hash table to see + // if 
this is the first time we've encountered the type, in which case, log + // it + kTypeLogBehaviorAssumeLockAndLogIfFirstTime, + + // Don't take lock, don't consult hash table. Just log the type. (This is + // used in cases when checking for dupe type logging isn't worth it, such as + // when logging the finalization of an object.) + kTypeLogBehaviorAlwaysLog, + + // When logging the type for GCSampledObjectAllocation events, we don't need + // the lock (as it's already held by the code doing the stats for smart + // sampling), and we already know we need to log the type (since we already + // looked it up in the hash). But we would still need to consult the hash + // for any type parameters, so kTypeLogBehaviorAlwaysLog isn't appropriate, + // and this is used instead. + kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType, + }; + + static HRESULT PreRegistrationInit(); + static void PostRegistrationInit(); + static BOOL IsHeapAllocEventEnabled(); + static void SendObjectAllocatedEvent(Object * pObject); + static CrstBase * GetHashCrst(); + static void LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pBulkTypeEventLogger, ULONGLONG thAsAddr, TypeLogBehavior typeLogBehavior); + static void OnModuleUnload(Module * pModule); + static void OnKeywordsChanged(); + + private: + static BOOL ShouldLogType(TypeHandle th); + static BOOL ShouldLogTypeNoLock(TypeHandle th); + static TypeLoggingInfo LookupOrCreateTypeLoggingInfo(TypeHandle th, BOOL * pfCreatedNew, LoggedTypesFromModule ** ppLoggedTypesFromModule = NULL); + static BOOL AddOrReplaceTypeLoggingInfo(ETW::LoggedTypesFromModule * pLoggedTypesFromModule, const ETW::TypeLoggingInfo * pTypeLoggingInfo); + static int GetDefaultMsBetweenEvents(); + static void OnTypesKeywordTurnedOff(); + }; + +#endif // FEATURE_REDHAWK + + // Class to wrap all GC logic for ETW + class GCLog + { + private: + // When WPA triggers a GC, it gives us this unique number to append to our + // GCStart event so WPA can correlate the CLR's GC with the JScript GC they + // triggered at the same time. + // + // We set this value when the GC is triggered, and then retrieve the value on the + // first subsequent FireGcStart() method call for a full, induced GC, assuming + // that that's the GC that WPA triggered. This is imperfect, and if we were in + // the act of beginning another full, induced GC (for some other reason), then + // we'll attach this sequence number to that GC instead of to the WPA-induced GC, + // but who cares? When parsing ETW logs later on, it's indistinguishable if both + // GCs really were induced at around the same time. +#ifdef FEATURE_REDHAWK + static volatile LONGLONG s_l64LastClientSequenceNumber; +#else // FEATURE_REDHAWK + static Volatile s_l64LastClientSequenceNumber; +#endif // FEATURE_REDHAWK + + public: + typedef union st_GCEventInfo { + typedef struct _GenerationInfo { + ULONGLONG GenerationSize; + ULONGLONG TotalPromotedSize; + } GenerationInfo; + + struct { + GenerationInfo GenInfo[4]; // the heap info on gen0, gen1, gen2 and the large object heap. 
+ ULONGLONG FinalizationPromotedSize; //not available per generation + ULONGLONG FinalizationPromotedCount; //not available per generation + ULONG PinnedObjectCount; + ULONG SinkBlockCount; + ULONG GCHandleCount; + } HeapStats; + + typedef enum _HeapType { + SMALL_OBJECT_HEAP, LARGE_OBJECT_HEAP, READ_ONLY_HEAP + } HeapType; + struct { + ULONGLONG Address; + ULONGLONG Size; + HeapType Type; + } GCCreateSegment; + + struct { + ULONGLONG Address; + } GCFreeSegment; + struct { + ULONG Count; + ULONG Depth; + } GCEnd; + + typedef enum _AllocationKind { + AllocationSmall = 0, + AllocationLarge + }AllocationKind; + struct { + ULONG Allocation; + AllocationKind Kind; + } AllocationTick; + + // These values are gotten from the gc_reason + // in gcimpl.h + typedef enum _GC_REASON { + GC_ALLOC_SOH = 0 , + GC_INDUCED = 1 , + GC_LOWMEMORY = 2, + GC_EMPTY = 3, + GC_ALLOC_LOH = 4, + GC_OOS_SOH = 5, + GC_OOS_LOH = 6, + GC_INDUCED_NOFORCE = 7 + } GC_REASON; + typedef enum _GC_TYPE { + GC_NGC = 0 , GC_BGC = 1 , GC_FGC = 2 + } GC_TYPE; + struct { + ULONG Count; + ULONG Depth; + GC_REASON Reason; + GC_TYPE Type; + } GCStart; + + struct { + ULONG Count; // how many finalizers we called. + } GCFinalizers; + + struct { + ULONG Reason; + // This is only valid when SuspendEE is called by GC (ie, Reason is either + // SUSPEND_FOR_GC or SUSPEND_FOR_GC_PREP. + ULONG GcCount; + } SuspendEE; + + struct { + ULONG HeapNum; + } GCMark; + + struct { + ULONGLONG SegmentSize; + ULONGLONG LargeObjectSegmentSize; + BOOL ServerGC; // TRUE means it's server GC; FALSE means it's workstation. + } GCSettings; + + struct { + // The generation that triggered this notification. + ULONG Count; + // 1 means the notification was due to allocation; 0 means it was due to other factors. + ULONG Alloc; + } GCFullNotify; + } ETW_GC_INFO, *PETW_GC_INFO; + +#ifdef FEATURE_EVENT_TRACE + static void GCSettingsEvent(); +#else + static void GCSettingsEvent() {}; +#endif // FEATURE_EVENT_TRACE + + static BOOL ShouldWalkHeapObjectsForEtw(); + static BOOL ShouldWalkHeapRootsForEtw(); + static BOOL ShouldTrackMovementForEtw(); + static BOOL ShouldWalkStaticsAndCOMForEtw(); + static HRESULT ForceGCForDiagnostics(); + static void ForceGC(LONGLONG l64ClientSequenceNumber); + static void FireGcStart(ETW_GC_INFO * pGcInfo); + static void RootReference( + LPVOID pvHandle, + Object * pRootedNode, + Object * pSecondaryNodeForDependentHandle, + BOOL fDependentHandle, + ProfilingScanContext * profilingScanContext, + DWORD dwGCFlags, + DWORD rootFlags); + static void ObjectReference( + ProfilerWalkHeapContext * profilerWalkHeapContext, + Object * pObjReferenceSource, + ULONGLONG typeID, + ULONGLONG cRefs, + Object ** rgObjReferenceTargets); + static void EndHeapDump(ProfilerWalkHeapContext * profilerWalkHeapContext); + static void BeginMovedReferences(size_t * pProfilingContext); + static void MovedReference(BYTE * pbMemBlockStart, BYTE * pbMemBlockEnd, ptrdiff_t cbRelocDistance, size_t profilingContext, BOOL fCompacting, BOOL fAllowProfApiNotification = TRUE); + static void EndMovedReferences(size_t profilingContext, BOOL fAllowProfApiNotification = TRUE); + static void WalkStaticsAndCOMForETW(); +#ifndef FEATURE_REDHAWK + static void SendFinalizeObjectEvent(MethodTable * pMT, Object * pObj); +#endif // FEATURE_REDHAWK + }; +}; + +#ifndef FEATURE_ETW +inline BOOL ETW::GCLog::ShouldWalkHeapObjectsForEtw() { return FALSE; } +inline BOOL ETW::GCLog::ShouldWalkHeapRootsForEtw() { return FALSE; } +inline BOOL ETW::GCLog::ShouldTrackMovementForEtw() { return FALSE; } 
+inline BOOL ETW::GCLog::ShouldWalkStaticsAndCOMForEtw() { return FALSE; } +inline void ETW::GCLog::FireGcStart(ETW_GC_INFO * pGcInfo) { } +inline void ETW::GCLog::EndHeapDump(ProfilerWalkHeapContext * profilerWalkHeapContext) { } +inline void ETW::GCLog::BeginMovedReferences(size_t * pProfilingContext) { } +inline void ETW::GCLog::MovedReference(BYTE * pbMemBlockStart, BYTE * pbMemBlockEnd, ptrdiff_t cbRelocDistance, size_t profilingContext, BOOL fCompacting) { } +inline void ETW::GCLog::EndMovedReferences(size_t profilingContext) { } +inline void ETW::GCLog::WalkStaticsAndCOMForETW() { } +inline void ETW::GCLog::RootReference( + LPVOID pvHandle, + Object * pRootedNode, + Object * pSecondaryNodeForDependentHandle, + BOOL fDependentHandle, + ProfilingScanContext * profilingScanContext, + DWORD dwGCFlags, + DWORD rootFlags) { } +#endif + +inline BOOL EventEnabledPinObjectAtGCTime() { return FALSE; } + +#endif //_VMEVENTTRACE_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/eventtracebase.h b/src/coreclr/src/nativeaot/Runtime/eventtracebase.h new file mode 100644 index 0000000000000..3a51656944c5b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtracebase.h @@ -0,0 +1,1095 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// File: eventtracebase.h +// Abstract: This module implements base Event Tracing support (excluding some of the +// CLR VM-specific ETW helpers). +// +// #EventTracing +// Windows +// ETW (Event Tracing for Windows) is a high-performance, low overhead and highly scalable +// tracing facility provided by the Windows Operating System. ETW is available on Win2K and above. There are +// four main types of components in ETW: event providers, controllers, consumers, and event trace sessions. +// An event provider is a logical entity that writes events to ETW sessions. The event provider must register +// a provider ID with ETW through the registration API. A provider first registers with ETW and writes events +// from various points in the code by invoking the ETW logging API. When a provider is enabled dynamically by +// the ETW controller application, calls to the logging API sends events to a specific trace session +// designated by the controller. Each event sent by the event provider to the trace session consists of a +// fixed header that includes event metadata and additional variable user-context data. CLR is an event +// provider. + +// Mac +// DTrace is similar to ETW and has been made to look like ETW at most of the places. +// For convenience, it is called ETM (Event Tracing for Mac) and exists only on the Mac Leopard OS +// ============================================================================ + +#ifndef _ETWTRACER_HXX_ +#define _ETWTRACER_HXX_ + +struct EventStructTypeData; +void InitializeEventTracing(); + +#ifdef FEATURE_EVENT_TRACE + +// !!!!!!! NOTE !!!!!!!! +// The flags must match those in the ETW manifest exactly +// !!!!!!! NOTE !!!!!!!! 
+ +enum EtwTypeFlags +{ + kEtwTypeFlagsDelegate = 0x1, + kEtwTypeFlagsFinalizable = 0x2, + kEtwTypeFlagsExternallyImplementedCOMObject = 0x4, + kEtwTypeFlagsArray = 0x8, + kEtwTypeFlagsModuleBaseAddress = 0x10, +}; + +enum EtwThreadFlags +{ + kEtwThreadFlagGCSpecial = 0x00000001, + kEtwThreadFlagFinalizer = 0x00000002, + kEtwThreadFlagThreadPoolWorker = 0x00000004, +}; + + +// During a heap walk, this is the storage for keeping track of all the nodes and edges +// being batched up by ETW, and for remembering whether we're also supposed to call into +// a profapi profiler. This is allocated toward the end of a GC and passed to us by the +// GC heap walker. +struct ProfilerWalkHeapContext +{ +public: + ProfilerWalkHeapContext(BOOL fProfilerPinnedParam, LPVOID pvEtwContextParam) + { + fProfilerPinned = fProfilerPinnedParam; + pvEtwContext = pvEtwContextParam; + } + + BOOL fProfilerPinned; + LPVOID pvEtwContext; +}; + +class Object; + +/******************************/ +/* CLR ETW supported versions */ +/******************************/ +#define ETW_SUPPORTED_MAJORVER 5 // ETW is supported on win2k and above +#define ETW_ENABLED_MAJORVER 6 // OS versions >= to this we enable ETW registration by default, since on XP and Windows 2003, registration is too slow. + +/***************************************/ +/* Tracing levels supported by CLR ETW */ +/***************************************/ +#define ETWMAX_TRACE_LEVEL 6 // Maximum Number of Trace Levels supported +#define TRACE_LEVEL_NONE 0 // Tracing is not on +#define TRACE_LEVEL_FATAL 1 // Abnormal exit or termination +#define TRACE_LEVEL_ERROR 2 // Severe errors that need logging +#define TRACE_LEVEL_WARNING 3 // Warnings such as allocation failure +#define TRACE_LEVEL_INFORMATION 4 // Includes non-error cases such as Entry-Exit +#define TRACE_LEVEL_VERBOSE 5 // Detailed traces from intermediate steps + +struct ProfilingScanContext; + +// +// Use this macro to check if ETW is initialized and the event is enabled +// +#define ETW_TRACING_ENABLED(Context, EventDescriptor) \ + (Context.IsEnabled && ETW_TRACING_INITIALIZED(Context.RegistrationHandle) && ETW_EVENT_ENABLED(Context, EventDescriptor)) + +// +// Using KEYWORDZERO means when checking the events category ignore the keyword +// +#define KEYWORDZERO 0x0 + +// +// Use this macro to check if ETW is initialized and the category is enabled +// +#define ETW_TRACING_CATEGORY_ENABLED(Context, Level, Keyword) \ + (ETW_TRACING_INITIALIZED(Context.RegistrationHandle) && ETW_CATEGORY_ENABLED(Context, Level, Keyword)) + +#ifdef FEATURE_DTRACE + #define ETWOnStartup(StartEventName, EndEventName) \ + ETWTraceStartup trace(StartEventName, EndEventName); + #define ETWFireEvent(EventName) \ + FireEtw##EventName(GetClrInstanceId()); +#else + #define ETWOnStartup(StartEventName, EndEventName) \ + ETWTraceStartup trace##StartEventName##(Microsoft_Windows_DotNETRuntimePrivateHandle, &StartEventName, &StartupId, &EndEventName, &StartupId); + #define ETWFireEvent(EventName) \ + ETWTraceStartup::StartupTraceEvent(Microsoft_Windows_DotNETRuntimePrivateHandle, &EventName, &StartupId); +#endif // FEATURE_DTRACE + +#ifndef FEATURE_REDHAWK + +// Headers +#ifndef FEATURE_PAL +#include +#include +#include +#include +#if !defined(DONOT_DEFINE_ETW_CALLBACK) && !defined(DACCESS_COMPILE) +#define GetVersionEx(Version) (GetOSVersion((LPOSVERSIONINFOW)Version)) +#else +#define GetVersionEx(Version) (WszGetVersionEx((LPOSVERSIONINFOW)Version)) +#endif // !DONOT_DEFINE_ETW_CALLBACK && !DACCESS_COMPILE +#endif // !FEATURE_PAL + 
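Illustrative aside, not part of the patch itself: the TRACE_LEVEL_* values and the ETW_TRACING_CATEGORY_ENABLED / ETW_EVENT_ENABLED macros above all bottom out in the same cheap level-and-keyword test that the McGen helpers later in this header implement. Below is a minimal, self-contained C++ sketch of the level half of that test; the numeric levels are quoted from the defines above, and nothing here is runtime code.

// Standalone sketch: an event is level-enabled when its level is at or below the
// session's enabled level, and a level of 0 on either side means "all levels".
#include <cassert>

static bool LevelPermitsEvent(unsigned char eventLevel, unsigned char sessionLevel)
{
    // Same shape as the level check in McGenEventTracingEnabled / McGenEventProviderEnabled.
    return (eventLevel <= sessionLevel) || (sessionLevel == 0);
}

int main()
{
    assert(LevelPermitsEvent(3 /* TRACE_LEVEL_WARNING */, 4 /* session at INFORMATION */));
    assert(!LevelPermitsEvent(5 /* TRACE_LEVEL_VERBOSE */, 2 /* session at ERROR */));
    assert(LevelPermitsEvent(5 /* TRACE_LEVEL_VERBOSE */, 0 /* session level 0 enables everything */));
    assert(LevelPermitsEvent(0 /* event level 0 */, 1));  // level-0 events pass at any session level
    return 0;
}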
+#if FEATURE_DTRACE +#include "clrdtrace.h" +#endif + +#endif //!FEATURE_REDHAWK + + +#else // FEATURE_EVENT_TRACE + +#include "etmdummy.h" +#endif // FEATURE_EVENT_TRACE + +#ifndef FEATURE_REDHAWK + +#if defined(FEATURE_CORECLR) && !defined(FEATURE_CORESYSTEM) +// For Silverlight non-CoreSys builds we still use an older toolset, +// headers/libs, and a different value for WINVER. We use this symbol +// to distinguish between whether we built the ETW header files from +// the ETW manifest using the -mof command line or not. +#define WINXP_AND_WIN2K3_BUILD_SUPPORT +#endif +#include "corprof.h" + +// g_nClrInstanceId is defined in Utilcode\Util.cpp. The definition goes into Utilcode.lib. +// This enables both the VM and Utilcode to raise ETW events. +extern UINT32 g_nClrInstanceId; +extern BOOL g_fEEManagedEXEStartup; +extern BOOL g_fEEIJWStartup; + +#define GetClrInstanceId() (static_cast(g_nClrInstanceId)) + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) +// Callback and stack support +#if !defined(DONOT_DEFINE_ETW_CALLBACK) && !defined(DACCESS_COMPILE) +extern "C" { + /* ETW control callback + * Desc: This function handles the ETW control + * callback. + * Ret: success or failure + ***********************************************/ + void EtwCallback( + _In_ LPCGUID SourceId, + _In_ ULONG ControlCode, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _Inout_opt_ PVOID CallbackContext); +} + +// +// User defined callback +// +#define MCGEN_PRIVATE_ENABLE_CALLBACK(RequestCode, Context, InOutBufferSize, Buffer) \ + EtwCallback(NULL /* SourceId */, (RequestCode==WMI_ENABLE_EVENTS) ? EVENT_CONTROL_CODE_ENABLE_PROVIDER : EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0 /* Level */, 0 /* MatchAnyKeyword */, 0 /* MatchAllKeyword */, NULL /* FilterData */, Context) + +// +// User defined callback2 +// +#define MCGEN_PRIVATE_ENABLE_CALLBACK_V2(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext) \ + EtwCallback(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext) + +extern "C" { + /* ETW callout + * Desc: This function handles the ETW callout + * Ret: success or failure + ***********************************************/ + void EtwCallout( + REGHANDLE RegHandle, + PCEVENT_DESCRIPTOR Descriptor, + ULONG ArgumentCount, + PEVENT_DATA_DESCRIPTOR EventData); +} + +// +// Call user defined callout +// +#define MCGEN_CALLOUT(RegHandle, Descriptor, NumberOfArguments, EventData) \ + EtwCallout(RegHandle, Descriptor, NumberOfArguments, EventData) +#endif //!DONOT_DEFINE_ETW_CALLBACK && !DACCESS_COMPILE + +#include +// The bulk type event is too complex for MC.exe to auto-generate proper code. +// Use code:BulkTypeEventLogger instead. +#ifdef FireEtwBulkType +#undef FireEtwBulkType +#endif // FireEtwBulkType +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +/**************************/ +/* CLR ETW infrastructure */ +/**************************/ +// #CEtwTracer +// On Windows Vista, ETW has gone through a major upgrade, and one of the most significant changes is the +// introduction of the unified event provider model and APIs. The older architecture used the classic ETW +// events. The new ETW architecture uses the manifest based events. To support both types of events at the +// same time, we use the manpp tool for generating event macros that can be directly used to fire ETW events +// from various components within the CLR. 
+// (http://diagnostics/sites/etw/Lists/Announcements/DispForm.aspx?ID=10&Source=http%3A%2F%2Fdiagnostics%2Fsites%2Fetw%2Fdefault%2Easpx) +// Every ETW provider has to Register itself to the system, so that when enabled, it is capable of firing +// ETW events. file:../VM/eventtrace.cpp#Registration is where the actual Provider Registration takes place. +// At process shutdown, a registered provider need to be unregistered. +// file:../VM/eventtrace.cpp#Unregistration. Since ETW can also be enabled at any instant after the process +// has started, one may want to do something useful when that happens (e.g enumerate all the loaded modules +// in the system). To enable this, we have to implement a callback routine. +// file:../VM/eventtrace.cpp#EtwCallback is CLR's implementation of the callback. +// + +#include "daccess.h" +class Module; +class Assembly; +class MethodDesc; +class MethodTable; +class BaseDomain; +class AppDomain; +class SString; +class CrawlFrame; +class LoaderAllocator; +class AssemblyLoaderAllocator; +struct AllLoggedTypes; +class CrstBase; +class BulkTypeEventLogger; +class TypeHandle; +class Thread; + + +// All ETW helpers must be a part of this namespace +// We have auto-generated macros to directly fire the events +// but in some cases, gathering the event payload information involves some work +// and it can be done in a relevant helper class like the one's in this namespace +namespace ETW +{ + // Class to wrap the ETW infrastructure logic + class CEtwTracer + { +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) + ULONG RegGuids(LPCGUID ProviderId, PENABLECALLBACK EnableCallback, PVOID CallbackContext, PREGHANDLE RegHandle); +#endif // !FEATURE_PAL + + public: +#ifdef FEATURE_EVENT_TRACE + // Registers all the Event Tracing providers + HRESULT Register(); + + // Unregisters all the Event Tracing providers + HRESULT UnRegister(); +#else + HRESULT Register() + { + return S_OK; + } + HRESULT UnRegister() + { + return S_OK; + } +#endif // FEATURE_EVENT_TRACE + }; + + class LoaderLog; + class MethodLog; + // Class to wrap all the enumeration logic for ETW + class EnumerationLog + { + friend class ETW::LoaderLog; + friend class ETW::MethodLog; +#ifdef FEATURE_EVENT_TRACE + static void SendThreadRundownEvent(); + static void IterateDomain(BaseDomain *pDomain, DWORD enumerationOptions); + static void IterateAppDomain(AppDomain * pAppDomain, DWORD enumerationOptions); + static void IterateCollectibleLoaderAllocator(AssemblyLoaderAllocator *pLoaderAllocator, DWORD enumerationOptions); + static void IterateAssembly(Assembly *pAssembly, DWORD enumerationOptions); + static void IterateModule(Module *pModule, DWORD enumerationOptions); + static void EnumerationHelper(Module *moduleFilter, BaseDomain *domainFilter, DWORD enumerationOptions); + static DWORD GetEnumerationOptionsFromRuntimeKeywords(); + public: + typedef union _EnumerationStructs + { + typedef enum _EnumerationOptions + { + None= 0x00000000, + DomainAssemblyModuleLoad= 0x00000001, + DomainAssemblyModuleUnload= 0x00000002, + DomainAssemblyModuleDCStart= 0x00000004, + DomainAssemblyModuleDCEnd= 0x00000008, + JitMethodLoad= 0x00000010, + JitMethodUnload= 0x00000020, + JitMethodDCStart= 0x00000040, + JitMethodDCEnd= 0x00000080, + NgenMethodLoad= 0x00000100, + NgenMethodUnload= 0x00000200, + NgenMethodDCStart= 0x00000400, + NgenMethodDCEnd= 0x00000800, + ModuleRangeLoad= 0x00001000, + ModuleRangeDCStart= 0x00002000, + ModuleRangeDCEnd= 0x00004000, + ModuleRangeLoadPrivate= 0x00008000, + MethodDCStartILToNativeMap= 
0x00010000, + MethodDCEndILToNativeMap= 0x00020000, + JitMethodILToNativeMap= 0x00040000, + TypeUnload= 0x00080000, + + // Helpers + ModuleRangeEnabledAny = ModuleRangeLoad | ModuleRangeDCStart | ModuleRangeDCEnd | ModuleRangeLoadPrivate, + JitMethodLoadOrDCStartAny = JitMethodLoad | JitMethodDCStart | MethodDCStartILToNativeMap, + JitMethodUnloadOrDCEndAny = JitMethodUnload | JitMethodDCEnd | MethodDCEndILToNativeMap, + }EnumerationOptions; + }EnumerationStructs; + + static void ProcessShutdown(); + static void ModuleRangeRundown(); + static void StartRundown(); + static void EndRundown(); + static void EnumerateForCaptureState(); +#else + public: + static void ProcessShutdown() {}; + static void StartRundown() {}; + static void EndRundown() {}; +#endif // FEATURE_EVENT_TRACE + }; + + + // Class to wrap all the sampling logic for ETW + class SamplingLog + { + // StackWalk available only when !FEATURE_PAL +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) + public: + typedef enum _EtwStackWalkStatus + { + Completed = 0, + UnInitialized = 1, + InProgress = 2 + } EtwStackWalkStatus; + private: + static const UINT8 s_MaxStackSize=100; + UINT32 m_FrameCount; + SIZE_T m_EBPStack[SamplingLog::s_MaxStackSize]; + void Append(SIZE_T currentFrame); + EtwStackWalkStatus SaveCurrentStack(int skipTopNFrames=1); + public: + static ULONG SendStackTrace(MCGEN_TRACE_CONTEXT TraceContext, PCEVENT_DESCRIPTOR Descriptor, LPCGUID EventGuid); + EtwStackWalkStatus GetCurrentThreadsCallStack(UINT32 *frameCount, PVOID **Stack); +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + }; + + // Class to wrap all Loader logic for ETW + class LoaderLog + { + friend class ETW::EnumerationLog; +#ifdef FEATURE_EVENT_TRACE + static void SendModuleEvent(Module *pModule, DWORD dwEventOptions, BOOL bFireDomainModuleEvents=FALSE); +#if !defined(FEATURE_PAL) + static ULONG SendModuleRange(Module *pModule, DWORD dwEventOptions); +#endif // !FEATURE_PAL + static void SendAssemblyEvent(Assembly *pAssembly, DWORD dwEventOptions); + static void SendDomainEvent(BaseDomain *pBaseDomain, DWORD dwEventOptions, LPCWSTR wszFriendlyName=NULL); + public: + typedef union _LoaderStructs + { + typedef enum _AppDomainFlags + { + DefaultDomain=0x1, + ExecutableDomain=0x2, + SharedDomain=0x4 + }AppDomainFlags; + + typedef enum _AssemblyFlags + { + DomainNeutralAssembly=0x1, + DynamicAssembly=0x2, + NativeAssembly=0x4, + CollectibleAssembly=0x8, + }AssemblyFlags; + + typedef enum _ModuleFlags + { + DomainNeutralModule=0x1, + NativeModule=0x2, + DynamicModule=0x4, + ManifestModule=0x8, + IbcOptimized=0x10 + }ModuleFlags; + + typedef enum _RangeFlags + { + HotRange=0x0 + }RangeFlags; + + }LoaderStructs; + + static void DomainLoadReal(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName=NULL); + + static void DomainLoad(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName = NULL) + { +#ifndef FEATURE_PAL + if (MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context.IsEnabled) +#endif + { + DomainLoadReal(pDomain, wszFriendlyName); + } + } + + static void DomainUnload(AppDomain *pDomain); + static void CollectibleLoaderAllocatorUnload(AssemblyLoaderAllocator *pLoaderAllocator); + static void ModuleLoad(Module *pModule, LONG liReportedSharedModule); +#else + public: + static void DomainLoad(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName=NULL) {}; + static void DomainUnload(AppDomain *pDomain) {}; + static void CollectibleLoaderAllocatorUnload(AssemblyLoaderAllocator *pLoaderAllocator) {}; + static void ModuleLoad(Module *pModule, LONG 
liReportedSharedModule) {}; +#endif // FEATURE_EVENT_TRACE + }; + + // Class to wrap all Method logic for ETW + class MethodLog + { + friend class ETW::EnumerationLog; +#ifdef FEATURE_EVENT_TRACE + static void SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions); + static void SendEventsForNgenMethods(Module *pModule, DWORD dwEventOptions); + static void SendMethodJitStartEvent(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL); +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + static void SendMethodILToNativeMapEvent(MethodDesc * pMethodDesc, DWORD dwEventOptions, ReJITID rejitID); +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + static void SendMethodEvent(MethodDesc *pMethodDesc, DWORD dwEventOptions, BOOL bIsJit, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL, SIZE_T pCode = 0, ReJITID rejitID = 0); + static void SendHelperEvent(ULONGLONG ullHelperStartAddress, ULONG ulHelperSize, LPCWSTR pHelperName); + public: + typedef union _MethodStructs + { + typedef enum _MethodFlags + { + DynamicMethod=0x1, + GenericMethod=0x2, + SharedGenericCode=0x4, + JittedMethod=0x8, + JitHelperMethod=0x10 + }MethodFlags; + + typedef enum _MethodExtent + { + HotSection=0x00000000, + ColdSection=0x10000000 + }MethodExtent; + + }MethodStructs; + + static void MethodJitting(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL); + static void MethodJitted(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL, SIZE_T pCode = 0, ReJITID rejitID = 0); + static void StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName); + static void StubsInitialized(PVOID *pHelperStartAddresss, PVOID *pHelperNames, LONG ulNoOfHelpers); + static void MethodRestored(MethodDesc * pMethodDesc); + static void MethodTableRestored(MethodTable * pMethodTable); + static void DynamicMethodDestroyed(MethodDesc *pMethodDesc); +#else // FEATURE_EVENT_TRACE + public: + static void MethodJitting(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL) {}; + static void MethodJitted(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL, SIZE_T pCode = 0, ReJITID rejitID = 0) {}; + static void StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName) {}; + static void StubsInitialized(PVOID *pHelperStartAddresss, PVOID *pHelperNames, LONG ulNoOfHelpers) {}; + static void MethodRestored(MethodDesc * pMethodDesc) {}; + static void MethodTableRestored(MethodTable * pMethodTable) {}; + static void DynamicMethodDestroyed(MethodDesc *pMethodDesc) {}; +#endif // FEATURE_EVENT_TRACE + }; + + // Class to wrap all Security logic for ETW + class SecurityLog + { +#ifdef FEATURE_EVENT_TRACE + public: + static void StrongNameVerificationStart(DWORD dwInFlags, __in LPWSTR strFullyQualifiedAssemblyName); + static void StrongNameVerificationStop(DWORD dwInFlags,ULONG result, __in LPWSTR strFullyQualifiedAssemblyName); + + static void FireFieldTransparencyComputationStart(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireFieldTransparencyComputationEnd(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); + + static void 
FireMethodTransparencyComputationStart(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireMethodTransparencyComputationEnd(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); + + static void FireModuleTransparencyComputationStart(LPCWSTR wszModuleName, DWORD dwAppDomain); + static void FireModuleTransparencyComputationEnd(LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsTreatAsSafe, + BOOL fIsOpportunisticallyCritical, + DWORD dwSecurityRuleSet); + + static void FireTokenTransparencyComputationStart(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireTokenTransparencyComputationEnd(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); + + static void FireTypeTransparencyComputationStart(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireTypeTransparencyComputationEnd(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); +#else + public: + static void StrongNameVerificationStart(DWORD dwInFlags,LPWSTR strFullyQualifiedAssemblyName) {}; + static void StrongNameVerificationStop(DWORD dwInFlags,ULONG result, LPWSTR strFullyQualifiedAssemblyName) {}; + + static void FireFieldTransparencyComputationStart(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireFieldTransparencyComputationEnd(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; + + static void FireMethodTransparencyComputationStart(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireMethodTransparencyComputationEnd(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; + + static void FireModuleTransparencyComputationStart(LPCWSTR wszModuleName, DWORD dwAppDomain) {}; + static void FireModuleTransparencyComputationEnd(LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsTreatAsSafe, + BOOL fIsOpportunisticallyCritical, + DWORD dwSecurityRuleSet) {}; + + static void FireTokenTransparencyComputationStart(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireTokenTransparencyComputationEnd(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; + + static void FireTypeTransparencyComputationStart(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireTypeTransparencyComputationEnd(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; +#endif // FEATURE_EVENT_TRACE + }; + + // Class to wrap all Binder logic for ETW + class BinderLog + { + public: + typedef union _BinderStructs { + typedef enum _NGENBINDREJECT_REASON { + NGEN_BIND_START_BIND = 0, + NGEN_BIND_NO_INDEX = 1, + NGEN_BIND_SYSTEM_ASSEMBLY_NOT_AVAILABLE = 2, + NGEN_BIND_NO_NATIVE_IMAGE = 3, + NGEN_BIND_REJECT_CONFIG_MASK = 4, + NGEN_BIND_FAIL = 5, + NGEN_BIND_INDEX_CORRUPTION = 6, + NGEN_BIND_REJECT_TIMESTAMP = 7, + NGEN_BIND_REJECT_NATIVEIMAGE_NOT_FOUND = 8, + NGEN_BIND_REJECT_IL_SIG = 9, + NGEN_BIND_REJECT_LOADER_EVAL_FAIL = 10, + 
NGEN_BIND_MISSING_FOUND = 11, + NGEN_BIND_REJECT_HOSTASM = 12, + NGEN_BIND_REJECT_IL_NOT_FOUND = 13, + NGEN_BIND_REJECT_APPBASE_NOT_FILE = 14, + NGEN_BIND_BIND_DEPEND_REJECT_REF_DEF_MISMATCH = 15, + NGEN_BIND_BIND_DEPEND_REJECT_NGEN_SIG = 16, + NGEN_BIND_APPLY_EXTERNAL_RELOCS_FAILED = 17, + NGEN_BIND_SYSTEM_ASSEMBLY_NATIVEIMAGE_NOT_AVAILABLE = 18, + NGEN_BIND_ASSEMBLY_HAS_DIFFERENT_GRANT = 19, + NGEN_BIND_ASSEMBLY_NOT_DOMAIN_NEUTRAL = 20, + NGEN_BIND_NATIVEIMAGE_VERSION_MISMATCH = 21, + NGEN_BIND_LOADFROM_NOT_ALLOWED = 22, + NGEN_BIND_DEPENDENCY_HAS_DIFFERENT_IDENTITY = 23 + } NGENBINDREJECT_REASON; + } BinderStructs; + }; + + // Class to wrap all Exception logic for ETW + class ExceptionLog + { + public: +#ifdef FEATURE_EVENT_TRACE + static void ExceptionThrown(CrawlFrame *pCf, BOOL bIsReThrownException, BOOL bIsNewException); +#else + static void ExceptionThrown(CrawlFrame *pCf, BOOL bIsReThrownException, BOOL bIsNewException) {}; +#endif // FEATURE_EVENT_TRACE + typedef union _ExceptionStructs + { + typedef enum _ExceptionThrownFlags + { + HasInnerException=0x1, + IsNestedException=0x2, + IsReThrownException=0x4, + IsCSE=0x8, + IsCLSCompliant=0x10 + }ExceptionThrownFlags; + }ExceptionStructs; + }; + // Class to wrap all Contention logic for ETW + class ContentionLog + { + public: + typedef union _ContentionStructs + { + typedef enum _ContentionFlags { + ManagedContention=0, + NativeContention=1 + } ContentionFlags; + } ContentionStructs; + }; + // Class to wrap all Interop logic for ETW + class InteropLog + { + public: + }; + + // Class to wrap all Information logic for ETW + class InfoLog + { + public: + typedef union _InfoStructs + { + typedef enum _StartupMode + { + ManagedExe=0x1, + HostedCLR=0x2, + IJW=0x4, + COMActivated=0x8, + Other=0x10 + }StartupMode; + + typedef enum _Sku + { + DesktopCLR=0x1, + CoreCLR=0x2 + }Sku; + + typedef enum _EtwMode + { + Normal=0x0, + Callback=0x1 + }EtwMode; + }InfoStructs; + +#ifdef FEATURE_EVENT_TRACE + static void RuntimeInformation(INT32 type); +#else + static void RuntimeInformation(INT32 type) {}; +#endif // FEATURE_EVENT_TRACE + }; +}; + + +// +// The ONE and only ONE global instantiation of this class +// +extern ETW::CEtwTracer * g_pEtwTracer; +#define ETW_IS_TRACE_ON(level) ( FALSE ) // for fusion which is eventually going to get removed +#define ETW_IS_FLAG_ON(flag) ( FALSE ) // for fusion which is eventually going to get removed + +// Commonly used constats for ETW Assembly Loader and Assembly Binder events. +#define ETWLoadContextNotAvailable (LOADCTX_TYPE_HOSTED + 1) +#define ETWAppDomainIdNotAvailable 0 // Valid AppDomain IDs start from 1 + +#define ETWFieldUnused 0 // Indicates that a particular field in the ETW event payload template is currently unused. + +#define ETWLoaderLoadTypeNotAvailable 0 // Static or Dynamic Load is only valid at LoaderPhaseStart and LoaderPhaseEnd events - for other events, 0 indicates "not available" +#define ETWLoaderStaticLoad 0 // Static reference load +#define ETWLoaderDynamicLoad 1 // Dynamic assembly load + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) +// "mc.exe -MOF" already generates this block for XP-suported builds inside ClrEtwAll.h; +// on Vista+ builds, mc is run without -MOF, and we still have code that depends on it, so +// we manually place it here. 
+FORCEINLINE +BOOLEAN __stdcall +McGenEventTracingEnabled( + __in PMCGEN_TRACE_CONTEXT EnableInfo, + __in PCEVENT_DESCRIPTOR EventDescriptor + ) +{ + + if(!EnableInfo){ + return FALSE; + } + + + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. + // + + if ((EventDescriptor->Level <= EnableInfo->Level) || // This also covers the case of Level == 0. + (EnableInfo->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((EventDescriptor->Keyword == (ULONGLONG)0) || + ((EventDescriptor->Keyword & EnableInfo->MatchAnyKeyword) && + ((EventDescriptor->Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { + return TRUE; + } + } + + return FALSE; +} +#endif // defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) + + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) +ETW_INLINE +ULONG +ETW::SamplingLog::SendStackTrace( + MCGEN_TRACE_CONTEXT TraceContext, + PCEVENT_DESCRIPTOR Descriptor, + LPCGUID EventGuid) +{ +#define ARGUMENT_COUNT_CLRStackWalk 5 + ULONG Result = ERROR_SUCCESS; +typedef struct _MCGEN_TRACE_BUFFER { + EVENT_TRACE_HEADER Header; + EVENT_DATA_DESCRIPTOR EventData[ARGUMENT_COUNT_CLRStackWalk]; +} MCGEN_TRACE_BUFFER; + + REGHANDLE RegHandle = TraceContext.RegistrationHandle; + if(!TraceContext.IsEnabled || !McGenEventTracingEnabled(&TraceContext, Descriptor)) + { + return Result; + } + + PVOID *Stack = NULL; + UINT32 FrameCount = 0; + ETW::SamplingLog stackObj; + if(stackObj.GetCurrentThreadsCallStack(&FrameCount, &Stack) == ETW::SamplingLog::Completed) + { + UCHAR Reserved1=0, Reserved2=0; + UINT16 ClrInstanceId = GetClrInstanceId(); + MCGEN_TRACE_BUFFER TraceBuf; + PEVENT_DATA_DESCRIPTOR EventData = TraceBuf.EventData; + + EventDataDescCreate(&EventData[0], &ClrInstanceId, sizeof(const UINT16) ); + + EventDataDescCreate(&EventData[1], &Reserved1, sizeof(const UCHAR) ); + + EventDataDescCreate(&EventData[2], &Reserved2, sizeof(const UCHAR) ); + + EventDataDescCreate(&EventData[3], &FrameCount, sizeof(const unsigned int) ); + + EventDataDescCreate(&EventData[4], Stack, sizeof(PVOID) * FrameCount ); + +#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT + if (!McGenPreVista) + { + return PfnEventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_CLRStackWalk, EventData); + } + else + { + const MCGEN_TRACE_CONTEXT* Context = (const MCGEN_TRACE_CONTEXT*)(ULONG_PTR)RegHandle; + // + // Fill in header fields + // + + TraceBuf.Header.GuidPtr = (ULONGLONG)EventGuid; + TraceBuf.Header.Flags = WNODE_FLAG_TRACED_GUID |WNODE_FLAG_USE_GUID_PTR|WNODE_FLAG_USE_MOF_PTR; + TraceBuf.Header.Class.Version = (SHORT)Descriptor->Version; + TraceBuf.Header.Class.Level = Descriptor->Level; + TraceBuf.Header.Class.Type = Descriptor->Opcode; + TraceBuf.Header.Size = sizeof(MCGEN_TRACE_BUFFER); + + return TraceEvent(Context->Logger, &TraceBuf.Header); + } +#else // !WINXP_AND_WIN2K3_BUILD_SUPPORT + return EventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_CLRStackWalk, EventData); +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + } + return Result; +}; + +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +#ifdef FEATURE_EVENT_TRACE +#ifdef TARGET_X86 +struct CallStackFrame +{ + struct CallStackFrame* m_Next; + SIZE_T m_ReturnAddress; +}; +#endif // TARGET_X86 +#endif // FEATURE_EVENT_TRACE + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) +FORCEINLINE +BOOLEAN __stdcall +McGenEventProviderEnabled( + __in 
PMCGEN_TRACE_CONTEXT Context, + __in UCHAR Level, + __in ULONGLONG Keyword + ) +{ + if(!Context) { + return FALSE; + } + +#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT + if(McGenPreVista){ + return ( ((Level <= Context->Level) || (Context->Level == 0)) && + (((ULONG)(Keyword & 0xFFFFFFFF) == 0) || ((ULONG)(Keyword & 0xFFFFFFFF) & Context->Flags))); + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. + // + + if ((Level <= Context->Level) || // This also covers the case of Level == 0. + (Context->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((Keyword == (ULONGLONG)0) || + ((Keyword & Context->MatchAnyKeyword) && + ((Keyword & Context->MatchAllKeyword) == Context->MatchAllKeyword))) { + return TRUE; + } + } + return FALSE; +} +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) + +// This macro only checks if a provider is enabled +// It does not check the flags and keywords for which it is enabled +#define ETW_PROVIDER_ENABLED(ProviderSymbol) \ + ProviderSymbol##_Context.IsEnabled + +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId)\ + MCGEN_ENABLE_CHECK(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, GCPerHeapHistory) ?\ + Etw_GCDataPerHeapSpecial(&GCPerHeapHistory, &GarbageCollectionPrivateId, DataPerHeap, DataSize, ClrInstanceId)\ + : ERROR_SUCCESS\ + +// The GC uses this macro around its heap walk so the TypeSystemLog's crst can be held +// for the duration of the walk (if the ETW client has requested type information). +#define ETW_HEAP_WALK_HOLDER(__fShouldWalkHeapRootsForEtw, __fShouldWalkHeapObjectsForEtw) \ + CrstHolderWithState __crstHolderWithState(ETW::TypeSystemLog::GetHashCrst(), ((__fShouldWalkHeapRootsForEtw) || (__fShouldWalkHeapObjectsForEtw))) + +#else + +// For ETM, we rely on DTrace to do the checking +#define ETW_PROVIDER_ENABLED(ProviderSymbol) TRUE +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId) 0 + +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +#endif // !FEATURE_REDHAWK +// These parts of the ETW namespace are common for both FEATURE_REDHAWK and +// !FEATURE_REDHAWK builds. 
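Before the declarations shared by both build flavors, here is a complementary standalone sketch (again illustrative only, not runtime code) of the keyword half of McGenEventProviderEnabled above: a keyword-zero event always passes; otherwise the event must overlap MatchAnyKeyword and contain every bit of MatchAllKeyword. The keyword constants below are made up for the demonstration.

#include <cassert>
#include <cstdint>

static bool KeywordsPermitEvent(uint64_t eventKeyword, uint64_t matchAny, uint64_t matchAll)
{
    // Same shape as the keyword check in McGenEventProviderEnabled.
    return (eventKeyword == 0) ||
           (((eventKeyword & matchAny) != 0) &&
            ((eventKeyword & matchAll) == matchAll));
}

int main()
{
    const uint64_t kGCKeyword   = 0x1;      // hypothetical keyword bit
    const uint64_t kTypeKeyword = 0x80000;  // hypothetical keyword bit

    assert(KeywordsPermitEvent(kGCKeyword, /*MatchAny*/ kGCKeyword, /*MatchAll*/ 0));
    assert(!KeywordsPermitEvent(kTypeKeyword, /*MatchAny*/ kGCKeyword, /*MatchAll*/ 0));           // no MatchAny overlap
    assert(!KeywordsPermitEvent(kGCKeyword, /*MatchAny*/ kGCKeyword, /*MatchAll*/ kTypeKeyword));  // missing a MatchAll bit
    assert(KeywordsPermitEvent(0, /*MatchAny*/ 0, /*MatchAll*/ 0xFF));                             // keyword-zero always passes
    return 0;
}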
+ + +struct ProfilingScanContext; +struct ProfilerWalkHeapContext; +class Object; + +namespace ETW +{ + // Class to wrap the logging of threads (runtime and rundown providers) + class ThreadLog + { + private: + static DWORD GetEtwThreadFlags(Thread * pThread); + + public: + static void FireThreadCreated(Thread * pThread); + static void FireThreadDC(Thread * pThread); + }; +}; + +#ifndef FEATURE_REDHAWK + +#ifdef FEATURE_EVENT_TRACE + +// +// Use this macro at the least before calling the Event Macros +// + +#define ETW_TRACING_INITIALIZED(RegHandle) \ + (g_pEtwTracer && RegHandle) + +// +// Use this macro to check if an event is enabled +// if the fields in the event are not cheap to calculate +// +#define ETW_EVENT_ENABLED(Context, EventDescriptor) \ + (MCGEN_ENABLE_CHECK(Context, EventDescriptor)) + +// +// Use this macro to check if a category of events is enabled +// + +#define ETW_CATEGORY_ENABLED(Context, Level, Keyword) \ + (Context.IsEnabled && McGenEventProviderEnabled(&Context, Level, Keyword)) + + + +// +// Special Handling of Startup events +// + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) +// "mc.exe -MOF" already generates this block for XP-suported builds inside ClrEtwAll.h; +// on Vista+ builds, mc is run without -MOF, and we still have code that depends on it, so +// we manually place it here. +ETW_INLINE +ULONG +CoMofTemplate_h( + __in REGHANDLE RegHandle, + __in PCEVENT_DESCRIPTOR Descriptor, + __in_opt LPCGUID EventGuid, + __in const unsigned short ClrInstanceID + ) +{ +#define ARGUMENT_COUNT_h 1 + ULONG Error = ERROR_SUCCESS; +typedef struct _MCGEN_TRACE_BUFFER { + EVENT_TRACE_HEADER Header; + EVENT_DATA_DESCRIPTOR EventData[ARGUMENT_COUNT_h]; +} MCGEN_TRACE_BUFFER; + + MCGEN_TRACE_BUFFER TraceBuf; + PEVENT_DATA_DESCRIPTOR EventData = TraceBuf.EventData; + + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(const unsigned short) ); + + + { + Error = EventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_h, EventData); + + } + +#ifdef MCGEN_CALLOUT +MCGEN_CALLOUT(RegHandle, + Descriptor, + ARGUMENT_COUNT_h, + EventData); +#endif + + return Error; +} +#endif // defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) + +class ETWTraceStartup { +#ifndef FEATURE_DTRACE + REGHANDLE TraceHandle; + PCEVENT_DESCRIPTOR EventStartDescriptor; + LPCGUID EventStartGuid; + PCEVENT_DESCRIPTOR EventEndDescriptor; + LPCGUID EventEndGuid; +public: + ETWTraceStartup(REGHANDLE _TraceHandle, PCEVENT_DESCRIPTOR _EventStartDescriptor, LPCGUID _EventStartGuid, PCEVENT_DESCRIPTOR _EventEndDescriptor, LPCGUID _EventEndGuid) { + TraceHandle = _TraceHandle; + EventStartDescriptor = _EventStartDescriptor; + EventEndDescriptor = _EventEndDescriptor; + EventStartGuid = _EventStartGuid; + EventEndGuid = _EventEndGuid; + StartupTraceEvent(TraceHandle, EventStartDescriptor, EventStartGuid); + } + ~ETWTraceStartup() { + StartupTraceEvent(TraceHandle, EventEndDescriptor, EventEndGuid); + } + static void StartupTraceEvent(REGHANDLE _TraceHandle, PCEVENT_DESCRIPTOR _EventDescriptor, LPCGUID _EventGuid) { + EVENT_DESCRIPTOR desc = *_EventDescriptor; + if(ETW_TRACING_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, desc)) + { +#ifndef FEATURE_PAL + CoMofTemplate_h(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context.RegistrationHandle, _EventDescriptor, _EventGuid, GetClrInstanceId()); +#endif // !FEATURE_PAL + } + } +#else //!FEATURE_DTRACE + void (*startFP)(); + void (*endFP)(); 
+public: + ETWTraceStartup(void (*sFP)(), void (*eFP)()) : startFP (sFP), endFP(eFP) { + (*startFP)(); + } + ~ETWTraceStartup() { + (*endFP)(); + } +#endif //!FEATURE_DTRACE +}; + + + +#else // FEATURE_EVENT_TRACE + +#define ETWOnStartup(StartEventName, EndEventName) +#define ETWFireEvent(EventName) + +// Use this macro at the least before calling the Event Macros +#define ETW_TRACING_INITIALIZED(RegHandle) (FALSE) + +// Use this macro to check if an event is enabled +// if the fields in the event are not cheap to calculate +#define ETW_EVENT_ENABLED(Context, EventDescriptor) (FALSE) + +// Use this macro to check if a category of events is enabled +#define ETW_CATEGORY_ENABLED(Context, Level, Keyword) (FALSE) + +// Use this macro to check if ETW is initialized and the event is enabled +#define ETW_TRACING_ENABLED(Context, EventDescriptor) (FALSE) + +// Use this macro to check if ETW is initialized and the category is enabled +#define ETW_TRACING_CATEGORY_ENABLED(Context, Level, Keyword) (FALSE) + +#endif // FEATURE_EVENT_TRACE + +#endif // FEATURE_REDHAWK + +#endif //_ETWTRACER_HXX_ diff --git a/src/coreclr/src/nativeaot/Runtime/eventtracepriv.h b/src/coreclr/src/nativeaot/Runtime/eventtracepriv.h new file mode 100644 index 0000000000000..c97eb9fc37f3a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtracepriv.h @@ -0,0 +1,213 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// File: eventtracepriv.h +// +// Contains some private definitions used by eventrace.cpp, but that aren't needed by +// clients of eventtrace.cpp, and thus don't belong in eventtrace.h. Also, since +// inclusions of this file are tightly controlled (basically just by eventtrace.cpp), we +// can assume some classes are defined that aren't necessarily defined when eventtrace.h +// is #included (e.g., StackSString and StackSArray). +// +// ============================================================================ + +#ifndef __EVENTTRACEPRIV_H__ +#define __EVENTTRACEPRIV_H__ + +#ifdef FEATURE_REDHAWK +#include "holder.h" +#endif // FEATURE_REDHAWK + +#ifndef _countof +#define _countof(_array) (sizeof(_array)/sizeof(_array[0])) +#endif + +const UINT cbMaxEtwEvent = 64 * 1024; + +//--------------------------------------------------------------------------------------- +// C++ copies of ETW structures +//--------------------------------------------------------------------------------------- + +// !!!!!!! NOTE !!!!!!!! +// The EventStruct* structs are described in the ETW manifest event templates, and the +// LAYOUT MUST MATCH THE MANIFEST EXACTLY! +// !!!!!!! NOTE !!!!!!!! + +#pragma pack(push, 1) + +struct EventStructGCBulkRootEdgeValue +{ + LPVOID RootedNodeAddress; + BYTE GCRootKind; + DWORD GCRootFlag; + LPVOID GCRootID; +}; + +struct EventStructGCBulkRootConditionalWeakTableElementEdgeValue +{ + LPVOID GCKeyNodeID; + LPVOID GCValueNodeID; + LPVOID GCRootID; +}; + +struct EventStructGCBulkNodeValue +{ + LPVOID Address; + ULONGLONG Size; + ULONGLONG TypeID; + ULONGLONG EdgeCount; +}; + +struct EventStructGCBulkEdgeValue +{ + LPVOID Value; + ULONG ReferencingFieldID; +}; + +struct EventStructGCBulkSurvivingObjectRangesValue +{ + LPVOID RangeBase; + ULONGLONG RangeLength; +}; + +struct EventStructGCBulkMovedObjectRangesValue +{ + LPVOID OldRangeBase; + LPVOID NewRangeBase; + ULONGLONG RangeLength; +}; + +// This only contains the fixed-size data at the top of each struct in +// the bulk type event. 
These fields must still match exactly the initial +// fields of the struct described in the manifest. +struct EventStructBulkTypeFixedSizedData +{ + ULONGLONG TypeID; + ULONGLONG ModuleID; + ULONG TypeNameID; + ULONG Flags; + BYTE CorElementType; +}; + +#pragma pack(pop) + + + +// Represents one instance of the Value struct inside a single BulkType event +class BulkTypeValue +{ +public: + BulkTypeValue(); + void Clear(); + + // How many bytes will this BulkTypeValue take up when written into the actual ETW + // event? + int GetByteCountInEvent() + { + return + sizeof(fixedSizedData) + + sizeof(cTypeParameters) + +#ifdef FEATURE_REDHAWK + sizeof(WCHAR) + // No name in event, so just the null terminator + cTypeParameters * sizeof(ULONGLONG); // Type parameters +#else + (sName.GetCount() + 1) * sizeof(WCHAR) + // Size of name, including null terminator + rgTypeParameters.GetCount() * sizeof(ULONGLONG);// Type parameters +#endif + } + + EventStructBulkTypeFixedSizedData fixedSizedData; + + // Below are the remainder of each struct in the bulk type event (i.e., the + // variable-sized data). The var-sized fields are copied into the event individually + // (not directly), so they don't need to have the same layout as in the ETW manifest + + // This is really a denorm of the size already stored in rgTypeParameters, but we + // need a persistent place to stash this away so EventDataDescCreate & EventWrite + // have a reliable place to copy it from. This is filled in at the last minute, + // when sending the event. (On ProjectN, which doesn't have StackSArray, this is + // filled in earlier and used in more places.) + ULONG cTypeParameters; + +#ifdef FEATURE_REDHAWK + // If > 1 type parameter, this is an array of their EEType*'s + NewArrayHolder rgTypeParameters; + + // If exactly one type parameter, this is its EEType*. (If != 1 type parameter, + // this is 0.) + ULONGLONG ullSingleTypeParameter; +#else // FEATURE_REDHAWK + StackSString sName; + StackSArray rgTypeParameters; +#endif // FEATURE_REDHAWK +}; + +// Encapsulates all the type event batching we need to do. This is used by +// ETW::TypeSystemLog, which calls LogTypeAndParameters for each type to be logged. +// BulkTypeEventLogger will batch each type and its generic type parameters, and flush to +// ETW as necessary. ETW::TypeSystemLog also calls FireBulkTypeEvent directly to force a +// flush (e.g., once at end of GC heap traversal, or on each object allocation). +class BulkTypeEventLogger +{ +private: + + // Estimate of how many bytes we can squeeze in the event data for the value struct + // array. (Intentionally overestimate the size of the non-array parts to keep it safe.) + static const int kMaxBytesTypeValues = (cbMaxEtwEvent - 0x30); + + // Estimate of how many type value elements we can put into the struct array, while + // staying under the ETW event size limit. Note that this is impossible to calculate + // perfectly, since each element of the struct array has variable size. + // + // In addition to the byte-size limit per event, Windows always forces on us a + // max-number-of-descriptors per event, which in the case of BulkType, will kick in + // far sooner. There's a max number of 128 descriptors allowed per event. 2 are used + // for Count + ClrInstanceID. Then 4 per batched value. (Might actually be 3 if there + // are no type parameters to log, but let's overestimate at 4 per value). + static const int kMaxCountTypeValues = (128 - 2) / 4; + // Note: This results in a relatively small batch (about 31 types per event). 
We + // could increase this substantially by creating a single, contiguous buffer, which + // would let us max out the number of type values to batch by allowing the byte-size + // limit to kick in before the max-descriptor limit. We could esimate that as + // follows: + // + // static const int kMaxCountTypeValues = kMaxBytesTypeValues / + // (sizeof(EventStructBulkTypeFixedSizedData) + + // 200 * sizeof(WCHAR) + // Assume 199 + 1 terminating-NULL character in type name + // sizeof(UINT) + // Type parameter count + // 10 * sizeof(ULONGLONG)); // Assume 10 type parameters + // + // The downside, though, is that we would have to do a lot more copying to fill out + // that buffer before sending the event. It's unclear that increasing the batch size + // is enough of a win to offset all the extra buffer copying. So for now, we'll keep + // the batch size low and avoid extra copying. + + // How many types have we batched? + int m_nBulkTypeValueCount; + + // What is the byte size of all the types we've batched? + int m_nBulkTypeValueByteCount; + + // List of types we've batched. + BulkTypeValue m_rgBulkTypeValues[kMaxCountTypeValues]; + +#ifdef FEATURE_REDHAWK + int LogSingleType(EEType * pEEType); +#else + int LogSingleType(TypeHandle th); +#endif + +public: + BulkTypeEventLogger() : + m_nBulkTypeValueCount(0), + m_nBulkTypeValueByteCount(0) + { + LIMITED_METHOD_CONTRACT; + } + + void LogTypeAndParameters(ULONGLONG thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior); + void FireBulkTypeEvent(); + void Cleanup(); +}; + +#endif // __EVENTTRACEPRIV_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/forward_declarations.h b/src/coreclr/src/nativeaot/Runtime/forward_declarations.h new file mode 100644 index 0000000000000..4fa221acff928 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/forward_declarations.h @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file may be included by header files to forward declare common +// public types. The intent here is that .CPP files should need to +// include fewer header files. + +#define FWD_DECL(x) \ + class x; \ + typedef DPTR(x) PTR_##x; + +// rtu +FWD_DECL(AllocHeap) +FWD_DECL(CObjectHeader) +FWD_DECL(CLREventStatic) +FWD_DECL(CrstHolder) +FWD_DECL(CrstStatic) +FWD_DECL(EEMethodInfo) +FWD_DECL(EECodeManager) +FWD_DECL(EEThreadId) +FWD_DECL(MethodInfo) +FWD_DECL(Module) +FWD_DECL(Object) +FWD_DECL(OBJECTHANDLEHolder) +FWD_DECL(PageEntry) +FWD_DECL(PAL_EnterHolder) +FWD_DECL(PAL_LeaveHolder) +FWD_DECL(SpinLock) +FWD_DECL(RCOBJECTHANDLEHolder) +FWD_DECL(RedhawkGCInterface) +FWD_DECL(RtuObjectRef) +FWD_DECL(RuntimeInstance) +FWD_DECL(StackFrameIterator) +FWD_DECL(SyncClean) +FWD_DECL(SyncState) +FWD_DECL(Thread) +FWD_DECL(ThreadStore) + +#ifdef FEATURE_RWX_MEMORY +namespace rh { + namespace util { + FWD_DECL(MemRange) + FWD_DECL(MemAccessMgr) + FWD_DECL(WriteAccessHolder) + } +} +#endif // FEATURE_RWX_MEMORY + +// inc +FWD_DECL(EEInterfaceInfo) +FWD_DECL(EEType) + diff --git a/src/coreclr/src/nativeaot/Runtime/gcdump.cpp b/src/coreclr/src/nativeaot/Runtime/gcdump.cpp new file mode 100644 index 0000000000000..5bf25316cbc5f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcdump.cpp @@ -0,0 +1,709 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+/***************************************************************************** + * GCDump.cpp + * + * Defines functions to display the GCInfo as defined by the GC-encoding + * spec. The GC information may be either dynamically created by a + * Just-In-Time compiler conforming to the standard code-manager spec, + * or may be persisted by a managed native code compiler conforming + * to the standard code-manager spec. + */ +#include "common.h" + +#if (defined(_DEBUG) || defined(DACCESS_COMPILE)) + +#include "gcenv.h" +#include "varint.h" +#include "gcinfo.h" +#include "gcdump.h" + +/*****************************************************************************/ + +#ifdef DACCESS_COMPILE +static void DacNullPrintf(const char* , ...) {} +#endif + +GCDump::GCDump() +{ +#ifndef DACCESS_COMPILE + // By default, use the standard printf function to dump + GCDump::gcPrintf = (printfFtn) ::printf; +#else + // Default for DAC is a no-op. + GCDump::gcPrintf = DacNullPrintf; +#endif +} + + + +/*****************************************************************************/ + +static const char * const calleeSaveRegMaskBitNumberToName[] = +{ +#if defined(TARGET_X86) + "EBX", + "ESI", + "EDI", + "EBP", +#elif defined(TARGET_AMD64) + "RBX", + "RSI", + "RDI", + "RBP", + "R12", + "R13", + "R14", + "R15" +#elif defined(TARGET_ARM) + "R4", + "R5", + "R6", + "R7", + "R8", + "R9", + "R10", + "R11", + "LR", +#elif defined(TARGET_ARM64) + "LR", + "X19", + "X20", + "X21", + "X22", + "X23", + "X24", + "X25", + "X26", + "X27", + "X28", + "FP", +#else +#error unknown architecture +#endif +}; + +char const * GetReturnKindString(GCInfoHeader::MethodReturnKind returnKind) +{ + switch (returnKind) + { + case GCInfoHeader::MRK_ReturnsScalar: return "scalar"; + case GCInfoHeader::MRK_ReturnsObject: return "object"; + case GCInfoHeader::MRK_ReturnsByref: return "byref"; + case GCInfoHeader::MRK_ReturnsToNative: return "native"; +#if defined(TARGET_ARM64) + case GCInfoHeader::MRK_Scalar_Obj: return "{scalar, object}"; + case GCInfoHeader::MRK_Obj_Obj: return "{object, object}"; + case GCInfoHeader::MRK_Byref_Obj: return "{byref, object}"; + case GCInfoHeader::MRK_Scalar_Byref: return "{scalar, byref}"; + case GCInfoHeader::MRK_Obj_Byref: return "{object, byref}"; + case GCInfoHeader::MRK_Byref_Byref: return "{byref, byref}"; +#endif // defined(TARGET_ARM64) + default: return "???"; + } +} + +char const * GetFramePointerRegister() +{ +#if defined(TARGET_X86) + return "EBP"; +#elif defined(TARGET_AMD64) + return "RBP"; +#elif defined(TARGET_ARM) + return "R7"; +#elif defined(TARGET_ARM64) + return "FP"; +#else +#error unknown architecture +#endif +} + +char const * GetStackPointerRegister() +{ +#if defined(TARGET_X86) + return "ESP"; +#elif defined(TARGET_AMD64) + return "RSP"; +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) + return "SP"; +#else +#error unknown architecture +#endif +} + +size_t FASTCALL GCDump::DumpInfoHeader (PTR_UInt8 gcInfo, + Tables * pTables, + GCInfoHeader * pHeader /* OUT */ + ) +{ + size_t headerSize = 0; + PTR_UInt8 gcInfoStart = gcInfo; + PTR_UInt8 pbStackChanges = 0; + PTR_UInt8 pbUnwindInfo = 0; + + unsigned unwindInfoBlobOffset = VarInt::ReadUnsigned(gcInfo); + bool inlineUnwindInfo = (unwindInfoBlobOffset == 0); + + if (inlineUnwindInfo) + { + // it is inline.. + pbUnwindInfo = gcInfo; + } + else + { + // The offset was adjusted by 1 to reserve the 0 encoding for the inline case, so we re-adjust it to + // the actual offset here. 
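+        // (So an encoded value of 1 refers to byte 0 of the unwind info blob, 2 to byte 1, and so on;
+        // an encoded 0 never reaches this branch, because 0 always means the unwind info is stored inline, handled above.)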
+ pbUnwindInfo = pTables->pbUnwindInfoBlob + unwindInfoBlobOffset - 1; + } + + // @TODO: decode all funclet headers as well. + pbStackChanges = pHeader->DecodeHeader(0, pbUnwindInfo, &headerSize ); + + if (inlineUnwindInfo) + gcInfo += headerSize; + + unsigned epilogCount = pHeader->GetEpilogCount(); + bool epilogAtEnd = pHeader->IsEpilogAtEnd(); + + gcPrintf(" prologSize: %d\n", pHeader->GetPrologSize()); + if (pHeader->HasVaryingEpilogSizes()) + gcPrintf(" epilogSize: (varies)\n"); + else + gcPrintf(" epilogSize: %d\n", pHeader->GetFixedEpilogSize()); + + gcPrintf(" epilogCount: %d %s\n", epilogCount, epilogAtEnd ? "[end]" : ""); + gcPrintf(" returnKind: %s\n", GetReturnKindString(pHeader->GetReturnKind())); + gcPrintf(" frameKind: %s", pHeader->HasFramePointer() ? GetFramePointerRegister() : GetStackPointerRegister()); +#ifdef TARGET_AMD64 + if (pHeader->HasFramePointer()) + gcPrintf(" offset: %d", pHeader->GetFramePointerOffset()); +#endif // HOST_AMD64 + gcPrintf("\n"); + gcPrintf(" frameSize: %d\n", pHeader->GetFrameSize()); + + if (pHeader->HasDynamicAlignment()) { + gcPrintf(" alignment: %d\n", (1 << pHeader->GetDynamicAlignment())); + if (pHeader->GetParamPointerReg() != RN_NONE) { + gcPrintf(" paramReg: %d\n", pHeader->GetParamPointerReg()); + } + } + + gcPrintf(" savedRegs: "); + CalleeSavedRegMask savedRegs = pHeader->GetSavedRegs(); + CalleeSavedRegMask mask = (CalleeSavedRegMask) 1; + for (int i = 0; i < RBM_CALLEE_SAVED_REG_COUNT; i++) + { + if (savedRegs & mask) + { + gcPrintf("%s ", calleeSaveRegMaskBitNumberToName[i]); + } + mask = (CalleeSavedRegMask)(mask << 1); + } + gcPrintf("\n"); + +#ifdef TARGET_ARM + gcPrintf(" parmRegsPushedCount: %d\n", pHeader->ParmRegsPushedCount()); +#endif + +#ifdef TARGET_X86 + gcPrintf(" returnPopSize: %d\n", pHeader->GetReturnPopSize()); + if (pHeader->HasStackChanges()) + { + // @TODO: need to read the stack changes string that follows + ASSERT(!"NYI -- stack changes for ESP frames"); + } +#endif + + if (pHeader->ReturnsToNative()) + { + gcPrintf(" reversePinvokeFrameOffset: 0x%02x\n", pHeader->GetReversePinvokeFrameOffset()); + } + + + if (!epilogAtEnd && !pHeader->IsFunclet()) + { + gcPrintf(" epilog offsets: "); + unsigned previousOffset = 0; + for (unsigned idx = 0; idx < epilogCount; idx++) + { + unsigned newOffset = previousOffset + VarInt::ReadUnsigned(gcInfo); + gcPrintf("0x%04x ", newOffset); + if (pHeader->HasVaryingEpilogSizes()) + gcPrintf("(%u bytes) ", VarInt::ReadUnsigned(gcInfo)); + previousOffset = newOffset; + } + gcPrintf("\n"); + } + + return gcInfo - gcInfoStart; +} + +// TODO: Can we unify this code with ReportLocalSlot in RHCodeMan.cpp? 
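+// Prints a single tracked local, e.g. "local slot 0n3, [RBP+18]" on AMD64 with a frame pointer
+// (an illustrative line, not taken from a real dump); the base register and the sign/offset
+// computation are architecture-specific below.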
+void GCDump::PrintLocalSlot(UInt32 slotNum, GCInfoHeader const * pHeader) +{ + char const * baseReg; + Int32 offset; + + if (pHeader->HasFramePointer()) + { + baseReg = GetFramePointerRegister(); +#ifdef TARGET_ARM + offset = pHeader->GetFrameSize() - ((slotNum + 1) * POINTER_SIZE); +#elif defined(TARGET_ARM64) + if (pHeader->AreFPLROnTop()) + { + offset = -(Int32)((slotNum + 1) * POINTER_SIZE); + } + else + { + offset = (slotNum + 2) * POINTER_SIZE; + } +#elif defined(TARGET_X86) + offset = -pHeader->GetPreservedRegsSaveSize() - (slotNum * POINTER_SIZE); +#elif defined(TARGET_AMD64) + if (pHeader->GetFramePointerOffset() == 0) + { + offset = -pHeader->GetPreservedRegsSaveSize() - (slotNum * POINTER_SIZE); + } + else + { + offset = (slotNum * POINTER_SIZE); + } +#else +#error unknown architecture +#endif + } + else + { + baseReg = GetStackPointerRegister(); + offset = pHeader->GetFrameSize() - ((slotNum + 1) * POINTER_SIZE); + } + + char const * sign = "+"; + if (offset < 0) + { + sign = "-"; + offset = -offset; + } + gcPrintf("local slot 0n%d, [%s%s%02X]\n", slotNum, baseReg, sign, offset); +} + +// Reads a 7-bit-encoded register mask: +// - 0RRRRRRR for non-ARM64 registers and { x0-x6 } ARM64 registers +// - 1RRRRRRR 0RRRRRRR for { x0-x13 } ARM64 registers +// - 1RRRRRRR 1RRRRRRR 000RRRRR for { x0-x15, xip0, xip1, lr } ARM64 registers +// Returns the number of bytes read. +size_t ReadRegisterMaskBy7Bit(PTR_UInt8 pCursor, UInt32* pMask) +{ + UInt32 byte0 = *pCursor; + if (!(byte0 & 0x80)) + { + *pMask = byte0; + return 1; + } + +#if defined(TARGET_ARM64) + UInt32 byte1 = *(pCursor + 1); + if (!(byte1 & 0x80)) + { + // XOR with 0x80 discards the most significant bit of byte0 + *pMask = (byte1 << 7) ^ byte0 ^ 0x80; + return 2; + } + + UInt32 byte2 = *(pCursor + 2); + if (!(byte2 & 0x80)) + { + // XOR with 0x4080 discards the most significant bits of byte0 and byte1 + *pMask = (byte2 << 14) ^ (byte1 << 7) ^ byte0 ^ 0x4080; + return 3; + } +#endif + + UNREACHABLE_MSG("Register mask is too long"); +} + +void GCDump::DumpCallsiteString(UInt32 callsiteOffset, PTR_UInt8 pbCallsiteString, + GCInfoHeader const * pHeader) +{ + gcPrintf("%04x: ", callsiteOffset); + + int count = 0; + UInt8 b; + PTR_UInt8 pCursor = pbCallsiteString; + + bool last = false; + bool first = true; + + do + { + if (!first) + gcPrintf(" "); + + first = false; + + b = *pCursor++; + last = ((b & 0x20) == 0x20); + + switch (b & 0xC0) + { + case 0x00: + { + // case 2 -- "register set" + gcPrintf("%02x | 2 ", b); +#ifdef TARGET_ARM + if (b & CSR_MASK_R4) { gcPrintf("R4 "); count++; } + if (b & CSR_MASK_R5) { gcPrintf("R5 "); count++; } + if (b & CSR_MASK_R6) { gcPrintf("R6 "); count++; } + if (b & CSR_MASK_R7) { gcPrintf("R7 "); count++; } + if (b & CSR_MASK_R8) { gcPrintf("R8 "); count++; } +#elif defined(TARGET_ARM64) + UInt16 regs = (b & 0xF); + if (b & 0x10) { regs |= (*pCursor++ << 4); } + + ASSERT(!(regs & CSR_MASK_LR)); + if (regs & CSR_MASK_X19) { gcPrintf("X19 "); count++; } + if (regs & CSR_MASK_X20) { gcPrintf("X20 "); count++; } + if (regs & CSR_MASK_X21) { gcPrintf("X21 "); count++; } + if (regs & CSR_MASK_X22) { gcPrintf("X22 "); count++; } + if (regs & CSR_MASK_X23) { gcPrintf("X23 "); count++; } + if (regs & CSR_MASK_X24) { gcPrintf("X24 "); count++; } + if (regs & CSR_MASK_X25) { gcPrintf("X25 "); count++; } + if (regs & CSR_MASK_X26) { gcPrintf("X26 "); count++; } + if (regs & CSR_MASK_X27) { gcPrintf("X27 "); count++; } + if (regs & CSR_MASK_X28) { gcPrintf("X28 "); count++; } + if (regs & CSR_MASK_FP ) { 
gcPrintf("FP " ); count++; } +#elif defined(TARGET_AMD64) + if (b & CSR_MASK_RBX) { gcPrintf("RBX "); count++; } + if (b & CSR_MASK_RSI) { gcPrintf("RSI "); count++; } + if (b & CSR_MASK_RDI) { gcPrintf("RDI "); count++; } + if (b & CSR_MASK_RBP) { gcPrintf("RBP "); count++; } + if (b & CSR_MASK_R12) { gcPrintf("R12 "); count++; } +#elif defined(TARGET_X86) + if (b & CSR_MASK_RBX) { gcPrintf("EBX "); count++; } + if (b & CSR_MASK_RSI) { gcPrintf("ESI "); count++; } + if (b & CSR_MASK_RDI) { gcPrintf("EDI "); count++; } + if (b & CSR_MASK_RBP) { gcPrintf("EBP "); count++; } +#else +#error unknown architecture +#endif + gcPrintf("\n"); + } + break; + + case 0x40: + { + // case 3 -- "register" + const char* regName = "???"; + const char* interior = (b & 0x10) ? "+" : ""; + const char* pinned = (b & 0x08) ? "!" : ""; + + switch (b & 0x7) + { +#ifdef TARGET_ARM + case CSR_NUM_R4: regName = "R4"; break; + case CSR_NUM_R5: regName = "R5"; break; + case CSR_NUM_R6: regName = "R6"; break; + case CSR_NUM_R7: regName = "R7"; break; + case CSR_NUM_R8: regName = "R8"; break; + case CSR_NUM_R9: regName = "R9"; break; + case CSR_NUM_R10: regName = "R10"; break; + case CSR_NUM_R11: regName = "R11"; break; +#elif defined(TARGET_ARM64) + case CSR_NUM_X19: regName = "X19"; break; + case CSR_NUM_X20: regName = "X20"; break; + case CSR_NUM_X21: regName = "X21"; break; + case CSR_NUM_X22: regName = "X22"; break; + case CSR_NUM_X23: regName = "X23"; break; + case CSR_NUM_X24: regName = "X24"; break; + case CSR_NUM_X25: regName = "X25"; break; + case 0: + switch (*pCursor++) + { + case CSR_NUM_X26: regName = "X26"; break; + case CSR_NUM_X27: regName = "X27"; break; + case CSR_NUM_X28: regName = "X28"; break; + case CSR_NUM_FP : regName = "FP" ; break; + } + break; +#elif defined(TARGET_AMD64) + case CSR_NUM_RBX: regName = "RBX"; break; + case CSR_NUM_RSI: regName = "RSI"; break; + case CSR_NUM_RDI: regName = "RDI"; break; + case CSR_NUM_RBP: regName = "RBP"; break; + case CSR_NUM_R12: regName = "R12"; break; + case CSR_NUM_R13: regName = "R13"; break; + case CSR_NUM_R14: regName = "R14"; break; + case CSR_NUM_R15: regName = "R15"; break; +#elif defined(TARGET_X86) + case CSR_NUM_RBX: regName = "EBX"; break; + case CSR_NUM_RSI: regName = "ESI"; break; + case CSR_NUM_RDI: regName = "EDI"; break; + case CSR_NUM_RBP: regName = "EBP"; break; +#else +#error unknown architecture +#endif + } + gcPrintf("%02x | 3 %s%s%s \n", b, regName, interior, pinned); + count++; + } + break; + + case 0x80: + { + if (b & 0x10) + { + // case 4 -- "local slot set" or "common var tail" + if ((b & 0x0f) != 0) + { + gcPrintf("%02x | 4 ", b); + bool isFirst = true; + + int mask = 0x01; + int slotNum = 0; + while (mask <= 0x08) + { + if (b & mask) + { + if (!isFirst) + { + if (!first) + gcPrintf(" "); + gcPrintf(" | "); + } + + PrintLocalSlot(slotNum, pHeader); + + isFirst = false; + count++; + } + mask <<= 1; + slotNum++; + } + } + else + { + unsigned commonVarInx = 0; + if ((b & 0x20) == 0) + commonVarInx = VarInt::ReadUnsigned(pCursor); + + gcPrintf("%02x | 8 set #%04u\n", b, commonVarInx); + } + } + else + { + // case 5 -- "local slot" + int slotNum = (int)(b & 0xF) + 4; + gcPrintf("%02x | 5 ", b); + PrintLocalSlot(slotNum, pHeader); + + count++; + } + } + break; + case 0xC0: + { + if ((b & 0xC7) == 0xC2) + { + // case 7 - live scratch regs + gcPrintf("%02x | 7 ", b); + + UInt32 regs, byrefRegs = 0, pinnedRegs = 0; + pCursor += ReadRegisterMaskBy7Bit(pCursor, ®s); + if (b & 0x10) + pCursor += ReadRegisterMaskBy7Bit(pCursor, &byrefRegs); + 
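+                    // Bit 0x08 of the lead byte signals that a mask of pinned scratch registers
+                    // follows, mirroring the byref mask gated by bit 0x10 above. Illustrative
+                    // decode for ReadRegisterMaskBy7Bit on ARM64: the bytes { 0x81, 0x03 } yield
+                    // the mask 0x181 (bit 0 from the first byte, bits 7 and 8 from the second).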
if (b & 0x08) + pCursor += ReadRegisterMaskBy7Bit(pCursor, &pinnedRegs); + + for (UInt32 reg = 0; ; reg++) + { + UInt32 regMask = (1 << reg); + if (regMask > regs) + break; + + if (regs & regMask) + { + char* pinned = (pinnedRegs & regMask) ? "!" : ""; + char* interior = (byrefRegs & regMask) ? "+" : ""; + char* regStr = "???"; + + switch (reg) + { +#if defined(TARGET_ARM) + case SR_NUM_R0: regStr = "R0"; break; + case SR_NUM_R1: regStr = "R1"; break; + case SR_NUM_R2: regStr = "R2"; break; + case SR_NUM_R3: regStr = "R3"; break; + case SR_NUM_R12: regStr = "R12"; break; + case SR_NUM_LR: regStr = "LR"; break; +#elif defined(TARGET_ARM64) + case SR_NUM_X0: regStr = "X0"; break; + case SR_NUM_X1: regStr = "X1"; break; + case SR_NUM_X2: regStr = "X2"; break; + case SR_NUM_X3: regStr = "X3"; break; + case SR_NUM_X4: regStr = "X4"; break; + case SR_NUM_X5: regStr = "X5"; break; + case SR_NUM_X6: regStr = "X6"; break; + case SR_NUM_X7: regStr = "X7"; break; + case SR_NUM_X8: regStr = "X8"; break; + case SR_NUM_X9: regStr = "X9"; break; + case SR_NUM_X10: regStr = "X10"; break; + case SR_NUM_X11: regStr = "X11"; break; + case SR_NUM_X12: regStr = "X12"; break; + case SR_NUM_X13: regStr = "X13"; break; + case SR_NUM_X14: regStr = "X14"; break; + case SR_NUM_X15: regStr = "X15"; break; + case SR_NUM_XIP0: regStr = "XIP0"; break; + case SR_NUM_XIP1: regStr = "XIP1"; break; + case SR_NUM_LR: regStr = "LR"; break; +#elif defined(TARGET_AMD64) + case SR_NUM_RAX: regStr = "RAX"; break; + case SR_NUM_RCX: regStr = "RCX"; break; + case SR_NUM_RDX: regStr = "RDX"; break; + case SR_NUM_R8: regStr = "R8"; break; + case SR_NUM_R9: regStr = "R9"; break; + case SR_NUM_R10: regStr = "R10"; break; + case SR_NUM_R11: regStr = "R11"; break; +#elif defined(TARGET_X86) + case SR_NUM_RAX: regStr = "EAX"; break; + case SR_NUM_RCX: regStr = "ECX"; break; + case SR_NUM_RDX: regStr = "EDX"; break; +#else +#error unknown architecture +#endif + } + gcPrintf("%s%s%s ", regStr, interior, pinned); + count++; + } + } + } + else + { + // case 6 - stack slot / stack slot set + gcPrintf("%02x ", b); + unsigned mask = 0; + PTR_UInt8 pInts = pCursor; + unsigned offset = VarInt::ReadUnsigned(pCursor); + const char* interior = (b & 0x10) ? "+" : ""; + const char* pinned = (b & 0x08) ? "!" : ""; + const char* baseReg = (b & 0x04) ? GetFramePointerRegister() : GetStackPointerRegister(); + const char* sign = (b & 0x02) ? 
"-" : "+"; + if (b & 0x01) + { + mask = VarInt::ReadUnsigned(pCursor); + } + + int c = 1; + while (pInts != pCursor) + { + gcPrintf("%02x ", *pInts++); + c++; + } + + for (; c < 4; c++) + { + gcPrintf(" "); + } + + gcPrintf("| 6 [%s%s%02X]%s%s\n", baseReg, sign, offset, interior, pinned); + count++; + + while (mask > 0) + { + offset += POINTER_SIZE; + if (mask & 1) + { + if (!first) + gcPrintf(" "); + + gcPrintf(" | [%s%s%02X]%s%s\n", baseReg, sign, offset, interior, pinned); + count++; + } + mask >>= 1; + } + } + } + break; + } + } + while (!last); + + //gcPrintf("\n"); +} + +size_t FASTCALL GCDump::DumpGCTable (PTR_UInt8 gcInfo, + Tables * pTables, + const GCInfoHeader& header) +{ + PTR_UInt8 pCursor = gcInfo; + + if (header.HasCommonVars()) + { + UInt32 commonVarCount = VarInt::ReadUnsigned(pCursor); + for (UInt32 i = 0; i < commonVarCount; i++) + { + VarInt::SkipUnsigned(pCursor); + } + } + + // + // Decode the method GC info + // + // 0ddddccc -- SMALL ENCODING + // + // -- dddd is an index into the delta shortcut table + // -- ccc is an offset into the callsite strings blob + // + // 1ddddddd { info offset } -- BIG ENCODING + // + // -- ddddddd is a 7-bit delta + // -- { info offset } is a variable-length unsigned encoding of the offset into the callsite + // strings blob for this callsite. + // + // 10000000 { delta } -- FORWARDER + // + // -- { delta } is a variable-length unsigned encoding of the offset to the next callsite + // + // 11111111 -- STRING TERMINATOR + // + + UInt32 curOffset = 0; + + for (;;) + { + UInt8 b = *pCursor++; + unsigned infoOffset; + + if (b & 0x80) + { + UInt8 lowBits = (b & 0x7F); + // FORWARDER + if (lowBits == 0) + { + curOffset += VarInt::ReadUnsigned(pCursor); + continue; + } + else + if (lowBits == 0x7F) // STRING TERMINATOR + break; + + // BIG ENCODING + curOffset += lowBits; + infoOffset = VarInt::ReadUnsigned(pCursor); + } + else + { + // SMALL ENCODING + infoOffset = (b & 0x7); + curOffset += pTables->pbDeltaShortcutTable[b >> 3]; + } + + DumpCallsiteString(curOffset, pTables->pbCallsiteInfoBlob + infoOffset, &header); + } + + gcPrintf("-------\n"); + + return 0; +} + +#endif // _DEBUG || DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/gcdump.h b/src/coreclr/src/nativeaot/Runtime/gcdump.h new file mode 100644 index 0000000000000..34617ae2fa1b6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcdump.h @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +/***************************************************************************** + * GCDump.h + * + * Defines functions to display the GCInfo as defined by the GC-encoding + * spec. The GC information may be either dynamically created by a + * Just-In-Time compiler conforming to the standard code-manager spec, + * or may be persisted by a managed native code compiler conforming + * to the standard code-manager spec. 
+ */ + +/*****************************************************************************/ +#ifndef __GCDUMP_H__ +#define __GCDUMP_H__ +/*****************************************************************************/ + +struct GCInfoHeader; + +#ifndef FASTCALL +#define FASTCALL __fastcall +#endif + + +class GCDump +{ +public: + + struct Tables + { + PTR_UInt8 pbDeltaShortcutTable; + PTR_UInt8 pbUnwindInfoBlob; + PTR_UInt8 pbCallsiteInfoBlob; + }; + + + GCDump (); + + /*------------------------------------------------------------------------- + * Dumps the GCInfoHeader to 'stdout' + * gcInfo : Start of the GC info block + * Return value : Size in bytes of the header encoding + */ + + size_t FASTCALL DumpInfoHeader(PTR_UInt8 gcInfo, + Tables * pTables, + GCInfoHeader * header /* OUT */ + ); + + /*------------------------------------------------------------------------- + * Dumps the GC tables to 'stdout' + * gcInfo : Ptr to the start of the table part of the GC info. + * This immediately follows the GCinfo header + * Return value : Size in bytes of the GC table encodings + */ + + size_t FASTCALL DumpGCTable(PTR_UInt8 gcInfo, + Tables * pTables, + const GCInfoHeader& header + ); + + + typedef void (*printfFtn)(const char* fmt, ...); + printfFtn gcPrintf; + + + + //------------------------------------------------------------------------- +protected: + + void PrintLocalSlot(UInt32 slotNum, GCInfoHeader const * pHeader); + void DumpCallsiteString(UInt32 callsiteOffset, PTR_UInt8 pbCallsiteString, GCInfoHeader const * pHeader); +}; + +/*****************************************************************************/ +#endif // __GC_DUMP_H__ +/*****************************************************************************/ diff --git a/src/coreclr/src/nativeaot/Runtime/gcenv.h b/src/coreclr/src/nativeaot/Runtime/gcenv.h new file mode 100644 index 0000000000000..0f00ed4b9984e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcenv.h @@ -0,0 +1,199 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
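+
+// gcenv.h : environment glue that lets the GC sources build inside the Redhawk runtime. It pulls
+// in the headers the GC code expects and supplies small adapter types such as ArrayBase, the
+// MethodTable-over-EEType bridge and the simplified EEConfig defined below.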
+#ifndef __GCENV_H__ +#define __GCENV_H__ + +#define FEATURE_PREMORTEM_FINALIZATION + +#ifdef _MSC_VER +#pragma warning( disable: 4189 ) // 'hp': local variable is initialized but not referenced -- common in GC +#pragma warning( disable: 4127 ) // conditional expression is constant -- common in GC +#endif + +#include +#include +#include +#include +#include + +#include "sal.h" +#include "gcenv.structs.h" +#include "gcenv.interlocked.h" +#include "gcenv.base.h" +#include "gcenv.os.h" + +#include "Crst.h" +#include "event.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "TargetPtrs.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "rheventtrace.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "gcrhinterface.h" +#include "gcenv.interlocked.inl" + +#include "slist.h" +#include "RWLock.h" +#include "shash.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "eetype.inl" +#include "volatile.h" + +#include "gcenv.inl" + +#include "stressLog.h" +#ifdef FEATURE_ETW + + #ifndef _INC_WINDOWS + typedef void* LPVOID; + typedef uint32_t UINT; + typedef void* PVOID; + typedef uint64_t ULONGLONG; + typedef uint32_t ULONG; + typedef int64_t LONGLONG; + typedef uint8_t BYTE; + typedef uint16_t UINT16; + #endif // _INC_WINDOWS + + #include "etwevents.h" + #include "eventtrace.h" + +#else // FEATURE_ETW + + #include "etmdummy.h" + #define ETW_EVENT_ENABLED(e,f) false + +#endif // FEATURE_ETW + +#define MAX_LONGPATH 1024 +#define LOG(x) + +#ifndef YieldProcessor +#define YieldProcessor PalYieldProcessor +#endif + +// Adapter for GC's view of Array +class ArrayBase : Array +{ +public: + DWORD GetNumComponents() + { + return m_Length; + } + + static size_t GetOffsetOfNumComponents() + { + return offsetof(ArrayBase, m_Length); + } +}; + +// +// ----------------------------------------------------------------------------------------------------------- +// +// Bridge GC/HandleTable's version of MethodTable to Redhawk's EEType. Neither component tries to access any +// fields of MethodTable directly so this is mostly just a case of providing all the CLR-style accessors they +// need implemented on top of EEType functionality (we can simply recast the 'this' pointer into an EEType +// pointer). +// +// ****** NOTE: Do NOT attempt to add fields or virtual methods to this class! The pointer passed in 'this' +// ****** really does point to an EEType (there's no such thing as a MethodTable structure in RH). 
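+// ****** Every accessor below therefore just recasts 'this' to EEType* and forwards to the
+// ****** equivalent EEType query; any field added here would alias memory that belongs to the EEType.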
+// +class MethodTable +{ +public: + UInt32 GetBaseSize() { return ((EEType*)this)->get_BaseSize(); } + UInt16 GetComponentSize() { return ((EEType*)this)->get_ComponentSize(); } + UInt16 RawGetComponentSize() { return ((EEType*)this)->get_ComponentSize(); } + UInt32 ContainsPointers() { return ((EEType*)this)->HasReferenceFields(); } + UInt32 ContainsPointersOrCollectible() { return ((EEType*)this)->HasReferenceFields(); } + UInt32_BOOL HasComponentSize() const { return TRUE; } +#ifdef FEATURE_PREMORTEM_FINALIZATION + UInt32_BOOL HasFinalizer() { return ((EEType*)this)->HasFinalizer(); } + UInt32_BOOL HasCriticalFinalizer() { return FALSE; } +#endif // FEATURE_PREMORTEM_FINALIZATION +#ifdef FEATURE_STRUCTALIGN +#ifdef FEATURE_BARTOK + UInt32 GetRequiredAlignment() const { return ((EEType*)this)->get_BaseAlignment(); } +#else // FEATURE_BARTOK + UInt32 GetRequiredAlignment() const { return sizeof(void*); } +#endif // FEATURE_BARTOK +#endif // FEATURE_STRUCTALIGN + bool RequiresAlign8() { return ((EEType*)this)->RequiresAlign8(); } + bool IsValueType() { return ((EEType*)this)->get_IsValueType(); } + UInt32_BOOL SanityCheck() { return ((EEType*)this)->Validate(); } +}; + +class EEConfig +{ + UInt8 m_gcStressMode; + +public: + enum HeapVerifyFlags { + HEAPVERIFY_NONE = 0, + HEAPVERIFY_GC = 1, // Verify the heap at beginning and end of GC + HEAPVERIFY_BARRIERCHECK = 2, // Verify the brick table + HEAPVERIFY_SYNCBLK = 4, // Verify sync block scanning + + // the following options can be used to mitigate some of the overhead introduced + // by heap verification. some options might cause heap verifiction to be less + // effective depending on the scenario. + + HEAPVERIFY_NO_RANGE_CHECKS = 0x10, // Excludes checking if an OBJECTREF is within the bounds of the managed heap + HEAPVERIFY_NO_MEM_FILL = 0x20, // Excludes filling unused segment portions with fill pattern + HEAPVERIFY_POST_GC_ONLY = 0x40, // Performs heap verification post-GCs only (instead of before and after each GC) + HEAPVERIFY_DEEP_ON_COMPACT = 0x80 // Performs deep object verfication only on compacting GCs. + }; + + enum GCStressFlags { + GCSTRESS_NONE = 0, + GCSTRESS_ALLOC = 1, // GC on all allocs and 'easy' places + GCSTRESS_TRANSITION = 2, // GC on transitions to preemtive GC + GCSTRESS_INSTR_JIT = 4, // GC on every allowable JITed instr + GCSTRESS_INSTR_NGEN = 8, // GC on every allowable NGEN instr + GCSTRESS_UNIQUE = 16, // GC only on a unique stack trace + }; + + // This is treated like a constructor--it is not allowed to fail. We have it like this because we don't + // have a CRT to run a static constructor for us. For now, at least, we don't want to do any heavy-weight + // snooping of the environment to control any of these settings, so don't add any code like that here. + void Construct() + { + m_gcStressMode = GCSTRESS_NONE; + } + + GCStressFlags GetGCStressLevel() const { return (GCStressFlags) m_gcStressMode; } + void SetGCStressLevel(int val) { m_gcStressMode = (UInt8) val;} + + bool GetGCAllowVeryLargeObjects () const { return true; } + + // We need conservative GC enabled for some edge cases around ICastable support. This doesn't have much + // impact, it just makes the GC slightly more flexible in dealing with interior references (e.g. we can + // conservatively report an interior reference inside a GC free object or in the non-valid tail of the + // heap). 
+ bool GetGCConservative() const { return true; } +}; +extern EEConfig* g_pConfig; + +EXTERN_C UInt32 _tls_index; +inline UInt16 GetClrInstanceId() +{ + return (UInt16)_tls_index; +} + +class IGCHeap; +typedef DPTR(IGCHeap) PTR_IGCHeap; +typedef DPTR(uint32_t) PTR_uint32_t; + +enum CLRDataEnumMemoryFlags : int; + +/* _TRUNCATE */ +#if !defined (_TRUNCATE) +#define _TRUNCATE ((size_t)-1) +#endif /* !defined (_TRUNCATE) */ + +#endif // __GCENV_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/gchandleutilities.h b/src/coreclr/src/nativeaot/Runtime/gchandleutilities.h new file mode 100644 index 0000000000000..5b994601888e2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gchandleutilities.h @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _GCHANDLEUTILITIES_H_ +#define _GCHANDLEUTILITIES_H_ + +#include "gcinterface.h" + +extern "C" IGCHandleManager* g_pGCHandleManager; + +class GCHandleUtilities +{ +public: + // Retrieves the GC handle table. + static IGCHandleManager* GetGCHandleManager() + { + LIMITED_METHOD_CONTRACT; + + assert(g_pGCHandleManager != nullptr); + return g_pGCHandleManager; + } + +private: + // This class should never be instantiated. + GCHandleUtilities() = delete; +}; + +// Given a handle, returns an OBJECTREF for the object it refers to. +inline OBJECTREF ObjectFromHandle(OBJECTHANDLE handle) +{ + _ASSERTE(handle); + + // Wrap the raw OBJECTREF and return it + return UNCHECKED_OBJECTREF_TO_OBJECTREF(*PTR_UNCHECKED_OBJECTREF(handle)); +} + +#endif // _GCHANDLEUTILITIES_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/gcheaputilities.cpp b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.cpp new file mode 100644 index 0000000000000..9e924b2d3d2d7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.cpp @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "gchandleutilities.h" + +#include "gceventstatus.h" + +// This is the global GC heap, maintained by the VM. +GPTR_IMPL(IGCHeap, g_pGCHeap); + +// These globals are variables used within the GC and maintained +// by the EE for use in write barriers. It is the responsibility +// of the GC to communicate updates to these globals to the EE through +// GCToEEInterface::StompWriteBarrier. +GPTR_IMPL_INIT(uint32_t, g_card_table, nullptr); +GPTR_IMPL_INIT(uint8_t, g_lowest_address, nullptr); +GPTR_IMPL_INIT(uint8_t, g_highest_address, nullptr); +GVAL_IMPL_INIT(GCHeapType, g_heap_type, GC_HEAP_INVALID); +uint8_t* g_ephemeral_low = (uint8_t*)1; +uint8_t* g_ephemeral_high = (uint8_t*)~0; + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +uint32_t* g_card_bundle_table = nullptr; +#endif + +IGCHandleManager* g_pGCHandleManager = nullptr; + +GcDacVars g_gc_dac_vars; +GPTR_IMPL(GcDacVars, g_gcDacGlobals); + +// GC entrypoints for the the linked-in GC. These symbols are invoked +// directly if we are not using a standalone GC. +extern "C" HRESULT GC_Initialize( + /* In */ IGCToCLR* clrToGC, + /* Out */ IGCHeap** gcHeap, + /* Out */ IGCHandleManager** gcHandleManager, + /* Out */ GcDacVars* gcDacVars +); + +#ifndef DACCESS_COMPILE + +// Initializes a non-standalone GC. 
The protocol for initializing a non-standalone GC +// is similar to loading a standalone one, except that the GC_VersionInfo and +// GC_Initialize symbols are linked to directory and thus don't need to be loaded. +// +HRESULT GCHeapUtilities::InitializeDefaultGC() +{ + // we should only call this once on startup. Attempting to load a GC + // twice is an error. + assert(g_pGCHeap == nullptr); + + IGCHeap* heap; + IGCHandleManager* manager; + HRESULT initResult = GC_Initialize(nullptr, &heap, &manager, &g_gc_dac_vars); + if (initResult == S_OK) + { + g_pGCHeap = heap; + g_pGCHandleManager = manager; + g_gcDacGlobals = &g_gc_dac_vars; + LOG((LF_GC, LL_INFO100, "GC load successful\n")); + } + else + { + LOG((LF_GC, LL_FATALERROR, "GC initialization failed with HR = 0x%X\n", initResult)); + } + + return initResult; +} + +void GCHeapUtilities::RecordEventStateChange(bool isPublicProvider, GCEventKeyword keywords, GCEventLevel level) +{ + // CoreRT does not support standalone GC. Call GCEventStatus directly to keep things simple. + GCEventStatus::Set(isPublicProvider ? GCEventProvider_Default : GCEventProvider_Private, keywords, level); +} + +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/gcheaputilities.h b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.h new file mode 100644 index 0000000000000..75e0a14a79619 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.h @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _GCHEAPUTILITIES_H_ +#define _GCHEAPUTILITIES_H_ + +#include "gcinterface.h" +#include "daccess.h" + +// The singular heap instance. +GPTR_DECL(IGCHeap, g_pGCHeap); + +#ifndef DACCESS_COMPILE +extern "C" { +#endif // !DACCESS_COMPILE +GPTR_DECL(uint8_t,g_lowest_address); +GPTR_DECL(uint8_t,g_highest_address); +GPTR_DECL(uint32_t,g_card_table); +GVAL_DECL(GCHeapType, g_heap_type); +#ifndef DACCESS_COMPILE +} +#endif // !DACCESS_COMPILE + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +extern "C" uint32_t* g_card_bundle_table; +#endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + +extern "C" uint8_t* g_ephemeral_low; +extern "C" uint8_t* g_ephemeral_high; + +// g_gc_dac_vars is a structure of pointers to GC globals that the +// DAC uses. It is not exposed directly to the DAC. +extern GcDacVars g_gc_dac_vars; + +// Instead of exposing g_gc_dac_vars to the DAC, a pointer to it +// is exposed here (g_gcDacGlobals). The reason for this is to avoid +// a problem in which a debugger attaches to a program while the program +// is in the middle of initializing the GC DAC vars - if the "publishing" +// of DAC vars isn't atomic, the debugger could see a partially initialized +// GcDacVars structure. +// +// Instead, the debuggee "publishes" GcDacVars by assigning a pointer to g_gc_dac_vars +// to this global, and the DAC will read this global. +typedef DPTR(GcDacVars) PTR_GcDacVars; +GPTR_DECL(GcDacVars, g_gcDacGlobals); + +// GCHeapUtilities provides a number of static methods +// that operate on the global heap instance. It can't be +// instantiated. +class GCHeapUtilities { +public: + // Retrieves the GC heap. + inline static IGCHeap* GetGCHeap() + { + assert(g_pGCHeap != nullptr); + return g_pGCHeap; + } + + // Returns true if the heap has been initialized, false otherwise. 
+ inline static bool IsGCHeapInitialized() + { + return g_pGCHeap != nullptr; + } + + // Returns true if a the heap is initialized and a garbage collection + // is in progress, false otherwise. + inline static BOOL IsGCInProgress(BOOL bConsiderGCStart = FALSE) + { + return GetGCHeap()->IsGCInProgressHelper(bConsiderGCStart); + } + + // Returns true if the held GC heap is a Server GC heap, false otherwise. + inline static bool IsServerHeap() + { + LIMITED_METHOD_CONTRACT; + +#ifdef FEATURE_SVR_GC + _ASSERTE(g_heap_type != GC_HEAP_INVALID); + return (g_heap_type == GC_HEAP_SVR); +#else + return false; +#endif // FEATURE_SVR_GC + } + +#ifndef DACCESS_COMPILE + // Initializes a non-standalone GC. + static HRESULT InitializeDefaultGC(); + + // Records a change in eventing state. This ultimately will inform the GC that it needs to be aware + // of new events being enabled. + static void RecordEventStateChange(bool isPublicProvider, GCEventKeyword keywords, GCEventLevel level); +#endif // DACCESS_COMPILE + +private: + // This class should never be instantiated. + GCHeapUtilities() = delete; +}; + +#endif // _GCHEAPUTILITIES_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/gcrhenv.cpp b/src/coreclr/src/nativeaot/Runtime/gcrhenv.cpp new file mode 100644 index 0000000000000..830f2a1613b41 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcrhenv.cpp @@ -0,0 +1,1519 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This module provides data storage and implementations needed by gcrhenv.h to help provide an isolated build +// and runtime environment in which GC and HandleTable code can exist with minimal modifications from the CLR +// mainline. See gcrhenv.h for a more detailed explanation of how this all fits together. +// + +#include "common.h" + +#include "gcenv.h" +#include "gcheaputilities.h" +#include "gchandleutilities.h" +#include "profheapwalkhelper.h" + +#include "gcenv.ee.h" + +#include "RestrictedCallouts.h" + +#include "gcrhinterface.h" + +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" + +#include "shash.h" +#include "RWLock.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "objecthandle.h" +#include "eetype.inl" +#include "RhConfig.h" + +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +#include "gcdesc.h" +#include "SyncClean.hpp" + +#include "daccess.h" + +#include "GCMemoryHelpers.h" + +#include "holder.h" +#include "volatile.h" + +#ifdef FEATURE_ETW + #ifndef _INC_WINDOWS + typedef void* LPVOID; + typedef uint32_t UINT; + typedef void* PVOID; + typedef uint64_t ULONGLONG; + typedef uint32_t ULONG; + typedef int64_t LONGLONG; + typedef uint8_t BYTE; + typedef uint16_t UINT16; + #endif // _INC_WINDOWS + + #include "etwevents.h" + #include "eventtrace.h" +#else // FEATURE_ETW + #include "etmdummy.h" + #define ETW_EVENT_ENABLED(e,f) false +#endif // FEATURE_ETW + +GPTR_IMPL(EEType, g_pFreeObjectEEType); + +#include "DebuggerHook.h" + +#include "gctoclreventsink.h" + +#ifndef DACCESS_COMPILE + +bool RhInitializeFinalization(); +bool RhStartFinalizerThread(); +void RhEnableFinalization(); + +// Simplified EEConfig -- It is just a static member, which statically initializes to the default values and +// has no dynamic initialization. Some settings may change at runtime, however. 
(Example: gcstress is +// enabled via a compiled-in call from a given managed module, not through snooping an environment setting.) +// +static EEConfig s_sDummyConfig; +EEConfig* g_pConfig = &s_sDummyConfig; + +// A few settings are now backed by the cut-down version of Redhawk configuration values. +static RhConfig g_sRhConfig; +RhConfig * g_pRhConfig = &g_sRhConfig; + +#ifdef FEATURE_ETW +// +// ----------------------------------------------------------------------------------------------------------- +// +// The automatically generated part of the Redhawk ETW infrastructure (EtwEvents.h) calls the following +// function whenever the system enables or disables tracing for this provider. +// + +UInt32 EtwCallback(UInt32 IsEnabled, RH_ETW_CONTEXT * pContext) +{ + GCHeapUtilities::RecordEventStateChange(!!(pContext->RegistrationHandle == Microsoft_Windows_Redhawk_GC_PublicHandle), + static_cast(pContext->MatchAnyKeyword), + static_cast(pContext->Level)); + + if (IsEnabled && + (pContext->RegistrationHandle == Microsoft_Windows_Redhawk_GC_PrivateHandle) && + GCHeapUtilities::IsGCHeapInitialized()) + { + FireEtwGCSettings(GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(FALSE), + GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(TRUE), + GCHeapUtilities::IsServerHeap()); + GCHeapUtilities::GetGCHeap()->DiagTraceGCSegments(); + } + + // Special check for the runtime provider's GCHeapCollectKeyword. Profilers + // flick this to force a full GC. + if (IsEnabled && + (pContext->RegistrationHandle == Microsoft_Windows_Redhawk_GC_PublicHandle) && + GCHeapUtilities::IsGCHeapInitialized() && + ((pContext->MatchAnyKeyword & CLR_GCHEAPCOLLECT_KEYWORD) != 0)) + { + // Profilers may (optionally) specify extra data in the filter parameter + // to log with the GCStart event. + LONGLONG l64ClientSequenceNumber = 0; + if ((pContext->FilterData != NULL) && + (pContext->FilterData->Type == 1) && + (pContext->FilterData->Size == sizeof(l64ClientSequenceNumber))) + { + l64ClientSequenceNumber = *(LONGLONG *) (pContext->FilterData->Ptr); + } + ETW::GCLog::ForceGC(l64ClientSequenceNumber); + } + + return 0; +} +#endif // FEATURE_ETW + +// +// ----------------------------------------------------------------------------------------------------------- +// +// The rest of Redhawk needs to be able to talk to the GC/HandleTable code (to initialize it, allocate +// objects etc.) without pulling in the entire adaptation layer provided by this file and gcrhenv.h. To this +// end the rest of Redhawk talks to us via a simple interface described in gcrhinterface.h. We provide the +// implementation behind those APIs here. +// + +// Perform any runtime-startup initialization needed by the GC, HandleTable or environmental code in gcrhenv. +// The boolean parameter should be true if a server GC is required and false for workstation. Returns true on +// success or false if a subsystem failed to initialize. + +#ifndef DACCESS_COMPILE +CrstStatic g_SuspendEELock; +#ifdef _MSC_VER +#pragma warning(disable:4815) // zero-sized array in stack object will have no elements +#endif // _MSC_VER +EEType g_FreeObjectEEType; + +// static +bool RedhawkGCInterface::InitializeSubsystems() +{ + g_pConfig->Construct(); + +#ifdef FEATURE_ETW + MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled = FALSE; + MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled = FALSE; + + // Register the Redhawk event provider with the system. 
+ RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Private(); + RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Public(); + + MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_Redhawk_GC_PrivateHandle; + MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_Redhawk_GC_PublicHandle; +#endif // FEATURE_ETW + + if (!InitializeSystemInfo()) + { + return false; + } + + // Initialize the special EEType used to mark free list entries in the GC heap. + g_FreeObjectEEType.InitializeAsGcFreeType(); + g_pFreeObjectEEType = &g_FreeObjectEEType; + + if (!g_SuspendEELock.InitNoThrow(CrstSuspendEE)) + return false; + +#ifdef FEATURE_SVR_GC + // TODO: This should use the logical CPU count adjusted for process affinity and cgroup limits + g_heap_type = (g_pRhConfig->GetUseServerGC() && PalGetProcessCpuCount() > 1) ? GC_HEAP_SVR : GC_HEAP_WKS; +#else + g_heap_type = GC_HEAP_WKS; +#endif + + HRESULT hr = GCHeapUtilities::InitializeDefaultGC(); + if (FAILED(hr)) + return false; + + // Apparently the Windows linker removes global variables if they are never + // read from, which is a problem for g_gcDacGlobals since it's expected that + // only the DAC will read from it. This forces the linker to include + // g_gcDacGlobals. + volatile void* _dummy = g_gcDacGlobals; + + // Initialize the GC subsystem. + hr = g_pGCHeap->Initialize(); + if (FAILED(hr)) + return false; + + if (!RhInitializeFinalization()) + return false; + + // Initialize HandleTable. + if (!GCHandleUtilities::GetGCHandleManager()->Initialize()) + return false; + + return true; +} +#endif // !DACCESS_COMPILE + +// Allocate an object on the GC heap. +// pEEType - type of the object +// uFlags - GC type flags (see gc.h GC_ALLOC_*) +// cbSize - size in bytes of the final object +// pTransitionFrame- transition frame to make stack crawable +// Returns a pointer to the object allocated or NULL on failure. + +COOP_PINVOKE_HELPER(void*, RhpGcAlloc, (EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame)) +{ + Thread * pThread = ThreadStore::GetCurrentThread(); + + pThread->SetCurrentThreadPInvokeTunnelForGcAlloc(pTransitionFrame); + + ASSERT(!pThread->IsDoNotTriggerGcSet()); + + size_t max_object_size; +#ifdef HOST_64BIT + if (g_pConfig->GetGCAllowVeryLargeObjects()) + { + max_object_size = (INT64_MAX - 7 - min_obj_size); + } + else +#endif // HOST_64BIT + { + max_object_size = (INT32_MAX - 7 - min_obj_size); + } + + if (cbSize >= max_object_size) + return NULL; + + const int MaxArrayLength = 0x7FEFFFFF; + const int MaxByteArrayLength = 0x7FFFFFC7; + + // Impose limits on maximum array length in each dimension to allow efficient + // implementation of advanced range check elimination in future. We have to allow + // higher limit for array of bytes (or one byte structs) for backward compatibility. + // Keep in sync with Array.MaxArrayLength in BCL. 
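+    // Note on the check below: it is only a fast filter. If cbSize is within MaxByteArrayLength,
+    // no per-dimension limit can be exceeded either: the element count of a byte array is below
+    // its allocation size, and for component sizes of two or more the static_assert inside the
+    // block keeps cbSize / 2 (an upper bound on the element count) under MaxArrayLength.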
+ if (cbSize > MaxByteArrayLength /* note: comparing allocation size with element count */) + { + // Ensure the above if check covers the minimal interesting size + static_assert(MaxByteArrayLength < (uint64_t)MaxArrayLength * 2, ""); + + if (pEEType->IsArray()) + { + if (pEEType->get_ComponentSize() != 1) + { + size_t elementCount = (cbSize - pEEType->get_BaseSize()) / pEEType->get_ComponentSize(); + if (elementCount > MaxArrayLength) + return NULL; + } + else + { + size_t elementCount = cbSize - pEEType->get_BaseSize(); + if (elementCount > MaxByteArrayLength) + return NULL; + } + } + } + + if (cbSize > RH_LARGE_OBJECT_SIZE) + uFlags |= GC_ALLOC_LARGE_OBJECT_HEAP; + + // Save the EEType for instrumentation purposes. + RedhawkGCInterface::SetLastAllocEEType(pEEType); + + Object * pObject = GCHeapUtilities::GetGCHeap()->Alloc(pThread->GetAllocContext(), cbSize, uFlags); + + // NOTE: we cannot call PublishObject here because the object isn't initialized! + + return pObject; +} + +// returns the object pointer for caller's convenience +COOP_PINVOKE_HELPER(void*, RhpPublishObject, (void* pObject, UIntNative cbSize)) +{ + UNREFERENCED_PARAMETER(cbSize); + ASSERT(cbSize >= LARGE_OBJECT_SIZE); + GCHeapUtilities::GetGCHeap()->PublishObject((uint8_t*)pObject); + return pObject; +} + +// static +void RedhawkGCInterface::InitAllocContext(gc_alloc_context * pAllocContext) +{ + // NOTE: This method is currently unused because the thread's alloc_context is initialized via + // static initialization of tls_CurrentThread. If the initial contents of the alloc_context + // ever change, then a matching change will need to be made to the tls_CurrentThread static + // initializer. + + pAllocContext->init(); +} + +// static +void RedhawkGCInterface::ReleaseAllocContext(gc_alloc_context * pAllocContext) +{ + s_DeadThreadsNonAllocBytes += pAllocContext->alloc_limit - pAllocContext->alloc_ptr; + GCHeapUtilities::GetGCHeap()->FixAllocContext(pAllocContext, NULL, NULL); +} + +// static +void RedhawkGCInterface::WaitForGCCompletion() +{ + GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete(); +} + +//------------------------------------------------------------------------------------------------- +// Used only by GC initialization, this initializes the EEType used to mark free entries in the GC heap. It +// should be an array type with a component size of one (so the GC can easily size it as appropriate) and +// should be marked as not containing any references. The rest of the fields don't matter: the GC does not +// query them and the rest of the runtime will never hold a reference to free object. 
+ +void EEType::InitializeAsGcFreeType() +{ + m_usComponentSize = 1; + m_usFlags = ParameterizedEEType; + m_uBaseSize = sizeof(Array) + SYNC_BLOCK_SKEW; +} + +#endif // !DACCESS_COMPILE + +extern void GcEnumObject(PTR_OBJECTREF pObj, UInt32 flags, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); +extern void GcEnumObjectsConservatively(PTR_OBJECTREF pLowerBound, PTR_OBJECTREF pUpperBound, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); +extern void GcBulkEnumObjects(PTR_OBJECTREF pObjs, DWORD cObjs, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); + +struct EnumGcRefContext : GCEnumContext +{ + EnumGcRefCallbackFunc * f; + EnumGcRefScanContext * sc; +}; + +bool IsOnReadablePortionOfThread(EnumGcRefScanContext * pSc, PTR_VOID pointer) +{ + if (!pSc->thread_under_crawl->IsWithinStackBounds(pointer)) + { + return false; + } + + // If the stack_limit is 0, then it wasn't set properly, and the check below will not + // operate correctly. + ASSERT(pSc->stack_limit != 0); + + // This ensures that the pointer is not in a currently-unused portion of the stack + // because the above check is only verifying against the entire stack bounds, + // but stack_limit is describing the current bound of the stack + if (PTR_TO_TADDR(pointer) < pSc->stack_limit) + { + return false; + } + return true; +} + +#ifdef HOST_64BIT +#define CONSERVATIVE_REGION_MAGIC_NUMBER 0x87DF7A104F09E0A9ULL +#else +#define CONSERVATIVE_REGION_MAGIC_NUMBER 0x4F09E0A9 +#endif + +// This is a structure that is created by executing runtime code in order to report a conservative +// region. In managed code if there is a pinned byref pointer to one of this (with the appropriate +// magic number set in it, and a hash that matches up) then the region from regionPointerLow to +// regionPointerHigh will be reported conservatively. This can only be used to report memory regions +// on the current stack and the structure must itself be located on the stack. +struct ConservativelyReportedRegionDesc +{ + // If this is really a ConservativelyReportedRegionDesc then the magic value will be + // CONSERVATIVE_REGION_MAGIC_NUMBER, and the hash will be the result of CalculateHash + // across magic, regionPointerLow, and regionPointerHigh + uintptr_t magic; + PTR_VOID regionPointerLow; + PTR_VOID regionPointerHigh; + uintptr_t hash; + + static uintptr_t CalculateHash(uintptr_t h1, uintptr_t h2, uintptr_t h3) + { + uintptr_t hash = h1; + hash = ((hash << 13) ^ hash) ^ h2; + hash = ((hash << 13) ^ hash) ^ h3; + return hash; + } +}; + +typedef DPTR(ConservativelyReportedRegionDesc) PTR_ConservativelyReportedRegionDesc; + +bool IsPtrAligned(TADDR value) +{ + return (value & (POINTER_SIZE - 1)) == 0; +} + +// Logic to actually conservatively report a ConservativelyReportedRegionDesc +// This logic is to be used when attempting to promote a pinned, interior pointer. +// It will attempt to heuristically identify ConservativelyReportedRegionDesc structures +// and if they exist, it will conservatively report a memory region. 
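+//
+// Illustrative shape of what is being matched (everything lives on the reporting thread's stack;
+// the angle-bracket values are placeholders):
+//
+//   ConservativelyReportedRegionDesc desc;
+//   desc.magic             = CONSERVATIVE_REGION_MAGIC_NUMBER;
+//   desc.regionPointerLow  = <start of the buffer to report>;
+//   desc.regionPointerHigh = <end of the buffer to report>;
+//   desc.hash              = ConservativelyReportedRegionDesc::CalculateHash(desc.magic,
+//                                (uintptr_t)desc.regionPointerLow, (uintptr_t)desc.regionPointerHigh);
+//
+// with a pinned interior (byref) pointer in some managed frame pointing at 'desc'. When all of the
+// checks below pass, [regionPointerLow, regionPointerHigh) is reported conservatively to the GC.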
+static void ReportExplicitConservativeReportedRegionIfValid(EnumGcRefContext * pCtx, PTR_PTR_VOID pObject) +{ + // If the stack_limit isn't set (which can only happen for frames which make a p/invoke call + // there cannot be a ConservativelyReportedRegionDesc + if (pCtx->sc->stack_limit == 0) + return; + + PTR_ConservativelyReportedRegionDesc conservativeRegionDesc = (PTR_ConservativelyReportedRegionDesc)(*pObject); + + // Ensure that conservativeRegionDesc pointer points at a readable memory region + if (!IsPtrAligned(PTR_TO_TADDR(conservativeRegionDesc))) + { + return; + } + + if (!IsOnReadablePortionOfThread(pCtx->sc, conservativeRegionDesc)) + { + return; + } + if (!IsOnReadablePortionOfThread(pCtx->sc, conservativeRegionDesc + 1)) + { + return; + } + + // Now, check to see if what we're pointing at is actually a ConservativeRegionDesc + // First: check the magic number. If that doesn't match, it cannot be one + if (conservativeRegionDesc->magic != CONSERVATIVE_REGION_MAGIC_NUMBER) + { + return; + } + + // Second: check to see that the region pointers point at memory which is aligned + // such that the pointers could be pointers to object references + if (!IsPtrAligned(PTR_TO_TADDR(conservativeRegionDesc->regionPointerLow))) + { + return; + } + if (!IsPtrAligned(PTR_TO_TADDR(conservativeRegionDesc->regionPointerHigh))) + { + return; + } + + // Third: check that start is before end. + if (conservativeRegionDesc->regionPointerLow >= conservativeRegionDesc->regionPointerHigh) + { + return; + } + +#ifndef DACCESS_COMPILE + // This fails for cross-bitness dac compiles and isn't really needed in the DAC anyways. + + // Fourth: Compute a hash of the above numbers. Check to see that the hash matches the hash + // value stored + if (ConservativelyReportedRegionDesc::CalculateHash(CONSERVATIVE_REGION_MAGIC_NUMBER, + (uintptr_t)PTR_TO_TADDR(conservativeRegionDesc->regionPointerLow), + (uintptr_t)PTR_TO_TADDR(conservativeRegionDesc->regionPointerHigh)) + != conservativeRegionDesc->hash) + { + return; + } +#endif // DACCESS_COMPILE + + // Fifth: Check to see that the region pointed at is within the bounds of the thread + if (!IsOnReadablePortionOfThread(pCtx->sc, conservativeRegionDesc->regionPointerLow)) + { + return; + } + if (!IsOnReadablePortionOfThread(pCtx->sc, ((PTR_OBJECTREF)conservativeRegionDesc->regionPointerHigh) - 1)) + { + return; + } + + // At this point we're most likely working with a ConservativeRegionDesc. We'll assume + // that's true, and perform conservative reporting. (We've done enough checks to ensure that + // this conservative reporting won't itself cause an AV, even if our heuristics are wrong + // with the second and fifth set of checks) + GcEnumObjectsConservatively((PTR_OBJECTREF)conservativeRegionDesc->regionPointerLow, (PTR_OBJECTREF)conservativeRegionDesc->regionPointerHigh, pCtx->f, pCtx->sc); +} + +static void EnumGcRefsCallback(void * hCallback, PTR_PTR_VOID pObject, UInt32 flags) +{ + EnumGcRefContext * pCtx = (EnumGcRefContext *)hCallback; + + GcEnumObject((PTR_OBJECTREF)pObject, flags, pCtx->f, pCtx->sc); + + const UInt32 interiorPinned = GC_CALL_INTERIOR | GC_CALL_PINNED; + // If this is an interior pinned pointer, check to see if we're working with a ConservativeRegionDesc + // and if so, report a conservative region. NOTE: do this only during promotion as conservative + // reporting has no value during other GC phases. 
+ if (((flags & interiorPinned) == interiorPinned) && (pCtx->sc->promotion)) + { + ReportExplicitConservativeReportedRegionIfValid(pCtx, pObject); + } +} + +// static +void RedhawkGCInterface::EnumGcRefs(ICodeManager * pCodeManager, + MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + void * pfnEnumCallback, + void * pvCallbackData) +{ + EnumGcRefContext ctx; + ctx.pCallback = EnumGcRefsCallback; + ctx.f = (EnumGcRefCallbackFunc *)pfnEnumCallback; + ctx.sc = (EnumGcRefScanContext *)pvCallbackData; + ctx.sc->stack_limit = pRegisterSet->GetSP(); + + pCodeManager->EnumGcRefs(pMethodInfo, + safePointAddress, + pRegisterSet, + &ctx); +} + +// static +void RedhawkGCInterface::EnumGcRefsInRegionConservatively(PTR_RtuObjectRef pLowerBound, + PTR_RtuObjectRef pUpperBound, + void * pfnEnumCallback, + void * pvCallbackData) +{ + GcEnumObjectsConservatively((PTR_OBJECTREF)pLowerBound, (PTR_OBJECTREF)pUpperBound, (EnumGcRefCallbackFunc *)pfnEnumCallback, (EnumGcRefScanContext *)pvCallbackData); +} + +// static +void RedhawkGCInterface::EnumGcRef(PTR_RtuObjectRef pRef, GCRefKind kind, void * pfnEnumCallback, void * pvCallbackData) +{ + ASSERT((GCRK_Object == kind) || (GCRK_Byref == kind)); + + DWORD flags = 0; + + if (kind == GCRK_Byref) + { + flags |= GC_CALL_INTERIOR; + } + + GcEnumObject((PTR_OBJECTREF)pRef, flags, (EnumGcRefCallbackFunc *)pfnEnumCallback, (EnumGcRefScanContext *)pvCallbackData); +} + +#ifndef DACCESS_COMPILE + +// static +void RedhawkGCInterface::BulkEnumGcObjRef(PTR_RtuObjectRef pRefs, UInt32 cRefs, void * pfnEnumCallback, void * pvCallbackData) +{ + GcBulkEnumObjects((PTR_OBJECTREF)pRefs, cRefs, (EnumGcRefCallbackFunc *)pfnEnumCallback, (EnumGcRefScanContext *)pvCallbackData); +} + +// static +GcSegmentHandle RedhawkGCInterface::RegisterFrozenSegment(void * pSection, size_t SizeSection) +{ +#ifdef FEATURE_BASICFREEZE + segment_info seginfo; + + seginfo.pvMem = pSection; + seginfo.ibFirstObject = sizeof(ObjHeader); + seginfo.ibAllocated = SizeSection; + seginfo.ibCommit = seginfo.ibAllocated; + seginfo.ibReserved = seginfo.ibAllocated; + + return (GcSegmentHandle)GCHeapUtilities::GetGCHeap()->RegisterFrozenSegment(&seginfo); +#else // FEATURE_BASICFREEZE + return NULL; +#endif // FEATURE_BASICFREEZE +} + +// static +void RedhawkGCInterface::UnregisterFrozenSegment(GcSegmentHandle segment) +{ + GCHeapUtilities::GetGCHeap()->UnregisterFrozenSegment((segment_handle)segment); +} + +EXTERN_C UInt32_BOOL g_fGcStressStarted = UInt32_FALSE; // UInt32_BOOL because asm code reads it +#ifdef FEATURE_GC_STRESS +// static +void RedhawkGCInterface::StressGc() +{ + // The GarbageCollect operation below may trash the last win32 error. We save the error here so that it can be + // restored after the GC operation; + Int32 lastErrorOnEntry = PalGetLastError(); + + if (g_fGcStressStarted && !ThreadStore::GetCurrentThread()->IsSuppressGcStressSet() && !ThreadStore::GetCurrentThread()->IsDoNotTriggerGcSet()) + { + GCHeapUtilities::GetGCHeap()->GarbageCollect(); + } + + // Restore the saved error + PalSetLastError(lastErrorOnEntry); +} +#endif // FEATURE_GC_STRESS + + +#ifdef FEATURE_GC_STRESS +COOP_PINVOKE_HELPER(void, RhpInitializeGcStress, ()) +{ + g_fGcStressStarted = UInt32_TRUE; + g_pConfig->SetGCStressLevel(EEConfig::GCSTRESS_INSTR_NGEN); // this is the closest CLR equivalent to what we do. +} +#endif // FEATURE_GC_STRESS + +#endif // !DACCESS_COMPILE + +// +// Support for scanning the GC heap, objects and roots. 
+// + +// Enumerate every reference field in an object, calling back to the specified function with the given context +// for each such reference found. +// static +void RedhawkGCInterface::ScanObject(void *pObject, GcScanObjectFunction pfnScanCallback, void *pContext) +{ +#if !defined(DACCESS_COMPILE) && defined(FEATURE_EVENT_TRACE) + GCHeapUtilities::GetGCHeap()->DiagWalkObject((Object*)pObject, (walk_fn)pfnScanCallback, pContext); +#else + UNREFERENCED_PARAMETER(pObject); + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // DACCESS_COMPILE +} + +// When scanning for object roots we use existing GC APIs used for object promotion and moving. We use an +// adapter callback to transform the promote function signature used for these methods into something simpler +// that avoids exposing unnecessary implementation details. The pointer to a ScanContext normally passed to +// promotion functions is actually a pointer to the structure below which serves to recall the actual function +// pointer and context for the real context. +struct ScanRootsContext +{ + GcScanRootFunction m_pfnCallback; + void * m_pContext; +}; + +// Callback with a EnumGcRefCallbackFunc signature that forwards the call to a callback with a GcScanFunction signature +// and its own context. +void ScanRootsCallbackWrapper(Object** pObject, EnumGcRefScanContext* pContext, DWORD dwFlags) +{ + UNREFERENCED_PARAMETER(dwFlags); + + ScanRootsContext * pRealContext = (ScanRootsContext*)pContext; + + (*pRealContext->m_pfnCallback)((void**)&pObject, pRealContext->m_pContext); +} + +// Enumerate all the object roots located on the specified thread's stack. It is only safe to call this from +// the context of a GC. +// +// static +void RedhawkGCInterface::ScanStackRoots(Thread *pThread, GcScanRootFunction pfnScanCallback, void *pContext) +{ +#ifndef DACCESS_COMPILE + ScanRootsContext sContext; + sContext.m_pfnCallback = pfnScanCallback; + sContext.m_pContext = pContext; + + pThread->GcScanRoots(reinterpret_cast(ScanRootsCallbackWrapper), &sContext); +#else + UNREFERENCED_PARAMETER(pThread); + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // !DACCESS_COMPILE +} + +// Enumerate all the object roots located in statics. It is only safe to call this from the context of a GC. +// +// static +void RedhawkGCInterface::ScanStaticRoots(GcScanRootFunction pfnScanCallback, void *pContext) +{ +#ifndef DACCESS_COMPILE + ScanRootsContext sContext; + sContext.m_pfnCallback = pfnScanCallback; + sContext.m_pContext = pContext; + + GetRuntimeInstance()->EnumAllStaticGCRefs(reinterpret_cast(ScanRootsCallbackWrapper), &sContext); +#else + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // !DACCESS_COMPILE +} + +// Enumerate all the object roots located in handle tables. It is only safe to call this from the context of a +// GC. 
+// +// static +void RedhawkGCInterface::ScanHandleTableRoots(GcScanRootFunction pfnScanCallback, void *pContext) +{ +#if !defined(DACCESS_COMPILE) && defined(FEATURE_EVENT_TRACE) + ScanRootsContext sContext; + sContext.m_pfnCallback = pfnScanCallback; + sContext.m_pContext = pContext; + Ref_ScanPointers(2, 2, (EnumGcRefScanContext*)&sContext, ScanRootsCallbackWrapper); +#else + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // !DACCESS_COMPILE +} + +#ifndef DACCESS_COMPILE + +UInt32 RedhawkGCInterface::GetGCDescSize(void * pType) +{ + MethodTable * pMT = (MethodTable *)pType; + + if (!pMT->ContainsPointersOrCollectible()) + return 0; + + return (UInt32)CGCDesc::GetCGCDescFromMT(pMT)->GetSize(); +} + +COOP_PINVOKE_HELPER(void, RhpCopyObjectContents, (Object* pobjDest, Object* pobjSrc)) +{ + size_t cbDest = pobjDest->GetSize() - sizeof(ObjHeader); + size_t cbSrc = pobjSrc->GetSize() - sizeof(ObjHeader); + if (cbSrc != cbDest) + return; + + ASSERT(pobjDest->get_EEType()->HasReferenceFields() == pobjSrc->get_EEType()->HasReferenceFields()); + + if (pobjDest->get_EEType()->HasReferenceFields()) + { + GCSafeCopyMemoryWithWriteBarrier(pobjDest, pobjSrc, cbDest); + } + else + { + memcpy(pobjDest, pobjSrc, cbDest); + } +} + +COOP_PINVOKE_HELPER(Boolean, RhCompareObjectContentsAndPadding, (Object* pObj1, Object* pObj2)) +{ + ASSERT(pObj1->get_EEType()->IsEquivalentTo(pObj2->get_EEType())); + EEType * pEEType = pObj1->get_EEType(); + size_t cbFields = pEEType->get_BaseSize() - (sizeof(ObjHeader) + sizeof(EEType*)); + + UInt8 * pbFields1 = (UInt8*)pObj1 + sizeof(EEType*); + UInt8 * pbFields2 = (UInt8*)pObj2 + sizeof(EEType*); + + return (memcmp(pbFields1, pbFields2, cbFields) == 0) ? Boolean_true : Boolean_false; +} + +// Thread static representing the last allocation. +// This is used to log the type information for each slow allocation. +DECLSPEC_THREAD +EEType * RedhawkGCInterface::tls_pLastAllocationEEType = NULL; + +// Get the last allocation for this thread. +EEType * RedhawkGCInterface::GetLastAllocEEType() +{ + return tls_pLastAllocationEEType; +} + +// Set the last allocation for this thread. +void RedhawkGCInterface::SetLastAllocEEType(EEType * pEEType) +{ + tls_pLastAllocationEEType = pEEType; +} + +uint64_t RedhawkGCInterface::s_DeadThreadsNonAllocBytes = 0; + +uint64_t RedhawkGCInterface::GetDeadThreadsNonAllocBytes() +{ +#ifdef HOST_64BIT + return s_DeadThreadsNonAllocBytes; +#else + // As it could be noticed we read 64bit values that may be concurrently updated. + // Such reads are not guaranteed to be atomic on 32bit so extra care should be taken. + return PalInterlockedCompareExchange64((Int64*)&s_DeadThreadsNonAllocBytes, 0, 0); +#endif +} + +void RedhawkGCInterface::DestroyTypedHandle(void * handle) +{ + GCHandleUtilities::GetGCHandleManager()->DestroyHandleOfUnknownType((OBJECTHANDLE)handle); +} + +void* RedhawkGCInterface::CreateTypedHandle(void* pObject, int type) +{ + return (void*)GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateHandleOfType((Object*)pObject, (HandleType)type); +} + +void GCToEEInterface::SuspendEE(SUSPEND_REASON reason) +{ +#ifdef FEATURE_EVENT_TRACE + ETW::GCLog::ETW_GC_INFO Info; + Info.SuspendEE.Reason = reason; + Info.SuspendEE.GcCount = (((reason == SUSPEND_FOR_GC) || (reason == SUSPEND_FOR_GC_PREP)) ? 
+ (UInt32)GCHeapUtilities::GetGCHeap()->GetGcCount() : (UInt32)-1); +#endif // FEATURE_EVENT_TRACE + + FireEtwGCSuspendEEBegin_V1(Info.SuspendEE.Reason, Info.SuspendEE.GcCount, GetClrInstanceId()); + + g_SuspendEELock.Enter(); + + GCHeapUtilities::GetGCHeap()->SetGCInProgress(TRUE); + + GetThreadStore()->SuspendAllThreads(true); + + FireEtwGCSuspendEEEnd_V1(GetClrInstanceId()); + +#ifdef APP_LOCAL_RUNTIME + // now is a good opportunity to retry starting the finalizer thread + RhStartFinalizerThread(); +#endif +} + +void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) +{ + FireEtwGCRestartEEBegin_V1(GetClrInstanceId()); + + SyncClean::CleanUp(); + + GetThreadStore()->ResumeAllThreads(true); + GCHeapUtilities::GetGCHeap()->SetGCInProgress(FALSE); + + g_SuspendEELock.Leave(); + + FireEtwGCRestartEEEnd_V1(GetClrInstanceId()); +} + +void GCToEEInterface::GcStartWork(int condemned, int /*max_gen*/) +{ + DebuggerHook::OnBeforeGcCollection(); + + // Invoke any registered callouts for the start of the collection. + RestrictedCallouts::InvokeGcCallouts(GCRC_StartCollection, condemned); +} + +// EE can perform post stack scanning action, while the user threads are still suspended +void GCToEEInterface::AfterGcScanRoots(int condemned, int /*max_gen*/, ScanContext* /*sc*/) +{ + // Invoke any registered callouts for the end of the mark phase. + RestrictedCallouts::InvokeGcCallouts(GCRC_AfterMarkPhase, condemned); +} + +void GCToEEInterface::GcBeforeBGCSweepWork() +{ +} + +void GCToEEInterface::GcDone(int condemned) +{ + // Invoke any registered callouts for the end of the collection. + RestrictedCallouts::InvokeGcCallouts(GCRC_EndCollection, condemned); +} + +bool GCToEEInterface::RefCountedHandleCallbacks(Object * pObject) +{ + return RestrictedCallouts::InvokeRefCountedHandleCallbacks(pObject); +} + +void GCToEEInterface::SyncBlockCacheWeakPtrScan(HANDLESCANPROC /*scanProc*/, uintptr_t /*lp1*/, uintptr_t /*lp2*/) +{ +} + +void GCToEEInterface::SyncBlockCacheDemote(int /*max_gen*/) +{ +} + +void GCToEEInterface::SyncBlockCachePromotionsGranted(int /*max_gen*/) +{ +} + +uint32_t GCToEEInterface::GetActiveSyncBlockCount() +{ + return 0; +} + +gc_alloc_context * GCToEEInterface::GetAllocContext() +{ + return ThreadStore::GetCurrentThread()->GetAllocContext(); +} +#endif // !DACCESS_COMPILE + +uint8_t* GCToEEInterface::GetLoaderAllocatorObjectForGC(Object* pObject) +{ + return nullptr; +} + +bool GCToEEInterface::IsPreemptiveGCDisabled() +{ + return ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(); +} + +bool GCToEEInterface::EnablePreemptiveGC() +{ +#ifndef DACCESS_COMPILE + Thread* pThread = ThreadStore::GetCurrentThread(); + + if (pThread->IsCurrentThreadInCooperativeMode()) + { + pThread->EnablePreemptiveMode(); + return true; + } +#else + UNREFERENCED_PARAMETER(pThread); +#endif + return false; +} + +void GCToEEInterface::DisablePreemptiveGC() +{ +#ifndef DACCESS_COMPILE + ThreadStore::GetCurrentThread()->DisablePreemptiveMode(); +#else + UNREFERENCED_PARAMETER(pThread); +#endif +} + +Thread* GCToEEInterface::GetThread() +{ +#ifndef DACCESS_COMPILE + return ThreadStore::GetCurrentThread(); +#else + return NULL; +#endif +} + +#ifndef DACCESS_COMPILE + +#ifdef FEATURE_EVENT_TRACE +void ProfScanRootsHelper(Object** ppObject, ScanContext* pSC, uint32_t dwFlags) +{ + Object* pObj = *ppObject; + if (dwFlags& GC_CALL_INTERIOR) + { + pObj = GCHeapUtilities::GetGCHeap()->GetContainingObject(pObj, true); + if (pObj == nullptr) + return; + } + ScanRootsHelper(pObj, ppObject, pSC, dwFlags); +} 
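
Editorial note: ScanRootsCallbackWrapper earlier in this file and ProfScanRootsHelper just above are both thin adapters, re-exposing the GC's promotion-style enumeration callback (which carries a scan context and flags) as a much simpler "root plus user context" callback, with the real callback and its state smuggled through the context pointer. The following is a minimal, self-contained sketch of that pattern using hypothetical names rather than the runtime's real types; it is an illustration, not the runtime's code.

// Editor's sketch (hypothetical types): adapt a flag-carrying enumeration
// callback to a simpler "root + context" callback, as ScanRootsCallbackWrapper does.
#include <cstdint>

typedef void (*SimpleRootCallback)(void** pRoot, void* pContext);            // what callers see
typedef void (*RawEnumCallback)(void** pRoot, void* pScanCtx, uint32_t fl);  // what the enumerator wants

struct AdapterContext
{
    SimpleRootCallback m_pfnCallback;   // user's callback
    void*              m_pContext;      // user's context
};

// Matches the RawEnumCallback shape; recovers the real callback from the context.
static void AdapterCallback(void** pRoot, void* pScanCtx, uint32_t /*flags*/)
{
    AdapterContext* pReal = (AdapterContext*)pScanCtx;
    (*pReal->m_pfnCallback)(pRoot, pReal->m_pContext);
}

// Stand-in for the GC-side enumerator (e.g. Thread::GcScanRoots in the real code).
static void EnumerateRoots(RawEnumCallback pfn, void* pScanCtx)
{
    static void* someRoot = nullptr;
    pfn(&someRoot, pScanCtx, 0);
}

void ScanRootsSketch(SimpleRootCallback pfnUser, void* pUserContext)
{
    AdapterContext ctx = { pfnUser, pUserContext };
    EnumerateRoots(AdapterCallback, &ctx);   // adapter forwards each root to pfnUser
}

Callers then only need to supply the simple callback and never see the enumerator's flags or scan-context details, which is exactly what ScanStackRoots/ScanStaticRoots/ScanHandleTableRoots buy their consumers.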
+ +void GcScanRootsForETW(promote_func* fn, int condemned, int max_gen, ScanContext* sc) +{ + UNREFERENCED_PARAMETER(condemned); + UNREFERENCED_PARAMETER(max_gen); + + FOREACH_THREAD(pThread) + { + if (pThread->IsGCSpecial()) + continue; + + if (GCHeapUtilities::GetGCHeap()->IsThreadUsingAllocationContextHeap(pThread->GetAllocContext(), sc->thread_number)) + continue; + + sc->thread_under_crawl = pThread; + sc->dwEtwRootKind = kEtwGCRootKindStack; + pThread->GcScanRoots(reinterpret_cast(fn), sc); + sc->dwEtwRootKind = kEtwGCRootKindOther; + } + END_FOREACH_THREAD +} + +void ScanHandleForETW(Object** pRef, Object* pSec, uint32_t flags, ScanContext* context, bool isDependent) +{ + ProfilingScanContext* pSC = (ProfilingScanContext*)context; + + // Notify ETW of the handle + if (ETW::GCLog::ShouldWalkHeapRootsForEtw()) + { + ETW::GCLog::RootReference( + pRef, + *pRef, // object being rooted + pSec, // pSecondaryNodeForDependentHandle + isDependent, + pSC, + 0, // dwGCFlags, + flags); // ETW handle flags + } +} + +// This is called only if we've determined that either: +// a) The Profiling API wants to do a walk of the heap, and it has pinned the +// profiler in place (so it cannot be detached), and it's thus safe to call into the +// profiler, OR +// b) ETW infrastructure wants to do a walk of the heap either to log roots, +// objects, or both. +// This can also be called to do a single walk for BOTH a) and b) simultaneously. Since +// ETW can ask for roots, but not objects +void GCProfileWalkHeapWorker(BOOL fShouldWalkHeapRootsForEtw, BOOL fShouldWalkHeapObjectsForEtw) +{ + ProfilingScanContext SC(FALSE); + unsigned max_generation = GCHeapUtilities::GetGCHeap()->GetMaxGeneration(); + + // **** Scan roots: Only scan roots if profiling API wants them or ETW wants them. + if (fShouldWalkHeapRootsForEtw) + { + GcScanRootsForETW(&ProfScanRootsHelper, max_generation, max_generation, &SC); + SC.dwEtwRootKind = kEtwGCRootKindFinalizer; + GCHeapUtilities::GetGCHeap()->DiagScanFinalizeQueue(&ProfScanRootsHelper, &SC); + + // Handles are kept independent of wks/svr/concurrent builds + SC.dwEtwRootKind = kEtwGCRootKindHandle; + GCHeapUtilities::GetGCHeap()->DiagScanHandles(&ScanHandleForETW, max_generation, &SC); + } + + // **** Scan dependent handles: only if ETW wants roots + if (fShouldWalkHeapRootsForEtw) + { + // GcScanDependentHandlesForProfiler double-checks + // CORProfilerTrackConditionalWeakTableElements() before calling into the profiler + + ProfilingScanContext* pSC = &SC; + + // we'll re-use pHeapId (which was either unused (0) or freed by EndRootReferences2 + // (-1)), so reset it to NULL + _ASSERTE((*((size_t *)(&pSC->pHeapId)) == (size_t)(-1)) || + (*((size_t *)(&pSC->pHeapId)) == (size_t)(0))); + pSC->pHeapId = NULL; + + GCHeapUtilities::GetGCHeap()->DiagScanDependentHandles(&ScanHandleForETW, max_generation, &SC); + } + + ProfilerWalkHeapContext profilerWalkHeapContext(FALSE, SC.pvEtwContext); + + // **** Walk objects on heap: only if ETW wants them. + if (fShouldWalkHeapObjectsForEtw) + { + GCHeapUtilities::GetGCHeap()->DiagWalkHeap(&HeapWalkHelper, &profilerWalkHeapContext, max_generation, true /* walk the large object heap */); + } + + #ifdef FEATURE_EVENT_TRACE + // **** Done! 
Indicate to ETW helpers that the heap walk is done, so any buffers + // should be flushed into the ETW stream + if (fShouldWalkHeapObjectsForEtw || fShouldWalkHeapRootsForEtw) + { + ETW::GCLog::EndHeapDump(&profilerWalkHeapContext); + } +#endif // FEATURE_EVENT_TRACE +} +#endif // defined(FEATURE_EVENT_TRACE) + +void GCProfileWalkHeap() +{ + +#ifdef FEATURE_EVENT_TRACE + if (ETW::GCLog::ShouldWalkStaticsAndCOMForEtw()) + ETW::GCLog::WalkStaticsAndCOMForETW(); + + BOOL fShouldWalkHeapRootsForEtw = ETW::GCLog::ShouldWalkHeapRootsForEtw(); + BOOL fShouldWalkHeapObjectsForEtw = ETW::GCLog::ShouldWalkHeapObjectsForEtw(); +#else // !FEATURE_EVENT_TRACE + BOOL fShouldWalkHeapRootsForEtw = FALSE; + BOOL fShouldWalkHeapObjectsForEtw = FALSE; +#endif // FEATURE_EVENT_TRACE + +#ifdef FEATURE_EVENT_TRACE + // we need to walk the heap if one of GC_PROFILING or FEATURE_EVENT_TRACE + // is defined, since both of them make use of the walk heap worker. + if (fShouldWalkHeapRootsForEtw || fShouldWalkHeapObjectsForEtw) + { + GCProfileWalkHeapWorker(fShouldWalkHeapRootsForEtw, fShouldWalkHeapObjectsForEtw); + } +#endif // defined(FEATURE_EVENT_TRACE) +} + + +void GCToEEInterface::DiagGCStart(int gen, bool isInduced) +{ + UNREFERENCED_PARAMETER(gen); + UNREFERENCED_PARAMETER(isInduced); +} + +void GCToEEInterface::DiagUpdateGenerationBounds() +{ +} + +void GCToEEInterface::DiagWalkFReachableObjects(void* gcContext) +{ + UNREFERENCED_PARAMETER(gcContext); +} + +void GCToEEInterface::DiagGCEnd(size_t index, int gen, int reason, bool fConcurrent) +{ + UNREFERENCED_PARAMETER(index); + UNREFERENCED_PARAMETER(gen); + UNREFERENCED_PARAMETER(reason); + + if (!fConcurrent) + { + GCProfileWalkHeap(); + } +} + +// Note on last parameter: when calling this for bgc, only ETW +// should be sending these events so that existing profapi profilers +// don't get confused. +void WalkMovedReferences(uint8_t* begin, uint8_t* end, + ptrdiff_t reloc, + void* context, + bool fCompacting, + bool fBGC) +{ + UNREFERENCED_PARAMETER(begin); + UNREFERENCED_PARAMETER(end); + UNREFERENCED_PARAMETER(reloc); + UNREFERENCED_PARAMETER(context); + UNREFERENCED_PARAMETER(fCompacting); + UNREFERENCED_PARAMETER(fBGC); +} + +// +// Diagnostics code +// + +#ifdef FEATURE_EVENT_TRACE +// Tracks all surviving objects (moved or otherwise). 
+inline bool ShouldTrackSurvivorsForProfilerOrEtw() +{ + if (ETW::GCLog::ShouldTrackMovementForEtw()) + return true; + + return false; +} +#endif // FEATURE_EVENT_TRACE + +void GCToEEInterface::DiagWalkSurvivors(void* gcContext, bool fCompacting) +{ +#ifdef FEATURE_EVENT_TRACE + if (ShouldTrackSurvivorsForProfilerOrEtw()) + { + size_t context = 0; + ETW::GCLog::BeginMovedReferences(&context); + GCHeapUtilities::GetGCHeap()->DiagWalkSurvivorsWithType(gcContext, &WalkMovedReferences, (void*)context, walk_for_gc); + ETW::GCLog::EndMovedReferences(context); + } +#else + UNREFERENCED_PARAMETER(gcContext); +#endif // FEATURE_EVENT_TRACE +} + +void GCToEEInterface::DiagWalkUOHSurvivors(void* gcContext, int gen) +{ +#ifdef FEATURE_EVENT_TRACE + if (ShouldTrackSurvivorsForProfilerOrEtw()) + { + size_t context = 0; + ETW::GCLog::BeginMovedReferences(&context); + GCHeapUtilities::GetGCHeap()->DiagWalkSurvivorsWithType(gcContext, &WalkMovedReferences, (void*)context, walk_for_uoh, gen); + ETW::GCLog::EndMovedReferences(context); + } +#else + UNREFERENCED_PARAMETER(gcContext); +#endif // FEATURE_EVENT_TRACE +} + +void GCToEEInterface::DiagWalkBGCSurvivors(void* gcContext) +{ +#ifdef FEATURE_EVENT_TRACE + if (ShouldTrackSurvivorsForProfilerOrEtw()) + { + size_t context = 0; + ETW::GCLog::BeginMovedReferences(&context); + GCHeapUtilities::GetGCHeap()->DiagWalkSurvivorsWithType(gcContext, &WalkMovedReferences, (void*)context, walk_for_bgc); + ETW::GCLog::EndMovedReferences(context); + } +#else + UNREFERENCED_PARAMETER(gcContext); +#endif // FEATURE_EVENT_TRACE +} + +void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) +{ + // CoreRT doesn't patch the write barrier like CoreCLR does, but it + // still needs to record the changes in the GC heap. + + bool is_runtime_suspended = args->is_runtime_suspended; + + switch (args->operation) + { + case WriteBarrierOp::StompResize: + // StompResize requires a new card table, a new lowest address, and + // a new highest address + assert(args->card_table != nullptr); + assert(args->lowest_address != nullptr); + assert(args->highest_address != nullptr); + + // We are sensitive to the order of writes here(more comments on this further in the method) + // In particular g_card_table must be written before writing the heap bounds. + // For platforms with weak memory ordering we will issue fences, for x64/x86 we are ok + // as long as compiler does not reorder these writes. + // That is unlikely since we have method calls in between. + // Just to be robust agains possible refactoring/inlining we will do a compiler-fenced store here. + VolatileStore(&g_card_table, args->card_table); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + assert(args->card_bundle_table != nullptr); + g_card_bundle_table = args->card_bundle_table; +#endif + + // IMPORTANT: managed heap segments may surround unmanaged/stack segments. In such cases adding another managed + // heap segment may put a stack/unmanaged write inside the new heap range. However the old card table would + // not cover it. Therefore we must ensure that the write barriers see the new table before seeing the new bounds. + // + // On architectures with strong ordering, we only need to prevent compiler reordering. + // Otherwise we put a process-wide fence here (so that we could use an ordinary read in the barrier) + +#if defined(HOST_ARM64) || defined(HOST_ARM) + if (!is_runtime_suspended) + { + // If runtime is not suspended, force all threads to see the changed table before seeing updated heap boundaries. 
+ // See: http://vstfdevdiv:8080/DevDiv2/DevDiv/_workitems/edit/346765 + FlushProcessWriteBuffers(); + } +#endif + + g_lowest_address = args->lowest_address; + g_highest_address = args->highest_address; + +#if defined(HOST_ARM64) || defined(HOST_ARM) + if (!is_runtime_suspended) + { + // If runtime is not suspended, force all threads to see the changed state before observing future allocations. + FlushProcessWriteBuffers(); + } +#endif + return; + case WriteBarrierOp::StompEphemeral: + // StompEphemeral requires a new ephemeral low and a new ephemeral high + assert(args->ephemeral_low != nullptr); + assert(args->ephemeral_high != nullptr); + g_ephemeral_low = args->ephemeral_low; + g_ephemeral_high = args->ephemeral_high; + return; + case WriteBarrierOp::Initialize: + // This operation should only be invoked once, upon initialization. + assert(g_card_table == nullptr); + assert(g_lowest_address == nullptr); + assert(g_highest_address == nullptr); + assert(args->card_table != nullptr); + assert(args->lowest_address != nullptr); + assert(args->highest_address != nullptr); + assert(args->ephemeral_low != nullptr); + assert(args->ephemeral_high != nullptr); + assert(args->is_runtime_suspended && "the runtime must be suspended here!"); + + g_card_table = args->card_table; + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + assert(g_card_bundle_table == nullptr); + g_card_bundle_table = args->card_bundle_table; +#endif + + g_lowest_address = args->lowest_address; + g_highest_address = args->highest_address; + g_ephemeral_low = args->ephemeral_low; + g_ephemeral_high = args->ephemeral_high; + return; + case WriteBarrierOp::SwitchToWriteWatch: + case WriteBarrierOp::SwitchToNonWriteWatch: + assert(!"CoreRT does not have an implementation of non-OS WriteWatch"); + return; + default: + assert(!"Unknokwn WriteBarrierOp enum"); + return; + } +} + +void GCToEEInterface::EnableFinalization(bool foundFinalizers) +{ + if (foundFinalizers) + RhEnableFinalization(); +} + +void GCToEEInterface::HandleFatalError(unsigned int exitCode) +{ + UNREFERENCED_PARAMETER(exitCode); + EEPOLICY_HANDLE_FATAL_ERROR(exitCode); +} + +bool GCToEEInterface::EagerFinalized(Object* obj) +{ + UNREFERENCED_PARAMETER(obj); + return false; +} + +bool GCToEEInterface::IsGCThread() +{ + Thread* pCurrentThread = ThreadStore::RawGetCurrentThread(); + return pCurrentThread->IsGCSpecial() || pCurrentThread == ThreadStore::GetSuspendingThread(); +} + +bool GCToEEInterface::WasCurrentThreadCreatedByGC() +{ + return ThreadStore::RawGetCurrentThread()->IsGCSpecial(); +} + +struct ThreadStubArguments +{ + void (*m_pRealStartRoutine)(void*); + void* m_pRealContext; + bool m_isSuspendable; + CLREventStatic m_ThreadStartedEvent; +}; + +bool GCToEEInterface::CreateThread(void (*threadStart)(void*), void* arg, bool is_suspendable, const char* name) +{ + UNREFERENCED_PARAMETER(name); + + ThreadStubArguments threadStubArgs; + + threadStubArgs.m_pRealStartRoutine = threadStart; + threadStubArgs.m_pRealContext = arg; + threadStubArgs.m_isSuspendable = is_suspendable; + + if (!threadStubArgs.m_ThreadStartedEvent.CreateAutoEventNoThrow(false)) + { + return false; + } + + // Helper used to wrap the start routine of background GC threads so we can do things like initialize the + // Redhawk thread state which requires running in the new thread's context. 
+ auto threadStub = [](void* argument) -> DWORD + { + ThreadStubArguments* pStartContext = (ThreadStubArguments*)argument; + + if (pStartContext->m_isSuspendable) + { + // Initialize the Thread for this thread. The false being passed indicates that the thread store lock + // should not be acquired as part of this operation. This is necessary because this thread is created in + // the context of a garbage collection and the lock is already held by the GC. + ASSERT(GCHeapUtilities::IsGCInProgress()); + + ThreadStore::AttachCurrentThread(false); + } + + ThreadStore::RawGetCurrentThread()->SetGCSpecial(true); + + auto realStartRoutine = pStartContext->m_pRealStartRoutine; + void* realContext = pStartContext->m_pRealContext; + + pStartContext->m_ThreadStartedEvent.Set(); + + STRESS_LOG_RESERVE_MEM(GC_STRESSLOG_MULTIPLY); + + realStartRoutine(realContext); + + return 0; + }; + + if (!PalStartBackgroundGCThread(threadStub, &threadStubArgs)) + { + threadStubArgs.m_ThreadStartedEvent.CloseEvent(); + return false; + } + + uint32_t res = threadStubArgs.m_ThreadStartedEvent.Wait(INFINITE, FALSE); + threadStubArgs.m_ThreadStartedEvent.CloseEvent(); + ASSERT(res == WAIT_OBJECT_0); + + return true; +} + +// CoreRT does not use async pinned handles +void GCToEEInterface::WalkAsyncPinnedForPromotion(Object* object, ScanContext* sc, promote_func* callback) +{ + UNREFERENCED_PARAMETER(object); + UNREFERENCED_PARAMETER(sc); + UNREFERENCED_PARAMETER(callback); +} + +void GCToEEInterface::WalkAsyncPinned(Object* object, void* context, void (*callback)(Object*, Object*, void*)) +{ + UNREFERENCED_PARAMETER(object); + UNREFERENCED_PARAMETER(context); + UNREFERENCED_PARAMETER(callback); +} + +IGCToCLREventSink* GCToEEInterface::EventSink() +{ + return &g_gcToClrEventSink; +} + +uint32_t GCToEEInterface::GetTotalNumSizedRefHandles() +{ + return -1; +} + +bool GCToEEInterface::AnalyzeSurvivorsRequested(int condemnedGeneration) +{ + return false; +} + +void GCToEEInterface::AnalyzeSurvivorsFinished(int condemnedGeneration) +{ +} + +void GCToEEInterface::VerifySyncTableEntry() +{ +} + +void GCToEEInterface::UpdateGCEventStatus(int currentPublicLevel, int currentPublicKeywords, int currentPrivateLevel, int currentPrivateKeywords) +{ + UNREFERENCED_PARAMETER(currentPublicLevel); + UNREFERENCED_PARAMETER(currentPublicKeywords); + UNREFERENCED_PARAMETER(currentPrivateLevel); + UNREFERENCED_PARAMETER(currentPrivateKeywords); + // TODO: Linux LTTng +} + +MethodTable* GCToEEInterface::GetFreeObjectMethodTable() +{ + assert(g_pFreeObjectEEType != nullptr); + return (MethodTable*)g_pFreeObjectEEType; +} + +bool GCToEEInterface::GetBooleanConfigValue(const char* privateKey, const char* publicKey, bool* value) +{ + // these configuration values are given to us via startup flags. 
+ if (strcmp(privateKey, "gcServer") == 0) + { + *value = g_heap_type == GC_HEAP_SVR; + return true; + } + + if (strcmp(privateKey, "gcConcurrent") == 0) + { + *value = !g_pRhConfig->GetDisableBGC(); + return true; + } + + if (strcmp(privateKey, "gcConservative") == 0) + { + *value = g_pConfig->GetGCConservative(); + return true; + } + + return false; +} + +bool GCToEEInterface::GetIntConfigValue(const char* privateKey, const char* publicKey, int64_t* value) +{ + if (strcmp(privateKey, "HeapVerify") == 0) + { + *value = g_pRhConfig->GetHeapVerify(); + return true; + } + + if (strcmp(privateKey, "GCgen0size") == 0) + { +#if defined(USE_PORTABLE_HELPERS) && !defined(HOST_WASM) + // CORERT-TODO: remove this + // https://github.com/dotnet/corert/issues/2033 + *value = 100 * 1024 * 1024; +#else + *value = 0; +#endif + return true; + } + + return false; +} + +bool GCToEEInterface::GetStringConfigValue(const char* privateKey, const char* publicKey, const char** value) +{ + UNREFERENCED_PARAMETER(privateKey); + UNREFERENCED_PARAMETER(publicKey); + UNREFERENCED_PARAMETER(value); + return false; +} + +void GCToEEInterface::FreeStringConfigValue(const char* value) +{ + delete[] value; +} + +#endif // !DACCESS_COMPILE + +// NOTE: this method is not in thread.cpp because it needs access to the layout of alloc_context for DAC to know the +// size, but thread.cpp doesn't generally need to include the GC environment headers for any other reason. +gc_alloc_context * Thread::GetAllocContext() +{ + return dac_cast(dac_cast(this) + offsetof(Thread, m_rgbAllocContextBuffer)); +} + +GPTR_IMPL(Thread, g_pFinalizerThread); +GPTR_IMPL(Thread, g_pGcThread); + +#ifndef DACCESS_COMPILE + +bool __SwitchToThread(uint32_t dwSleepMSec, uint32_t /*dwSwitchCount*/) +{ + if (dwSleepMSec > 0) + { + PalSleep(dwSleepMSec); + return true; + } + return !!PalSwitchToThread(); +} + +#endif // DACCESS_COMPILE + +void LogSpewAlways(const char * /*fmt*/, ...) +{ +} + +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) +ProfilingScanContext::ProfilingScanContext(BOOL fProfilerPinnedParam) + : ScanContext() +{ + pHeapId = NULL; + fProfilerPinned = fProfilerPinnedParam; + pvEtwContext = NULL; +#ifdef FEATURE_CONSERVATIVE_GC + // To not confuse GCScan::GcScanRoots + promotion = g_pConfig->GetGCConservative(); +#endif +} +#endif // defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) diff --git a/src/coreclr/src/nativeaot/Runtime/gcrhinterface.h b/src/coreclr/src/nativeaot/Runtime/gcrhinterface.h new file mode 100644 index 0000000000000..15090c81f33e7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcrhinterface.h @@ -0,0 +1,168 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This header contains the definition of an interface between the GC/HandleTable portions of the Redhawk +// codebase and the regular Redhawk code. The former has all sorts of legacy environmental requirements (see +// gcrhenv.h) that we don't wish to pull into the rest of Redhawk. +// +// Since this file is included in both worlds it has no dependencies and uses a very simple subset of types +// etc. so that it will build cleanly in both. The actual implementation of the class defined here is in +// gcrhenv.cpp, since the implementation needs access to the guts of the GC/HandleTable. +// +// This is just an initial stab at the interface. 
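
Editorial note, referring back to the gcrhenv.cpp hunk just above: Thread::GetAllocContext() is placed in that file, per its own comment, because it needs the layout of alloc_context that thread.cpp does not otherwise want to include. The underlying idiom is an opaque, suitably sized byte buffer embedded in one type that another component reinterprets as its own structure at a fixed offset. Below is a small self-contained sketch of that idiom with hypothetical names; the real code additionally routes through dac_cast so the same source works under DAC.

// Editor's sketch (hypothetical types): expose an embedded opaque buffer as a
// typed pointer, the way Thread::GetAllocContext() exposes m_rgbAllocContextBuffer.
#include <cstdint>
#include <cstddef>

struct SketchAllocContext      // stands in for gc_alloc_context
{
    uint8_t* alloc_ptr;
    uint8_t* alloc_limit;
};

struct SketchThread
{
    // Other thread state would live here; the buffer is opaque to this header.
    alignas(void*) uint8_t m_rgbAllocContextBuffer[sizeof(SketchAllocContext)];

    SketchAllocContext* GetAllocContext()
    {
        // Same shape as the real code: base address + field offset, reinterpreted.
        return reinterpret_cast<SketchAllocContext*>(
            reinterpret_cast<uint8_t*>(this) + offsetof(SketchThread, m_rgbAllocContextBuffer));
    }
};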
+// + +#ifndef __GCRHINTERFACE_INCLUDED +#define __GCRHINTERFACE_INCLUDED + +#ifndef DACCESS_COMPILE +// Global data cells exported by the GC. +extern "C" unsigned char *g_ephemeral_low; +extern "C" unsigned char *g_ephemeral_high; +extern "C" unsigned char *g_lowest_address; +extern "C" unsigned char *g_highest_address; +#endif + +struct gc_alloc_context; +class MethodInfo; +struct REGDISPLAY; +class Thread; +enum GCRefKind : unsigned char; +class ICodeManager; +class EEType; + +// ----------------------------------------------------------------------------------------------------------- +// RtuObjectRef +// ----------------------------------------------------------------------------------------------------------- +// +// READ THIS! +// +// This struct exists for type description purposes, but you must never directly refer to the object +// reference. The only code allowed to do this is the code inherited directly from the CLR, which all +// includes gcrhenv.h. If your code is outside the namespace of gcrhenv.h, direct object reference +// manipulation is prohibited--use C# instead. +// +// To enforce this, we declare RtuObjectRef as a class with no public members. +// +class RtuObjectRef +{ +#ifndef DACCESS_COMPILE +private: +#else +public: +#endif + TADDR pvObject; +}; + +typedef DPTR(RtuObjectRef) PTR_RtuObjectRef; + +// ----------------------------------------------------------------------------------------------------------- + +// We provide various ways to enumerate GC objects or roots, each of which calls back to a user supplied +// function for each object (within the context of a garbage collection). The following function types +// describe these callbacks. Unfortunately the signatures aren't very specific: we don't want to reference +// Object* or Object** from this module, see the comment for RtuObjectRef, but this very narrow category of +// callers can't use RtuObjectRef (they really do need to drill down into the Object). The lesser evil here is +// to be a bit loose in the signature rather than exposing the Object class to the rest of Redhawk. + +// Callback when enumerating objects on the GC heap or objects referenced from instance fields of another +// object. The GC dictates the shape of this signature (we're hijacking functionality originally developed for +// profiling). The real signature is: +// int ScanFunction(Object* pObject, void* pContext) +// where: +// return : treated as a boolean, zero indicates the enumeration should terminate, all other values +// say continue +// pObject : pointer to the current object being scanned +// pContext : user context passed to the original scan function and otherwise uninterpreted +typedef int (*GcScanObjectFunction)(void*, void*); + +// Callback when enumerating GC roots (stack locations, statics and handles). Similar to the callback above +// except there is no means to terminate the scan (no return value) and the root location (pointer to pointer +// to object) is returned instead of a direct pointer to the object: +// void ScanFunction(Object** pRoot, void* pContext) +typedef void (*GcScanRootFunction)(void**, void*); + +typedef void * GcSegmentHandle; + +#define RH_LARGE_OBJECT_SIZE 85000 + +// A 'clump' is defined as the size of memory covered by 1 byte in the card table. These constants are +// verified against gcpriv.h in gcrhee.cpp. 
+#if (POINTER_SIZE == 8) +#define CLUMP_SIZE 0x800 +#define LOG2_CLUMP_SIZE 11 +#elif (POINTER_SIZE == 4) +#define CLUMP_SIZE 0x400 +#define LOG2_CLUMP_SIZE 10 +#else +#error unexpected pointer size +#endif + +class RedhawkGCInterface +{ +public: + // Perform any runtime-startup initialization needed by the GC, HandleTable or environmental code in + // gcrhenv. Returns true on success or false if a subsystem failed to initialize. + static bool InitializeSubsystems(); + + static void InitAllocContext(gc_alloc_context * pAllocContext); + static void ReleaseAllocContext(gc_alloc_context * pAllocContext); + + static void WaitForGCCompletion(); + + static void EnumGcRef(PTR_RtuObjectRef pRef, GCRefKind kind, void * pfnEnumCallback, void * pvCallbackData); + + static void BulkEnumGcObjRef(PTR_RtuObjectRef pRefs, UInt32 cRefs, void * pfnEnumCallback, void * pvCallbackData); + + static void EnumGcRefs(ICodeManager * pCodeManager, + MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + void * pfnEnumCallback, + void * pvCallbackData); + + static void EnumGcRefsInRegionConservatively(PTR_RtuObjectRef pLowerBound, + PTR_RtuObjectRef pUpperBound, + void * pfnEnumCallback, + void * pvCallbackData); + + static GcSegmentHandle RegisterFrozenSegment(void * pSection, size_t SizeSection); + static void UnregisterFrozenSegment(GcSegmentHandle segment); + +#ifdef FEATURE_GC_STRESS + static void StressGc(); +#endif // FEATURE_GC_STRESS + + // Various routines used to enumerate objects contained within a given scope (on the GC heap, as reference + // fields of an object, on a thread stack, in a static or in one of the handle tables). + static void ScanObject(void *pObject, GcScanObjectFunction pfnScanCallback, void *pContext); + static void ScanStackRoots(Thread *pThread, GcScanRootFunction pfnScanCallback, void *pContext); + static void ScanStaticRoots(GcScanRootFunction pfnScanCallback, void *pContext); + static void ScanHandleTableRoots(GcScanRootFunction pfnScanCallback, void *pContext); + + // Returns size GCDesc. Used by type cloning. + static UInt32 GetGCDescSize(void * pType); + + // These methods are used to get and set the type information for the last allocation on each thread. + static EEType * GetLastAllocEEType(); + static void SetLastAllocEEType(EEType *pEEType); + + static uint64_t GetDeadThreadsNonAllocBytes(); + + // Used by debugger hook + static void* CreateTypedHandle(void* object, int type); + static void DestroyTypedHandle(void* handle); + +private: + // The EEType for the last allocation. This value is used inside of the GC allocator + // to emit allocation ETW events with type information. We set this value unconditionally to avoid + // race conditions where ETW is enabled after the value is set. + DECLSPEC_THREAD static EEType * tls_pLastAllocationEEType; + + // Tracks the amount of bytes that were reserved for threads in their gc_alloc_context and went unused when they died. + // Used for GC.GetTotalAllocatedBytes + static uint64_t s_DeadThreadsNonAllocBytes; +}; + +#endif // __GCRHINTERFACE_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/gcrhscan.cpp b/src/coreclr/src/nativeaot/Runtime/gcrhscan.cpp new file mode 100644 index 0000000000000..8496e20ad09a9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcrhscan.cpp @@ -0,0 +1,176 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
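
Editorial note: the GcScanObjectFunction / GcScanRootFunction typedefs above deliberately use void* so this header never has to name Object. A consumer of RedhawkGCInterface supplies a callback matching the documented shape, void ScanFunction(Object** pRoot, void* pContext), and threads its own state through the context pointer. The following is a minimal hypothetical consumer; as the comments above the Scan* methods state, such calls are only legal from within a GC.

// Editor's sketch: a GcScanRootFunction-shaped callback that just counts root slots.
#include <cstddef>

struct RootCountContext
{
    size_t cRoots;
};

// Matches GcScanRootFunction: void (*)(void**, void*)
static void CountRootCallback(void** /*pRoot*/, void* pContext)
{
    ((RootCountContext*)pContext)->cRoots++;
}

// Hypothetical use, valid only while the GC has the world stopped:
//
//   RootCountContext ctx = { 0 };
//   RedhawkGCInterface::ScanStackRoots(pSomeThread, CountRootCallback, &ctx);
//   // ctx.cRoots now holds the number of stack root slots reported for pSomeThread.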
+#include "common.h" + +#include "gcenv.h" +#include "gcheaputilities.h" +#include "objecthandle.h" + +#include "gcenv.ee.h" + +#include "PalRedhawkCommon.h" + +#include "gcrhinterface.h" + +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" + +#include "shash.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +#include "DebuggerHook.h" + +#ifndef DACCESS_COMPILE + +void GcEnumObjectsConservatively(PTR_PTR_Object ppLowerBound, PTR_PTR_Object ppUpperBound, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); + +void EnumAllStaticGCRefs(EnumGcRefCallbackFunc * fn, EnumGcRefScanContext * sc) +{ + GetRuntimeInstance()->EnumAllStaticGCRefs(reinterpret_cast(fn), sc); +} + +/* + * Scan all stack and statics roots + */ + +void GCToEEInterface::GcScanRoots(EnumGcRefCallbackFunc * fn, int condemned, int max_gen, EnumGcRefScanContext * sc) +{ + DebuggerProtectedBufferListNode* cursor = DebuggerHook::s_debuggerProtectedBuffers; + while (cursor != nullptr) + { + GcEnumObjectsConservatively((PTR_PTR_Object)cursor->address, (PTR_PTR_Object)(cursor->address + cursor->size), fn, sc); + cursor = cursor->next; + } + + // STRESS_LOG1(LF_GCROOTS, LL_INFO10, "GCScan: Phase = %s\n", sc->promotion ? "promote" : "relocate"); + + FOREACH_THREAD(pThread) + { + // Skip "GC Special" threads which are really background workers that will never have any roots. + if (pThread->IsGCSpecial()) + continue; + +#if !defined (ISOLATED_HEAPS) + // @TODO: it is very bizarre that this IsThreadUsingAllocationContextHeap takes a copy of the + // allocation context instead of a reference or a pointer to it. This seems very wasteful given how + // large the alloc_context is. 
+ if (!GCHeapUtilities::GetGCHeap()->IsThreadUsingAllocationContextHeap(pThread->GetAllocContext(), + sc->thread_number)) + { + // STRESS_LOG2(LF_GC|LF_GCROOTS, LL_INFO100, "{ Scan of Thread %p (ID = %x) declined by this heap\n", + // pThread, pThread->GetThreadId()); + } + else +#endif + { + STRESS_LOG1(LF_GC|LF_GCROOTS, LL_INFO100, "{ Starting scan of Thread %p\n", pThread); + sc->thread_under_crawl = pThread; +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) + sc->dwEtwRootKind = kEtwGCRootKindStack; +#endif + pThread->GcScanRoots(reinterpret_cast(fn), sc); + +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) + sc->dwEtwRootKind = kEtwGCRootKindOther; +#endif + STRESS_LOG1(LF_GC|LF_GCROOTS, LL_INFO100, "Ending scan of Thread %p }\n", pThread); + } + } + END_FOREACH_THREAD + + sc->thread_under_crawl = NULL; + + if ((!GCHeapUtilities::IsServerHeap() || sc->thread_number == 0) ||(condemned == max_gen && sc->promotion)) + { +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) + sc->dwEtwRootKind = kEtwGCRootKindHandle; +#endif + EnumAllStaticGCRefs(fn, sc); + } +} + +void GCToEEInterface::GcEnumAllocContexts (enum_alloc_context_func* fn, void* param) +{ + FOREACH_THREAD(thread) + { + (*fn) (thread->GetAllocContext(), param); + } + END_FOREACH_THREAD +} + +#endif //!DACCESS_COMPILE + +void PromoteCarefully(PTR_PTR_Object obj, UInt32 flags, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + // + // Sanity check that the flags contain only these three values + // + assert((flags & ~(GC_CALL_INTERIOR|GC_CALL_PINNED|GC_CALL_CHECK_APP_DOMAIN)) == 0); + + // + // Sanity check that GC_CALL_INTERIOR FLAG is set + // + assert(flags & GC_CALL_INTERIOR); + + // If the object reference points into the stack, we + // must not promote it, the GC cannot handle these. + if (pSc->thread_under_crawl->IsWithinStackBounds(*obj)) + return; + + fnGcEnumRef(obj, pSc, flags); +} + +void GcEnumObject(PTR_PTR_Object ppObj, UInt32 flags, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + // + // Sanity check that the flags contain only these three values + // + assert((flags & ~(GC_CALL_INTERIOR|GC_CALL_PINNED|GC_CALL_CHECK_APP_DOMAIN)) == 0); + + // for interior pointers, we optimize the case in which + // it points into the current threads stack area + // + if (flags & GC_CALL_INTERIOR) + PromoteCarefully (ppObj, flags, fnGcEnumRef, pSc); + else + fnGcEnumRef(ppObj, pSc, flags); +} + +void GcBulkEnumObjects(PTR_PTR_Object pObjs, UInt32 cObjs, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + PTR_PTR_Object ppObj = pObjs; + + for (UInt32 i = 0; i < cObjs; i++) + fnGcEnumRef(ppObj++, pSc, 0); +} + +// Scan a contiguous range of memory and report everything that looks like it could be a GC reference as a +// pinned interior reference. Pinned in case we are wrong (so the GC won't try to move the object and thus +// corrupt the original memory value by relocating it). Interior since we (a) can't easily tell whether a +// real reference is interior or not and interior is the more conservative choice that will work for both and +// (b) because it might not be a real GC reference at all and in that case falsely listing the reference as +// non-interior will cause the GC to make assumptions and crash quite quickly. +void GcEnumObjectsConservatively(PTR_PTR_Object ppLowerBound, PTR_PTR_Object ppUpperBound, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + // Only report potential references in the promotion phase. 
Since we report everything as pinned there + // should be no work to do in the relocation phase. + if (pSc->promotion) + { + for (PTR_PTR_Object ppObj = ppLowerBound; ppObj < ppUpperBound; ppObj++) + { + // Only report values that lie in the GC heap range. This doesn't conclusively guarantee that the + // value is a GC heap reference but it's a cheap check that weeds out a lot of spurious values. + PTR_Object pObj = *ppObj; + if (((PTR_UInt8)pObj >= g_lowest_address) && ((PTR_UInt8)pObj <= g_highest_address)) + fnGcEnumRef(ppObj, pSc, GC_CALL_INTERIOR|GC_CALL_PINNED); + } + } +} diff --git a/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.cpp b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.cpp new file mode 100644 index 0000000000000..8838b3461007b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.cpp @@ -0,0 +1,312 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "gctoclreventsink.h" + +GCToCLREventSink g_gcToClrEventSink; + +void GCToCLREventSink::FireDynamicEvent(const char* eventName, void* payload, uint32_t payloadSize) +{ + LIMITED_METHOD_CONTRACT; + +#ifndef FEATURE_REDHAWK + const size_t EventNameMaxSize = 255; + + WCHAR wideEventName[EventNameMaxSize]; + if (MultiByteToWideChar(CP_ACP, 0, eventName, -1, wideEventName, EventNameMaxSize) == 0) + { + return; + } + + FireEtwGCDynamicEvent(wideEventName, payloadSize, (const BYTE*)payload, GetClrInstanceId()); +#endif // !FEATURE_REDHAWK +} + +void GCToCLREventSink::FireGCStart_V2(uint32_t count, uint32_t depth, uint32_t reason, uint32_t type) +{ + LIMITED_METHOD_CONTRACT; + +#ifdef FEATURE_ETW + ETW::GCLog::ETW_GC_INFO gcStartInfo; + gcStartInfo.GCStart.Count = count; + gcStartInfo.GCStart.Depth = depth; + gcStartInfo.GCStart.Reason = static_cast(reason); + gcStartInfo.GCStart.Type = static_cast(type); + ETW::GCLog::FireGcStart(&gcStartInfo); +#endif // FEATURE_ETW +} + +void GCToCLREventSink::FireGCGenerationRange(uint8_t generation, void* rangeStart, uint64_t rangeUsedLength, uint64_t rangeReservedLength) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCGenerationRange(generation, rangeStart, rangeUsedLength, rangeReservedLength, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCEnd_V1(uint32_t count, uint32_t depth) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCEnd_V1(count, depth, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCHeapStats_V2( + uint64_t generationSize0, + uint64_t totalPromotedSize0, + uint64_t generationSize1, + uint64_t totalPromotedSize1, + uint64_t generationSize2, + uint64_t totalPromotedSize2, + uint64_t generationSize3, + uint64_t totalPromotedSize3, + uint64_t generationSize4, + uint64_t totalPromotedSize4, + uint64_t finalizationPromotedSize, + uint64_t finalizationPromotedCount, + uint32_t pinnedObjectCount, + uint32_t sinkBlockCount, + uint32_t gcHandleCount) +{ + LIMITED_METHOD_CONTRACT; + + // TODO: FireEtwGCHeapStats_V2 + FireEtwGCHeapStats_V1(generationSize0, totalPromotedSize0, generationSize1, totalPromotedSize1, + generationSize2, totalPromotedSize2, generationSize3, totalPromotedSize3, + finalizationPromotedSize, finalizationPromotedCount, pinnedObjectCount, + sinkBlockCount, gcHandleCount, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCCreateSegment_V1(void* address, size_t size, uint32_t type) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCCreateSegment_V1((uint64_t)address, static_cast(size), type, GetClrInstanceId()); +} + +void 
GCToCLREventSink::FireGCFreeSegment_V1(void* address) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCFreeSegment_V1((uint64_t)address, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCCreateConcurrentThread_V1() +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCCreateConcurrentThread_V1(GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCTerminateConcurrentThread_V1() +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCTerminateConcurrentThread_V1(GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCTriggered(uint32_t reason) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCTriggered(reason, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCMarkWithType(uint32_t heapNum, uint32_t type, uint64_t bytes) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCMarkWithType(heapNum, GetClrInstanceId(), type, bytes); +} + +void GCToCLREventSink::FireGCJoin_V2(uint32_t heap, uint32_t joinTime, uint32_t joinType, uint32_t joinId) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCJoin_V2(heap, joinTime, joinType, GetClrInstanceId(), joinId); +} + +void GCToCLREventSink::FireGCGlobalHeapHistory_V3(uint64_t finalYoungestDesired, + int32_t numHeaps, + uint32_t condemnedGeneration, + uint32_t gen0reductionCount, + uint32_t reason, + uint32_t globalMechanisms, + uint32_t pauseMode, + uint32_t memoryPressure, + uint32_t condemnReasons0, + uint32_t condemnReasons1) +{ + LIMITED_METHOD_CONTRACT; + + // TODO: FireEtwGCGlobalHeapHistory_V3 + FireEtwGCGlobalHeapHistory_V2(finalYoungestDesired, numHeaps, condemnedGeneration, gen0reductionCount, reason, + globalMechanisms, GetClrInstanceId(), pauseMode, memoryPressure); +} + +void GCToCLREventSink::FireGCAllocationTick_V1(uint32_t allocationAmount, uint32_t allocationKind) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCAllocationTick_V1(allocationAmount, allocationKind, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCAllocationTick_V3(uint64_t allocationAmount, uint32_t allocationKind, uint32_t heapIndex, void* objectAddress) +{ + LIMITED_METHOD_CONTRACT; + + void * typeId = RedhawkGCInterface::GetLastAllocEEType(); + const WCHAR * name = nullptr; + + if (typeId != nullptr) + { + FireEtwGCAllocationTick_V3(static_cast(allocationAmount), + allocationKind, + GetClrInstanceId(), + allocationAmount, + typeId, + name, + heapIndex, + objectAddress); + } +} + +void GCToCLREventSink::FirePinObjectAtGCTime(void* object, uint8_t** ppObject) +{ + UNREFERENCED_PARAMETER(object); + UNREFERENCED_PARAMETER(ppObject); +} + +void GCToCLREventSink::FirePinPlugAtGCTime(uint8_t* plugStart, uint8_t* plugEnd, uint8_t* gapBeforeSize) +{ + FireEtwPinPlugAtGCTime(plugStart, plugEnd, gapBeforeSize, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCPerHeapHistory_V3(void *freeListAllocated, + void *freeListRejected, + void *endOfSegAllocated, + void *condemnedAllocated, + void *pinnedAllocated, + void *pinnedAllocatedAdvance, + uint32_t runningFreeListEfficiency, + uint32_t condemnReasons0, + uint32_t condemnReasons1, + uint32_t compactMechanisms, + uint32_t expandMechanisms, + uint32_t heapIndex, + void *extraGen0Commit, + uint32_t count, + uint32_t valuesLen, + void *values) +{ + FireEtwGCPerHeapHistory_V3(GetClrInstanceId(), + freeListAllocated, + freeListRejected, + endOfSegAllocated, + condemnedAllocated, + pinnedAllocated, + pinnedAllocatedAdvance, + runningFreeListEfficiency, + condemnReasons0, + condemnReasons1, + compactMechanisms, + expandMechanisms, + heapIndex, + extraGen0Commit, + count, + valuesLen, + values); +} + + + +void GCToCLREventSink::FireBGCBegin() +{ + 
FireEtwBGCBegin(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC1stNonConEnd() +{ + FireEtwBGC1stNonConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC1stConEnd() +{ + FireEtwBGC1stConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC1stSweepEnd(uint32_t genNumber) +{ + //FireEtwBGC1stSweepEnd(genNumber, GetClrInstanceId()); TODO +} + +void GCToCLREventSink::FireBGC2ndNonConBegin() +{ + FireEtwBGC2ndNonConBegin(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC2ndNonConEnd() +{ + FireEtwBGC2ndNonConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC2ndConBegin() +{ + FireEtwBGC2ndConBegin(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC2ndConEnd() +{ + FireEtwBGC2ndConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCDrainMark(uint64_t objects) +{ + FireEtwBGCDrainMark(objects, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCRevisit(uint64_t pages, uint64_t objects, uint32_t isLarge) +{ + FireEtwBGCRevisit(pages, objects, isLarge, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCOverflow_V1(uint64_t min, uint64_t max, uint64_t objects, uint32_t isLarge, uint32_t genNumber) +{ + // TODO: FireBGCOverflow_V1 + FireEtwBGCOverflow(min, max, objects, isLarge, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCAllocWaitBegin(uint32_t reason) +{ + FireEtwBGCAllocWaitBegin(reason, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCAllocWaitEnd(uint32_t reason) +{ + FireEtwBGCAllocWaitEnd(reason, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCFullNotify_V1(uint32_t genNumber, uint32_t isAlloc) +{ + FireEtwGCFullNotify_V1(genNumber, isAlloc, GetClrInstanceId()); +} + +void GCToCLREventSink::FireSetGCHandle(void* handleID, void* objectID, uint32_t kind, uint32_t generation) +{ + FireEtwSetGCHandle(handleID, objectID, kind, generation, -1, GetClrInstanceId()); +} + +void GCToCLREventSink::FirePrvSetGCHandle(void* handleID, void* objectID, uint32_t kind, uint32_t generation) +{ + FireEtwPrvSetGCHandle(handleID, objectID, kind, generation, -1, GetClrInstanceId()); +} + +void GCToCLREventSink::FireDestroyGCHandle(void *handleID) +{ + FireEtwDestroyGCHandle(handleID, GetClrInstanceId()); +} + +void GCToCLREventSink::FirePrvDestroyGCHandle(void *handleID) +{ + FireEtwPrvDestroyGCHandle(handleID, GetClrInstanceId()); +} diff --git a/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.h b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.h new file mode 100644 index 0000000000000..1611f71250dbf --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.h @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
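
Editorial note: FireGCAllocationTick_V3 above only has a type to report because RedhawkGCInterface keeps the per-thread tls_pLastAllocationEEType that the allocation slow path records via SetLastAllocEEType (see the gcrhenv.cpp hunk earlier in this patch, which notes the value is set unconditionally so that enabling ETW later still observes it); the sink simply reads it back with GetLastAllocEEType when the tick fires. A compact sketch of that "record at allocation time, consume at event time" pattern, with hypothetical names:

// Editor's sketch (hypothetical names): per-thread "last allocated type" handoff
// between an allocation slow path and an event-firing hook.
#include <cstdint>
#include <cstdio>

struct SketchTypeHandle { const char* name; };

// One slot per thread; written unconditionally so enabling events later still sees a value.
static thread_local const SketchTypeHandle* t_lastAllocatedType = nullptr;

void OnSlowAllocation(const SketchTypeHandle* pType, size_t /*cbSize*/)
{
    t_lastAllocatedType = pType;     // record, as the real slow path does
    // ... actual allocation work would happen here ...
}

void FireAllocationTickSketch(uint64_t allocationAmount)
{
    const SketchTypeHandle* pType = t_lastAllocatedType;
    if (pType != nullptr)
    {
        // Stand-in for FireEtwGCAllocationTick_V3(...)
        printf("AllocationTick: %llu bytes, last type = %s\n",
               (unsigned long long)allocationAmount, pType->name);
    }
}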
+ +#ifndef __GCTOCLREVENTSINK_H__ +#define __GCTOCLREVENTSINK_H__ + +#include "gcenv.h" +#include "gcinterface.h" + +class GCToCLREventSink : public IGCToCLREventSink +{ +public: + void FireDynamicEvent(const char* eventName, void* payload, uint32_t payloadSize); + void FireGCStart_V2(uint32_t count, uint32_t depth, uint32_t reason, uint32_t type); + void FireGCEnd_V1(uint32_t count, uint32_t depth); + void FireGCGenerationRange(uint8_t generation, void* rangeStart, uint64_t rangeUsedLength, uint64_t rangeReservedLength); + void FireGCHeapStats_V2(uint64_t generationSize0, + uint64_t totalPromotedSize0, + uint64_t generationSize1, + uint64_t totalPromotedSize1, + uint64_t generationSize2, + uint64_t totalPromotedSize2, + uint64_t generationSize3, + uint64_t totalPromotedSize3, + uint64_t generationSize4, + uint64_t totalPromotedSize4, + uint64_t finalizationPromotedSize, + uint64_t finalizationPromotedCount, + uint32_t pinnedObjectCount, + uint32_t sinkBlockCount, + uint32_t gcHandleCount); + void FireGCCreateSegment_V1(void* address, size_t size, uint32_t type); + void FireGCFreeSegment_V1(void* address); + void FireGCCreateConcurrentThread_V1(); + void FireGCTerminateConcurrentThread_V1(); + void FireGCTriggered(uint32_t reason); + void FireGCMarkWithType(uint32_t heapNum, uint32_t type, uint64_t bytes); + void FireGCJoin_V2(uint32_t heap, uint32_t joinTime, uint32_t joinType, uint32_t joinId); + void FireGCGlobalHeapHistory_V3(uint64_t finalYoungestDesired, + int32_t numHeaps, + uint32_t condemnedGeneration, + uint32_t gen0reductionCount, + uint32_t reason, + uint32_t globalMechanisms, + uint32_t pauseMode, + uint32_t memoryPressure, + uint32_t condemnReasons0, + uint32_t condemnReasons1); + void FireGCAllocationTick_V1(uint32_t allocationAmount, uint32_t allocationKind); + void FireGCAllocationTick_V3(uint64_t allocationAmount, uint32_t allocationKind, uint32_t heapIndex, void* objectAddress); + void FirePinObjectAtGCTime(void* object, uint8_t** ppObject); + void FirePinPlugAtGCTime(uint8_t* plug_start, uint8_t* plug_end, uint8_t* gapBeforeSize); + void FireGCPerHeapHistory_V3(void *freeListAllocated, + void *freeListRejected, + void *endOfSegAllocated, + void *condemnedAllocated, + void *pinnedAllocated, + void *pinnedAllocatedAdvance, + uint32_t runningFreeListEfficiency, + uint32_t condemnReasons0, + uint32_t condemnReasons1, + uint32_t compactMechanisms, + uint32_t expandMechanisms, + uint32_t heapIndex, + void *extraGen0Commit, + uint32_t count, + uint32_t valuesLen, + void *values); + void FireBGCBegin(); + void FireBGC1stNonConEnd(); + void FireBGC1stConEnd(); + void FireBGC1stSweepEnd(uint32_t genNumber); + void FireBGC2ndNonConBegin(); + void FireBGC2ndNonConEnd(); + void FireBGC2ndConBegin(); + void FireBGC2ndConEnd(); + void FireBGCDrainMark(uint64_t objects); + void FireBGCRevisit(uint64_t pages, uint64_t objects, uint32_t isLarge); + void FireBGCOverflow_V1(uint64_t min, uint64_t max, uint64_t objects, uint32_t isLarge, uint32_t genNumber); + void FireBGCAllocWaitBegin(uint32_t reason); + void FireBGCAllocWaitEnd(uint32_t reason); + void FireGCFullNotify_V1(uint32_t genNumber, uint32_t isAlloc); + void FireSetGCHandle(void *handleID, void *objectID, uint32_t kind, uint32_t generation); + void FirePrvSetGCHandle(void *handleID, void *objectID, uint32_t kind, uint32_t generation); + void FireDestroyGCHandle(void *handleID); + void FirePrvDestroyGCHandle(void *handleID); +}; + +extern GCToCLREventSink g_gcToClrEventSink; + +#endif // __GCTOCLREVENTSINK_H__ + diff --git 
a/src/coreclr/src/nativeaot/Runtime/holder.h b/src/coreclr/src/nativeaot/Runtime/holder.h new file mode 100644 index 0000000000000..a894ab32e1f0c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/holder.h @@ -0,0 +1,183 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// ----------------------------------------------------------------------------------------------------------- +// Cut down versions of the Holder and Wrapper template classes used in the CLR. If this coding pattern is +// also common in the Redhawk code then it might be worth investigating pulling the whole holder.h header file +// over (a quick look indicates it might not drag in too many extra dependencies). +// + +// ----------------------------------------------------------------------------------------------------------- +// This version of holder does not have a default constructor. + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define EQUALS_DEFAULT +#else +#define EQUALS_DEFAULT = default +#endif + +template +class HolderNoDefaultValue +{ +public: + HolderNoDefaultValue(TYPE value, bool fTake = true) : m_value(value), m_held(false) + { if (fTake) { ACQUIRE_FUNC(value); m_held = true; } } + + ~HolderNoDefaultValue() { if (m_held) RELEASE_FUNC(m_value); } + + TYPE GetValue() { return m_value; } + + void Acquire() { ACQUIRE_FUNC(m_value); m_held = true; } + void Release() { if (m_held) { RELEASE_FUNC(m_value); m_held = false; } } + void SuppressRelease() { m_held = false; } + TYPE Extract() { m_held = false; return GetValue(); } + + HolderNoDefaultValue(HolderNoDefaultValue && other) EQUALS_DEFAULT; + +protected: + TYPE m_value; + bool m_held; + +private: + // No one should be copying around holder types. + HolderNoDefaultValue & operator=(const HolderNoDefaultValue & other); + HolderNoDefaultValue(const HolderNoDefaultValue & other); +}; + +// ----------------------------------------------------------------------------------------------------------- +template +class Holder : public HolderNoDefaultValue +{ + typedef HolderNoDefaultValue MY_PARENT; +public: + Holder() : MY_PARENT(DEFAULTVALUE, false) {} + Holder(TYPE value, bool fTake = true) : MY_PARENT(value, fTake) {} + + Holder(Holder && other) EQUALS_DEFAULT; + +private: + // No one should be copying around holder types. + Holder & operator=(const Holder & other); + Holder(const Holder & other); +}; + +// ----------------------------------------------------------------------------------------------------------- +template +class Wrapper : public Holder +{ + typedef Holder MY_PARENT; + +public: + Wrapper() : MY_PARENT() {} + Wrapper(TYPE value, bool fTake = true) : MY_PARENT(value, fTake) {} + Wrapper(Wrapper && other) EQUALS_DEFAULT; + + FORCEINLINE TYPE& operator=(TYPE const & value) + { + MY_PARENT::Release(); + MY_PARENT::m_value = value; + MY_PARENT::Acquire(); + return MY_PARENT::m_value; + } + + FORCEINLINE const TYPE &operator->() { return MY_PARENT::m_value; } + FORCEINLINE const TYPE &operator*() { return MY_PARENT::m_value; } + FORCEINLINE operator TYPE() { return MY_PARENT::m_value; } + +private: + // No one should be copying around wrapper types. 
+ Wrapper & operator=(const Wrapper & other); + Wrapper(const Wrapper & other); +}; + +// ----------------------------------------------------------------------------------------------------------- +template +FORCEINLINE void DoNothing(TYPE /*value*/) +{ +} + +// ----------------------------------------------------------------------------------------------------------- +template +FORCEINLINE void Delete(TYPE *value) +{ + delete value; +} + +// ----------------------------------------------------------------------------------------------------------- +template , + void (*RELEASE_FUNC)(PTR_TYPE) = Delete, + PTR_TYPE NULL_VAL = nullptr, + typename BASE = Wrapper > +class NewHolder : public BASE +{ +public: + NewHolder(PTR_TYPE p = NULL_VAL) : BASE(p) + { } + + PTR_TYPE& operator=(PTR_TYPE p) + { return BASE::operator=(p); } + + bool IsNull() + { return BASE::GetValue() == NULL_VAL; } +}; + +//----------------------------------------------------------------------------- +// NewArrayHolder : New []'ed pointer holder +// { +// NewArrayHolder foo = new (nothrow) Foo [30]; +// } // delete [] foo on out of scope +//----------------------------------------------------------------------------- + +template +FORCEINLINE void DeleteArray(TYPE *value) +{ + delete [] value; + value = NULL; +} + +template , + void (*RELEASE_FUNC)(PTR_TYPE) = DeleteArray, + PTR_TYPE NULL_VAL = nullptr, + typename BASE = Wrapper > +class NewArrayHolder : public BASE +{ +public: + NewArrayHolder(PTR_TYPE p = NULL_VAL) : BASE(p) + { } + + PTR_TYPE& operator=(PTR_TYPE p) + { return BASE::operator=(p); } + + bool IsNull() + { return BASE::GetValue() == NULL_VAL; } +}; + +// ----------------------------------------------------------------------------------------------------------- +template +FORCEINLINE void Destroy(TYPE * value) +{ + value->Destroy(); +} + +// ----------------------------------------------------------------------------------------------------------- +template , + void (*RELEASE_FUNC)(PTR_TYPE) = Destroy, + PTR_TYPE NULL_VAL = nullptr, + typename BASE = Wrapper > +class CreateHolder : public BASE +{ +public: + CreateHolder(PTR_TYPE p = NULL_VAL) : BASE(p) + { } + + PTR_TYPE& operator=(PTR_TYPE p) + { return BASE::operator=(p); } +}; + + diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.asm new file mode 100644 index 0000000000000..505661a8df519 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.asm @@ -0,0 +1,445 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. 
+;; ECX == EEType +FASTCALL_FUNC RhpNewFast, 4 + + ;; edx = GetThread(), TRASHES eax + INLINE_GETTHREAD edx, eax + + ;; + ;; ecx contains EEType pointer + ;; + mov eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; eax: base size + ;; ecx: EEType pointer + ;; edx: Thread pointer + ;; + + add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja AllocFailed + + ;; set the new alloc pointer + mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax + + ;; calc the new object pointer + sub eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + + ;; set the new object's EEType pointer + mov [eax], ecx + ret + +AllocFailed: + + ;; + ;; SLOW PATH, call RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + ;; + ;; ecx: EEType pointer + ;; + push ebp + mov ebp, esp + + PUSH_COOP_PINVOKE_FRAME edx + + ;; Preserve EEType in ESI. + mov esi, ecx + + ;; Push alloc helper arguments + push edx ; transition frame + push dword ptr [ecx + OFFSETOF__EEType__m_uBaseSize] ; Size + xor edx, edx ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + test eax, eax + jz NewFast_OOM + mov [eax + OFFSETOF__Object__m_pEEType], esi + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov edx, [esi + OFFSETOF__EEType__m_uBaseSize] + cmp edx, RH_LARGE_OBJECT_SIZE + jb NewFast_SkipPublish + mov ecx, eax ;; ecx: object + ;; edx: already contains object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewFast_SkipPublish: + + POP_COOP_PINVOKE_FRAME + + pop ebp + ret + +NewFast_OOM: + ;; This is the failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov eax, esi ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + + ;; Cleanup our ebp frame + pop ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + +;; Allocate non-array object with finalizer. +;; ECX == EEType +FASTCALL_FUNC RhpNewFinalizable, 4 + ;; Create EBP frame. + push ebp + mov ebp, esp + + PUSH_COOP_PINVOKE_FRAME edx + + ;; Preserve EEType in ESI + mov esi, ecx + + ;; Push alloc helper arguments + push edx ; transition frame + push dword ptr [ecx + OFFSETOF__EEType__m_uBaseSize] ; Size + mov edx, GC_ALLOC_FINALIZE ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + test eax, eax + jz NewFinalizable_OOM + mov [eax + OFFSETOF__Object__m_pEEType], esi + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov edx, [esi + OFFSETOF__EEType__m_uBaseSize] + cmp edx, RH_LARGE_OBJECT_SIZE + jb NewFinalizable_SkipPublish + mov ecx, eax ;; ecx: object + ;; edx: already contains object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewFinalizable_SkipPublish: + + POP_COOP_PINVOKE_FRAME + + ;; Collapse EBP frame and return + pop ebp + ret + +NewFinalizable_OOM: + ;; This is the failure path. 
We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov eax, esi ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + + ;; Cleanup our ebp frame + pop ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + +;; Allocate a new string. +;; ECX == EEType +;; EDX == element count +FASTCALL_FUNC RhNewString, 8 + + push ecx + push edx + + ;; Make sure computing the aligned overall allocation size won't overflow + cmp edx, MAX_STRING_LENGTH + ja StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + lea eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)] + and eax, -4 + + ; ECX == EEType + ; EAX == allocation size + ; EDX == scratch + + INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx + + ; ECX == scratch + ; EAX == allocation size + ; EDX == thread + + mov ecx, eax + add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc StringAllocContextOverflow + cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja StringAllocContextOverflow + + ; ECX == allocation size + ; EAX == new alloc ptr + ; EDX == thread + + ; set the new alloc pointer + mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax + + ; calc the new object pointer + sub eax, ecx + + pop edx + pop ecx + + ; set the new object's EEType pointer and element count + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__String__m_Length], edx + ret + +StringAllocContextOverflow: + ; ECX == string size + ; original ECX pushed + ; original EDX pushed + + ; Re-push original ECX + push [esp + 4] + + ; Create EBP frame. + mov [esp + 8], ebp + lea ebp, [esp + 8] + + PUSH_COOP_PINVOKE_FRAME edx + + ; Preserve the string size in edi + mov edi, ecx + + ; Get the EEType and put it in ecx. + mov ecx, dword ptr [ebp - 8] + + ; Push alloc helper arguments (thread, size, flags, EEType). + push edx ; transition frame + push edi ; Size + xor edx, edx ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. + test eax, eax + jz StringOutOfMemoryWithFrame + + mov ecx, [ebp - 8] + mov edx, [ebp - 4] + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__String__m_Length], edx + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + cmp edi, RH_LARGE_OBJECT_SIZE + jb NewString_SkipPublish + mov ecx, eax ;; ecx: object + mov edx, edi ;; edx: object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewString_SkipPublish: + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp + ret + +StringOutOfMemoryWithFrame: + ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ; an out of memory exception that the caller of this allocator understands. + + mov eax, [ebp - 8] ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp ; restore ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. 
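+        ; RhExceptionHandling_FailedAllocation expects the EEType of the failed allocation
+        ; in ECX and a flag in EDX: 0 requests OutOfMemoryException, 1 requests
+        ; OverflowException (used by the array helpers below).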
+ jmp RhExceptionHandling_FailedAllocation + +StringSizeOverflow: + ;; We get here if the size of the final string object can't be represented as an unsigned + ;; 32-bit value. We're going to tail-call to a managed helper that will throw + ;; an OOM exception that the caller of this allocator understands. + + add esp, 8 ; pop ecx / edx + + ;; ecx holds EEType pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + + +;; Allocate one dimensional, zero based array (SZARRAY). +;; ECX == EEType +;; EDX == element count +FASTCALL_FUNC RhpNewArray, 8 + + push ecx + push edx + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is <= 0x10000, no overflow is possible because the component size is + ; <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case + ; (32 dimensional MdArray) is less than 0xffff. + movzx eax, word ptr [ecx + OFFSETOF__EEType__m_usComponentSize] + cmp edx,010000h + ja ArraySizeBig + mul edx + add eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + add eax, 3 +ArrayAlignSize: + and eax, -4 + + ; ECX == EEType + ; EAX == array size + ; EDX == scratch + + INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx + + ; ECX == scratch + ; EAX == array size + ; EDX == thread + + mov ecx, eax + add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc ArrayAllocContextOverflow + cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja ArrayAllocContextOverflow + + ; ECX == array size + ; EAX == new alloc ptr + ; EDX == thread + + ; set the new alloc pointer + mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax + + ; calc the new object pointer + sub eax, ecx + + pop edx + pop ecx + + ; set the new object's EEType pointer and element count + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__Array__m_Length], edx + ret + +ArraySizeBig: + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is negative, it's an overflow, otherwise it's out of memory + cmp edx, 0 + jl ArraySizeOverflow + mul edx + jc ArrayOutOfMemoryNoFrame + add eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + jc ArrayOutOfMemoryNoFrame + add eax, 3 + jc ArrayOutOfMemoryNoFrame + jmp ArrayAlignSize + +ArrayAllocContextOverflow: + ; ECX == array size + ; original ECX pushed + ; original EDX pushed + + ; Re-push original ECX + push [esp + 4] + + ; Create EBP frame. + mov [esp + 8], ebp + lea ebp, [esp + 8] + + PUSH_COOP_PINVOKE_FRAME edx + + ; Preserve the array size in edi + mov edi, ecx + + ; Get the EEType and put it in ecx. + mov ecx, dword ptr [ebp - 8] + + ; Push alloc helper arguments (thread, size, flags, EEType). + push edx ; transition frame + push edi ; Size + xor edx, edx ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. 
+ test eax, eax + jz ArrayOutOfMemoryWithFrame + + mov ecx, [ebp - 8] + mov edx, [ebp - 4] + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__Array__m_Length], edx + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + cmp edi, RH_LARGE_OBJECT_SIZE + jb NewArray_SkipPublish + mov ecx, eax ;; ecx: object + mov edx, edi ;; edx: object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewArray_SkipPublish: + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp + ret + +ArrayOutOfMemoryWithFrame: + ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ; an out of memory exception that the caller of this allocator understands. + + mov eax, [ebp - 8] ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp ; restore ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +ArrayOutOfMemoryNoFrame: + add esp, 8 ; pop ecx / edx + + ; ecx holds EEType pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +ArraySizeOverflow: + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + add esp, 8 ; pop ecx / edx + + ; ecx holds EEType pointer already + mov edx, 1 ; Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/src/nativeaot/Runtime/i386/AsmMacros.inc new file mode 100644 index 0000000000000..1df5744d8e354 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AsmMacros.inc @@ -0,0 +1,218 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
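+
+;;
+;; Shared macros and constants for the i386 assembly helpers in this directory:
+;; calling-convention name decoration (FASTCALL_FUNC), inline thread access
+;; (INLINE_GETTHREAD), hijack removal, and the PInvoke transition frame push/pop.
+;;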
+ +include AsmOffsets.inc ; generated by the build from AsmOffsets.cpp + +;; +;; MACROS +;; + +FASTCALL_FUNC macro FuncName,cbArgs + FuncNameReal EQU @&FuncName&@&cbArgs + FuncNameReal proc public +endm + +FASTCALL_ENDFUNC macro + FuncNameReal endp +endm + +ALTERNATE_ENTRY macro Name + +decoratedName TEXTEQU @CatStr( _, Name ) ) + +decoratedName label proc +PUBLIC decoratedName + endm + +LABELED_RETURN_ADDRESS macro Name + +decoratedName TEXTEQU @CatStr( _, Name ) ) + +decoratedName label proc +PUBLIC decoratedName + endm + +EXPORT_POINTER_TO_ADDRESS macro Name + + local AddressToExport + +AddressToExport label proc + + .const + + align 4 + +Name dd offset AddressToExport + + public Name + + .code + + endm + +__tls_array equ 2Ch ;; offsetof(TEB, ThreadLocalStoragePointer) + +;; +;; __declspec(thread) version +;; +INLINE_GETTHREAD macro destReg, trashReg + ASSUME fs : NOTHING + EXTERN __tls_index : DWORD + EXTERN _tls_CurrentThread : DWORD + + mov destReg, [__tls_index] + mov trashReg, fs:[__tls_array] + mov destReg, [trashReg + destReg * 4] + add destReg, SECTIONREL _tls_CurrentThread +endm + + +INLINE_THREAD_UNHIJACK macro threadReg, trashReg1, trashReg2 + ;; + ;; Thread::Unhijack() + ;; + mov trashReg1, [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress] + cmp trashReg1, 0 + je @F + + mov trashReg2, [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + mov [trashReg2], trashReg1 + mov dword ptr [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov dword ptr [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + +@@: +endm + +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; The macro assumes it is called from a helper that has already set up an EBP frame and that the values of +;; EBX, ESI and EDI remain unchanged from their values in managed code. It pushes the frame at the top of the +;; stack. +;; +;; EAX is trashed by this macro. +;; +PUSH_COOP_PINVOKE_FRAME macro transitionFrameReg + lea eax, [ebp + 8] ; get the ESP of the caller + push eax ; save ESP + push edi + push esi + push ebx + push PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP + push eax ; Thread * (unused by stackwalker) + mov eax, [ebp + 0] ; Find previous EBP value + push eax ; save EBP + mov eax, [ebp + 4] ; Find the return address + push eax ; save m_RIP + + lea transitionFrameReg, [esp + 0] ; transitionFrameReg == address of frame +endm + +;; +;; Remove the frame from a previous call to PUSH_COOP_PINVOKE_FRAME from the top of the stack and restore EBX, +;; ESI and EDI to their previous values. 
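+;; The four slots skipped by the add are m_RIP, m_FramePointer, m_pThread and m_Flags;
+;; the final pop pulls the saved caller ESP into ECX, which is why ECX is trashed.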
+;; +;; TRASHES ECX +;; +POP_COOP_PINVOKE_FRAME macro + add esp, 4*4 + pop ebx + pop esi + pop edi + pop ecx +endm + + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 01h +TSF_SuppressGcStress equ 08h +TSF_DoNotTriggerGc equ 10h + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_RBX equ 00000001h +PTFF_SAVE_RSI equ 00000002h +PTFF_SAVE_RDI equ 00000004h +PTFF_SAVE_ALL_PRESERVED equ 00000007h ;; NOTE: RBP is not included in this set! +PTFF_SAVE_RSP equ 00008000h +PTFF_SAVE_RAX equ 00000100h ;; RAX is saved if it contains a GC ref and we're in hijack handler +PTFF_SAVE_ALL_SCRATCH equ 00000700h +PTFF_RAX_IS_GCREF equ 00010000h ;; iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar +PTFF_RAX_IS_BYREF equ 00020000h ;; iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar +PTFF_THREAD_ABORT equ 00040000h ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 43h + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + +;; +;; CONSTANTS -- SYMBOLS +;; + +RhDebugBreak equ @RhDebugBreak@0 +RhpGcAlloc equ @RhpGcAlloc@16 +G_LOWEST_ADDRESS equ _g_lowest_address +G_HIGHEST_ADDRESS equ _g_highest_address +G_EPHEMERAL_LOW equ _g_ephemeral_low +G_EPHEMERAL_HIGH equ _g_ephemeral_high +G_CARD_TABLE equ _g_card_table +RhpWaitForSuspend2 equ @RhpWaitForSuspend2@0 +RhpWaitForGC2 equ @RhpWaitForGC2@4 +RhpReversePInvokeAttachOrTrapThread2 equ @RhpReversePInvokeAttachOrTrapThread2@4 +RhpTrapThreads equ _RhpTrapThreads +RhpPublishObject equ @RhpPublishObject@8 + +ifdef FEATURE_GC_STRESS +THREAD__HIJACKFORGCSTRESS equ ?HijackForGcStress@Thread@@SGXPAUPAL_LIMITED_CONTEXT@@@Z +REDHAWKGCINTERFACE__STRESSGC equ ?StressGc@RedhawkGCInterface@@SGXXZ +endif ;; FEATURE_GC_STRESS + +;; +;; IMPORTS +;; +EXTERN RhpGcAlloc : PROC +EXTERN RhDebugBreak : PROC +EXTERN RhpWaitForSuspend2 : PROC +EXTERN RhpWaitForGC2 : PROC +EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC +EXTERN RhExceptionHandling_FailedAllocation : PROC +EXTERN RhpPublishObject : PROC +EXTERN RhpCalculateStackTraceWorker : PROC +EXTERN RhThrowHwEx : PROC +EXTERN RhThrowEx : PROC +EXTERN RhRethrow : PROC + +ifdef FEATURE_GC_STRESS +EXTERN THREAD__HIJACKFORGCSTRESS : PROC +EXTERN REDHAWKGCINTERFACE__STRESSGC : PROC +endif ;; FEATURE_GC_STRESS + +EXTERN G_LOWEST_ADDRESS : DWORD +EXTERN G_HIGHEST_ADDRESS : DWORD +EXTERN G_EPHEMERAL_LOW : DWORD +EXTERN G_EPHEMERAL_HIGH : DWORD +EXTERN G_CARD_TABLE : DWORD +EXTERN RhpTrapThreads : DWORD diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/i386/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..a92f24d789b47 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AsmOffsetsCpu.h @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
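+
+// Note: although these are the x86 offsets, the PAL_LIMITED_CONTEXT and REGDISPLAY field
+// names keep the Rxx spelling; on this architecture they hold ESP/EBP/EDI/ESI/EAX/EBX.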
+ +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. +// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(c0, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(4, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(8, ExInfo, m_exception) +PLAT_ASM_OFFSET(0c, ExInfo, m_kind) +PLAT_ASM_OFFSET(0d, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(14, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(bc, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(0c, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(a8, StackFrameIterator) +PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(0c, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(a4, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(1c, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(4, PAL_LIMITED_CONTEXT, Rsp) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, Rbp) +PLAT_ASM_OFFSET(0c, PAL_LIMITED_CONTEXT, Rdi) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, Rsi) +PLAT_ASM_OFFSET(14, PAL_LIMITED_CONTEXT, Rax) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, Rbx) + +PLAT_ASM_SIZEOF(28, REGDISPLAY) +PLAT_ASM_OFFSET(1c, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(0c, REGDISPLAY, pRbx) +PLAT_ASM_OFFSET(10, REGDISPLAY, pRbp) +PLAT_ASM_OFFSET(14, REGDISPLAY, pRsi) +PLAT_ASM_OFFSET(18, REGDISPLAY, pRdi) diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.asm new file mode 100644 index 0000000000000..628aa4e131c11 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.asm @@ -0,0 +1,96 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +ifdef FEATURE_DYNAMIC_CODE +;;;;;;;;;;;;;;;;;;;;;;; CallingConventionConverter Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;------------------------------------------------------------------------------ +; This helper routine enregisters the appropriate arguments and makes the +; actual call. 
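+; It copies numStackSlots DWORDs of outgoing arguments from pSrc onto the stack (last
+; argument first), loads EDX and ECX from pArgumentRegisters, calls pTarget, and then
+; writes the result through pReturnBuffer according to fpReturnSize (0: EAX/EDX pair,
+; 4: float, 8: double).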
+;------------------------------------------------------------------------------ +; void __fastcall CallDescrWorker(CallDescrWorkerParams * pParams) +FASTCALL_FUNC RhCallDescrWorker, 4 + push ebp + mov ebp, esp + push ebx + mov ebx, ecx + + mov ecx, [ebx + OFFSETOF__CallDescrData__numStackSlots] + mov eax, [ebx + OFFSETOF__CallDescrData__pSrc] ; copy the stack + test ecx, ecx + jz donestack + lea eax, [eax + 4 * ecx - 4] ; last argument + push dword ptr [eax] + dec ecx + jz donestack + sub eax, 4 + push dword ptr [eax] + dec ecx + jz donestack +stackloop: + sub eax, 4 + push dword ptr [eax] + dec ecx + jnz stackloop +donestack: + + ; now we must push each field of the ArgumentRegister structure + mov eax, [ebx + OFFSETOF__CallDescrData__pArgumentRegisters] + mov edx, dword ptr [eax] + mov ecx, dword ptr [eax + 4] + mov eax,[ebx + OFFSETOF__CallDescrData__pTarget] + call eax + + EXPORT_POINTER_TO_ADDRESS _PointerToReturnFromCallDescrThunk + + ; Symbol used to identify thunk call to managed function so the special + ; case unwinder can unwind through this function. Sadly we cannot directly + ; export this symbol right now because it confuses DIA unwinder to believe + ; it's the beginning of a new method, therefore we export the address + ; by means of an auxiliary variable. + + ; Save FP return value if necessary + mov ecx, [ebx + OFFSETOF__CallDescrData__fpReturnSize] + cmp ecx, 0 + je ReturnsInt + + cmp ecx, 4 + je ReturnsFloat + cmp ecx, 8 + je ReturnsDouble + ; unexpected + jmp Epilog + +ReturnsInt: +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + mov ebx, [ebx + OFFSETOF__CallDescrData__pReturnBuffer] + mov [ebx], eax + mov [ebx + 4], edx + +Epilog: + pop ebx + pop ebp + retn + +ReturnsFloat: + mov ebx, [ebx + OFFSETOF__CallDescrData__pReturnBuffer] + fstp dword ptr [ebx] ; Spill the Float return value + jmp Epilog + +ReturnsDouble: + mov ebx, [ebx + OFFSETOF__CallDescrData__pReturnBuffer] + fstp qword ptr [ebx] ; Spill the Double return value + jmp Epilog + +FASTCALL_ENDFUNC + +endif + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..3f596327345ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.asm @@ -0,0 +1,126 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +.586 +.model flat +option casemap:none +.code + +;; ----------------------------------------------------------------------------------------------------------- +;; standard macros +;; ----------------------------------------------------------------------------------------------------------- +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; struct ReturnBlock +;; { +;; 8 bytes of space +;; Used to hold return information. +;; eax, and 32bit float returns use the first 4 bytes, +;; eax,edx and 64bit float returns use the full 8 bytes +;; }; +;; + +ReturnInformation__ReturnData EQU 4h + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; ? CallingConventionConverter_ReturnVoidReturnThunk(int cbBytesOfStackToPop) +;; +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + pop edx ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push edx ; put the return address back on the stack + ret ; return to it (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +;; +;; int CallingConventionConverter_ReturnIntegerReturnThunk(int cbBytesOfStackToPop, ReturnBlock*) +;; +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + pop eax ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push eax ; put the return address back on the stack + mov eax, [edx] ; setup eax and edx to hold the return value + mov edx, [edx + 4] + ret ; return (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +;; +;; float CallingConventionConverter_Return4ByteFloatReturnThunk(int cbBytesOfStackToPop, ReturnBlock*) +;; +LEAF_ENTRY CallingConventionConverter_Return4ByteFloatReturnThunk, _TEXT + pop eax ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push eax ; put the return address back on the stack + fld dword ptr [edx]; fill in the return value + ret ; return (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_Return4ByteFloatReturnThunk, _TEXT + +;; +;; double CallingConventionConverter_Return4ByteFloatReturnThunk(int cbBytesOfStackToPop, ReturnBlock*) +;; +LEAF_ENTRY CallingConventionConverter_Return8ByteFloatReturnThunk, _TEXT + pop eax ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push eax ; put the return address back on the stack + fld qword ptr [edx]; fill in the return value + ret ; return (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_Return8ByteFloatReturnThunk, _TEXT + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. +;; + +;; +;; __jmpstub__CallingConventionConverter_CommonCallingStub(?) 
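+;; EAX points at a two-slot stub descriptor: [EAX+0] is pushed as the first argument and
+;; [EAX+4] points to a pair holding the managed converter thunk (pushed next) and the
+;; UniversalTransitionThunk that the stub finally jumps to.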
+;; +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + ;; rax <- stub info + push ebp + mov ebp, esp + push [eax] ; First argument + mov eax,[eax+4] ; + push [eax] ; Pointer to CallingConventionConverter Managed thunk + mov eax,[eax+4] ; Pointer to UniversalTransitionThunk + jmp eax +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + + ;; + ;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr* commonCallingStub, IntPtr *return4ByteFloat, IntPtr *return8ByteFloat) + ;; +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + lea eax, [CallingConventionConverter_ReturnVoidReturnThunk] + mov ecx, [esp+04h] + mov [ecx], eax + lea eax, [CallingConventionConverter_ReturnIntegerReturnThunk] + mov ecx, [esp+08h] + mov [ecx], eax + lea eax, [__jmpstub__CallingConventionConverter_CommonCallingStub] + mov ecx, [esp+0Ch] + mov [ecx], eax + lea eax, [CallingConventionConverter_Return4ByteFloatReturnThunk] + mov ecx, [esp+10h] + mov [ecx], eax + lea eax, [CallingConventionConverter_Return8ByteFloatReturnThunk] + mov ecx, [esp+14h] + mov [ecx], eax + retn 14h +LEAF_END CallingConventionConverter_GetStubs, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/DivModHelpers.asm b/src/coreclr/src/nativeaot/Runtime/i386/DivModHelpers.asm new file mode 100644 index 0000000000000..edc292110e6ab --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/DivModHelpers.asm @@ -0,0 +1,256 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +EXTERN RhExceptionHandling_ThrowClasslibOverflowException : PROC +EXTERN RhExceptionHandling_ThrowClasslibDivideByZeroException : PROC +EXTERN __alldiv : PROC +EXTERN __allrem : PROC +EXTERN __aulldiv : PROC +EXTERN __aullrem : PROC +EXTERN __aulldvrm : PROC +EXTERN __alldvrm : PROC + +esp_offsetof_dividend_low equ 4 +esp_offsetof_dividend_high equ 8 +esp_offsetof_divisor_low equ 12 +esp_offsetof_divisor_high equ 16 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpLDiv +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpLDiv, 16 + + ;; pretest for the problematic cases of overflow and divide by zero + ;; overflow: dividend = 0x80000000`00000000 and divisor = -1l = 0xffffffff`ffffffff + ;; divide by zero: divisor = 0x00000000`00000000 + ;; + ;; quick pretest - if the two halves of the divisor are unequal, we cannot + ;; have one of the problematic cases + mov eax,[esp+esp_offsetof_divisor_low] + cmp eax,[esp+esp_offsetof_divisor_high] + je LDivDoMoreTests +LDivOkToDivide: + ;; tailcall to the actual divide routine + jmp __alldiv +LDivDoMoreTests: + ;; we know the high and low halves of the divisor are equal + ;; + ;; check for the divide by zero case + test eax,eax + je ThrowClasslibDivideByZeroException + ;; + ;; is the divisor == -1l? I.e., can we have the overflow case? + cmp eax,-1 + jne LDivOkToDivide + ;; + ;; is the dividend == 0x80000000`00000000? 
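+    ;; (this is the only overflow case: INT64_MIN / -1 would be +2^63, which is not
+    ;; representable in a signed 64-bit result, so it is reported as an overflow instead)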
+ cmp dword ptr [esp+esp_offsetof_dividend_low],0 + jne LDivOkToDivide + cmp dword ptr [esp+esp_offsetof_dividend_high],80000000h + jne LDivOkToDivide +FASTCALL_ENDFUNC + + ;; make it look like the managed code called this directly + ;; by popping the parameters and putting the return address in the proper place +ThrowClasslibOverflowException proc + pop ecx + add esp,16 + push ecx + ;; passing return address in ecx + jmp RhExceptionHandling_ThrowClasslibOverflowException +ThrowClasslibOverflowException endp + +ThrowClasslibDivideByZeroException proc + pop ecx + add esp,16 + push ecx + ;; passing return address in ecx + jmp RhExceptionHandling_ThrowClasslibDivideByZeroException +ThrowClasslibDivideByZeroException endp + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpLMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpLMod, 16 + + ;; pretest for the problematic cases of overflow and divide by zero + ;; overflow: dividend = 0x80000000`00000000 and divisor = -1l = 0xffffffff`ffffffff + ;; divide by zero: divisor = 0x00000000`00000000 + ;; + ;; quick pretest - if the two halves of the divisor are unequal, we cannot + ;; have one of the problematic cases + mov eax,[esp+esp_offsetof_divisor_low] + cmp eax,[esp+esp_offsetof_divisor_high] + je LModDoMoreTests +LModOkToDivide: + jmp __allrem +LModDoMoreTests: + ;; we know the high and low halves of the divisor are equal + ;; + ;; check for the divide by zero case + test eax,eax + je ThrowClasslibDivideByZeroException + ;; + ;; is the divisor == -1l? I.e., can we have the overflow case? + cmp eax,-1 + jne LModOkToDivide + ;; + ;; is the dividend == 0x80000000`00000000? + cmp dword ptr [esp+esp_offsetof_dividend_low],0 + jne LModOkToDivide + cmp dword ptr [esp+esp_offsetof_dividend_high],80000000h + jne LModOkToDivide + jmp ThrowClasslibOverflowException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpLDivMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: quotient low +;; EDX: quotient high +;; ECX: remainder high +;; EBX: remainder high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpLDivMod, 16 + + ;; pretest for the problematic cases of overflow and divide by zero + ;; overflow: dividend = 0x80000000`00000000 and divisor = -1l = 0xffffffff`ffffffff + ;; divide by zero: divisor = 0x00000000`00000000 + ;; + ;; quick pretest - if the two halves of the divisor are unequal, we cannot + ;; have one of the problematic cases + mov eax,[esp+esp_offsetof_divisor_low] + cmp eax,[esp+esp_offsetof_divisor_high] + je LDivModDoMoreTests +LDivModOkToDivide: + jmp __alldvrm +LDivModDoMoreTests: + ;; we know the high and low halves of the divisor are equal + ;; + ;; check for the divide by zero case + test eax,eax + je ThrowClasslibDivideByZeroException + ;; + ;; is the divisor == -1l? I.e., can we have the overflow case? + cmp eax,-1 + jne LDivModOkToDivide + ;; + ;; is the dividend == 0x80000000`00000000? 
+ cmp dword ptr [esp+esp_offsetof_dividend_low],0 + jne LDivModOkToDivide + cmp dword ptr [esp+esp_offsetof_dividend_high],80000000h + jne LDivModOkToDivide + jmp ThrowClasslibOverflowException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpULDiv +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpULDiv, 16 + + ;; pretest for divide by zero + mov eax,[esp+esp_offsetof_divisor_low] + or eax,[esp+esp_offsetof_divisor_high] + jne __aulldiv + jmp ThrowClasslibDivideByZeroException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpULMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpULMod, 16 + + ;; pretest for divide by zero + mov eax,[esp+esp_offsetof_divisor_low] + or eax,[esp+esp_offsetof_divisor_high] + jne __aullrem + jmp ThrowClasslibDivideByZeroException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpULDivMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: quotient low +;; EDX: quotient high +;; ECX: remainder high +;; EBX: remainder high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpULDivMod, 16 + + ;; pretest for divide by zero + mov eax,[esp+esp_offsetof_divisor_low] + or eax,[esp+esp_offsetof_divisor_high] + jne __aulldvrm + jmp ThrowClasslibDivideByZeroException + +FASTCALL_ENDFUNC + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.asm new file mode 100644 index 0000000000000..500efdff66ac6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.asm @@ -0,0 +1,480 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
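+
+;;
+;; Exception dispatch helpers. RhpThrowHwEx, RhpThrowEx and RhpRethrow build a
+;; PAL_LIMITED_CONTEXT and an ExInfo on the stack, link the ExInfo onto the thread, and
+;; call the managed RhThrowHwEx/RhThrowEx/RhRethrow implementation; the RhpCall*Funclet
+;; helpers then invoke catch/finally/filter funclets using registers reloaded from a
+;; REGDISPLAY.
+;;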
+ + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +RhpCallFunclet equ @RhpCallFunclet@0 +RhpThrowHwEx equ @RhpThrowHwEx@0 + +extern RhpCallFunclet : proc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: ECX: exception code of fault +;; EDX: faulting RIP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpThrowHwEx, 0 + + esp_offsetof_ExInfo textequ %0 + esp_offsetof_Context textequ %SIZEOF__ExInfo + + push edx ; make it look like we were called by pushing the faulting IP like a return address + push ebp + mov ebp, esp + + lea eax, [esp+8] ;; calculate the RSP of the throw site + ;; edx already contains the throw site IP + +;; struct PAL_LIMITED_CONTEXT +;; { + push ebx + push eax + push esi + push edi + mov ebx, [ebp] + push ebx ;; 'faulting' Rbp + push eax ;; 'faulting' Rsp + push edx ;; 'faulting' IP +;; }; + + sub esp, SIZEOF__ExInfo + + INLINE_GETTHREAD eax, edx ;; eax <- thread, edx <- trashed + + lea edx, [esp + esp_offsetof_ExInfo] ;; edx <- ExInfo* + + xor esi, esi + mov [edx + OFFSETOF__ExInfo__m_exception], esi ;; init the exception object to null + mov byte ptr [edx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [edx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [edx + OFFSETOF__ExInfo__m_kind], 2 ;; ExKind.HardwareFault + + ;; link the ExInfo into the thread's ExInfo chain + mov ebx, [eax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [edx + OFFSETOF__ExInfo__m_pPrevExInfo], ebx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [eax + OFFSETOF__Thread__m_pExInfoStackHead], edx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea ebx, [esp + esp_offsetof_Context] ;; ebx <- PAL_LIMITED_CONTEXT* + mov [edx + OFFSETOF__ExInfo__m_pExContext], ebx ;; init ExInfo.m_pExContext + + ;; ecx still contains the exception code + ;; edx contains the address of the ExInfo + call RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpThrowHwEx2 + + ;; no return + int 3 + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: ECX: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpThrowEx, 0 + + esp_offsetof_ExInfo textequ %0 + esp_offsetof_Context textequ %SIZEOF__ExInfo + + push ebp + mov ebp, esp + + lea eax, [esp+8] ;; calculate the RSP of the throw site + mov edx, [esp+4] ;; get the throw site IP via the return address + +;; struct PAL_LIMITED_CONTEXT +;; { + push ebx + push eax + push esi + push edi + mov ebx, [ebp] + push ebx ;; 'faulting' Rbp + push eax ;; 'faulting' Rsp + push edx ;; 'faulting' IP +;; }; + + sub esp, SIZEOF__ExInfo + + ;; ------------------------- + + lea ebx, [eax-4] ;; ebx <- addr of return address + INLINE_GETTHREAD eax, edx ;; eax <- thread, edx <- trashed + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. 
+ + INLINE_THREAD_UNHIJACK eax, esi, edx ;; trashes esi, edx + + mov edx, [ebx] ;; edx <- return address + mov [esp + esp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP], edx ;; set 'faulting' IP after unhijack + + lea edx, [esp + esp_offsetof_ExInfo] ;; edx <- ExInfo* + + xor esi, esi + mov [edx + OFFSETOF__ExInfo__m_exception], esi ;; init the exception object to null + mov byte ptr [edx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [edx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [edx + OFFSETOF__ExInfo__m_kind], 1 ;; ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + mov ebx, [eax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [edx + OFFSETOF__ExInfo__m_pPrevExInfo], ebx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [eax + OFFSETOF__Thread__m_pExInfoStackHead], edx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea ebx, [esp + esp_offsetof_Context] ;; ebx <- PAL_LIMITED_CONTEXT* + mov [edx + OFFSETOF__ExInfo__m_pExContext], ebx ;; init ExInfo.m_pExContext + + ;; ecx still contains the exception object + ;; edx contains the address of the ExInfo + call RhThrowEx + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpThrowEx2 + + ;; no return + int 3 + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpRethrow, 0 + + + esp_offsetof_ExInfo textequ %0 + esp_offsetof_Context textequ %SIZEOF__ExInfo + + push ebp + mov ebp, esp + + lea eax, [esp+8] ;; calculate the RSP of the throw site + mov edx, [esp+4] ;; get the throw site IP via the return address + +;; struct PAL_LIMITED_CONTEXT +;; { + push ebx + push eax + push esi + push edi + mov ebx, [ebp] + push ebx ;; 'faulting' Rbp + push eax ;; 'faulting' Rsp + push edx ;; 'faulting' IP +;; }; + + sub esp, SIZEOF__ExInfo + + ;; ------------------------- + + lea ebx, [eax-4] ;; ebx <- addr of return address + INLINE_GETTHREAD eax, edx ;; eax <- thread, edx <- trashed + + lea edx, [esp + esp_offsetof_ExInfo] ;; edx <- ExInfo* + + xor esi, esi + mov [edx + OFFSETOF__ExInfo__m_exception], esi ;; init the exception object to null + mov byte ptr [edx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [edx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [edx + OFFSETOF__ExInfo__m_kind], 0 ;; init to a deterministic value (ExKind.None) + + ;; link the ExInfo into the thread's ExInfo chain + mov ecx, [eax + OFFSETOF__Thread__m_pExInfoStackHead] ;; ecx <- currently active ExInfo + mov [edx + OFFSETOF__ExInfo__m_pPrevExInfo], ecx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [eax + OFFSETOF__Thread__m_pExInfoStackHead], edx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea ebx, [esp + esp_offsetof_Context] ;; ebx <- PAL_LIMITED_CONTEXT* + mov [edx + OFFSETOF__ExInfo__m_pExContext], ebx ;; init ExInfo.m_pExContext + + ;; ecx contains the currently active ExInfo + ;; edx contains the address of the new ExInfo + call RhRethrow + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpRethrow2 + + ;; no return + int 3 + +FASTCALL_ENDFUNC + +;; +;; Prologue of all funclet calling helpers (RhpCallXXXXFunclet) 
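+;; Establishes an EBP frame, saves EBX/ESI/EDI (so the stack walker can recover the
+;; preserved registers), and reserves localsCount 4-byte scratch slots on the stack.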
+;; +FUNCLET_CALL_PROLOGUE macro localsCount + push ebp + mov ebp, esp + + push ebx ;; save preserved registers (for the stackwalker) + push esi ;; + push edi ;; + + stack_alloc_size = localsCount * 4 + + if stack_alloc_size ne 0 + sub esp, stack_alloc_size + endif +endm + +;; +;; Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) +;; +FUNCLET_CALL_EPILOGUE macro + if stack_alloc_size ne 0 + add esp, stack_alloc_size + endif + pop edi + pop esi + pop ebx + pop ebp +endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: ECX: exception object +;; EDX: handler funclet address +;; [ESP + 4]: REGDISPLAY* +;; [ESP + 8]: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpCallCatchFunclet, 0 + + FUNCLET_CALL_PROLOGUE 2 + + esp_offsetof_ResumeIP textequ %00h ;; [esp + 00h]: continuation address + esp_offsetof_is_handling_thread_abort textequ %04h ;; [esp + 04h]: set if we are handling ThreadAbortException + ;; [esp + 08h]: edi save + ;; [esp + 0ch]: esi save + ;; [esp + 10h]: ebx save + esp_offsetof_PrevEBP textequ %14h ;; [esp + 14h]: prev ebp + esp_offsetof_RetAddr textequ %18h ;; [esp + 18h]: return address + esp_offsetof_RegDisplay textequ %1ch ;; [esp + 1Ch]: REGDISPLAY* + esp_offsetof_ExInfo textequ %20h ;; [esp + 20h]: ExInfo* + + ;; Clear the DoNotTriggerGc state before calling out to our managed catch funclet. + INLINE_GETTHREAD eax, ebx ;; eax <- Thread*, ebx is trashed + lock and dword ptr [eax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + cmp ecx, [eax + OFFSETOF__Thread__m_threadAbortException] + setz byte ptr [esp + esp_offsetof_is_handling_thread_abort] + + mov edi, [esp + esp_offsetof_RegDisplay] ;; edi <- REGDISPLAY * + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRbx] + mov ebx, [eax] + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRbp] + mov eax, [eax] + push eax ; save the funclet's EBP value for later + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRsi] + mov esi, [eax] + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRdi] + mov edi, [eax] + + pop eax ; get the funclet's EBP value + + ;; ECX still contains the exception object + ;; EDX: funclet IP + ;; EAX: funclet EBP + call RhpCallFunclet + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpCallCatchFunclet2 + + ;; eax: resume IP + mov [esp + esp_offsetof_ResumeIP], eax ;; save for later + + INLINE_GETTHREAD edx, ecx ;; edx <- Thread*, trash ecx + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
+ INLINE_THREAD_UNHIJACK edx, ecx, eax ;; Thread in edx, trashes ecx and eax + + mov ecx, [esp + esp_offsetof_ExInfo] ;; ecx <- current ExInfo * + mov eax, [esp + esp_offsetof_RegDisplay] ;; eax <- REGDISPLAY* + mov eax, [eax + OFFSETOF__REGDISPLAY__SP] ;; eax <- resume SP value + + @@: mov ecx, [ecx + OFFSETOF__ExInfo__m_pPrevExInfo] ;; ecx <- next ExInfo + cmp ecx, 0 + je @F ;; we're done if it's null + cmp ecx, eax + jl @B ;; keep looping if it's lower than the new SP + + @@: mov [edx + OFFSETOF__Thread__m_pExInfoStackHead], ecx ;; store the new head on the Thread + + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz @f + + ;; test if the exception handled by the catch was the ThreadAbortException + cmp byte ptr [esp + esp_offsetof_is_handling_thread_abort], 0 + je @f + + ;; RhpCallFunclet preserved our local EBP value, so let's fetch the correct one for the resume address + mov ecx, [esp + esp_offsetof_RegDisplay] ;; ecx <- REGDISPLAY * + mov ecx, [ecx + OFFSETOF__REGDISPLAY__pRbp] + mov ebp, [ecx] + + ;; It was the ThreadAbortException, so rethrow it + mov ecx, STATUS_REDHAWK_THREAD_ABORT + mov edx, [esp + esp_offsetof_ResumeIP] + mov esp, eax ;; reset the SP to resume SP value + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + + @@: + ;; RhpCallFunclet preserved our local EBP value, so let's fetch the correct one for the resume address + mov ecx, [esp + esp_offsetof_RegDisplay] ;; ecx <- REGDISPLAY * + mov ecx, [ecx + OFFSETOF__REGDISPLAY__pRbp] + mov ebp, [ecx] + + ;; reset ESP and jump to the continuation address + mov ecx, [esp + esp_offsetof_ResumeIP] + mov esp, eax + jmp ecx + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: ECX: handler funclet address +;; EDX: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpCallFinallyFunclet, 0 + + FUNCLET_CALL_PROLOGUE 0 + + push edx ;; save REGDISPLAY* + + ;; Clear the DoNotTriggerGc state before calling out to our managed catch funclet. 
+ INLINE_GETTHREAD eax, ebx ;; eax <- Thread*, ebx is trashed + lock and dword ptr [eax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + ;; + ;; load preserved registers for funclet + ;; + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbx] + mov ebx, [eax] + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRsi] + mov esi, [eax] + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRdi] + mov edi, [eax] + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbp] + mov eax, [eax] + mov edx, ecx + + ;; ECX: not used + ;; EDX: funclet IP + ;; EAX: funclet EBP + call RhpCallFunclet + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpCallFinallyFunclet2 + + pop edx ;; restore REGDISPLAY* + + ;; + ;; save preserved registers from funclet + ;; + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbx] + mov [eax], ebx + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRsi] + mov [eax], esi + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRdi] + mov [eax], edi + + INLINE_GETTHREAD eax, ebx ;; eax <- Thread*, ebx is trashed + lock or dword ptr [eax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + + FUNCLET_CALL_EPILOGUE + ret + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: ECX: exception object +;; EDX: filter funclet address +;; [ESP + 4]: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpCallFilterFunclet, 0 + + FUNCLET_CALL_PROLOGUE 0 + + push edx ;; save filter funclet address + + ;; + ;; load preserved registers for funclet + ;; + mov edx, [ebp + 8] + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbp] + mov eax, [eax] + + ;; ECX still contains exception object + ;; EAX contains the funclet EBP value + mov edx, [esp + 0] ;; reload filter funclet address + + call RhpCallFunclet + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpCallFilterFunclet2 + + ;; EAX contains the result of the filter execution + mov edx, [ebp + 8] + + pop ecx ;; pop scratch slot + + FUNCLET_CALL_EPILOGUE + ret + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/FloatingPoint.asm b/src/coreclr/src/nativeaot/Runtime/i386/FloatingPoint.asm new file mode 100644 index 0000000000000..30977c5f9cd0f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/FloatingPoint.asm @@ -0,0 +1,77 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
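+
+;;
+;; Floating point remainder helpers. RhpFltRemRev and RhpDblRemRev loop on FPREM until the
+;; FPU clears the C2 (incomplete reduction) status bit; the _SSE2 variants spill the XMM
+;; arguments to the stack, call the x87 implementations, and reload the result into XMM0.
+;;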
+ + .586 + .xmm + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +FASTCALL_FUNC RhpFltRemRev, 8 ; float dividend, float divisor + + fld dword ptr [esp+8] ; divisor + fld dword ptr [esp+4] ; dividend + +fremloop: + fprem + wait + fnstsw ax + wait + sahf + jp fremloop ; Continue while the FPU status bit C2 is set + + fxch st(1) ; swap, so divisor is on top and result is in st(1) + fstp st(0) ; Pop the divisor from the FP stack + + ret 8 + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpDblRemRev, 16 ; double dividend, double divisor + + fld qword ptr [esp+0Ch] + fld qword ptr [esp+4] + +fremloopd: + fprem + wait + fnstsw ax + wait + sahf + jp fremloopd ; Continue while the FPU status bit C2 is set + + fxch st(1) ; swap, so divisor is on top and result is in st(1) + fstp st(0) ; Pop the divisor from the FP stack + + ret 10h + +FASTCALL_ENDFUNC + + +FASTCALL_FUNC RhpFltRemRev_SSE2, 0 ; float dividend, float divisor + sub esp, 12 ;; 4 bytes of our stack, 8 bytes args + movd dword ptr [esp], xmm0 + movd dword ptr [esp+4], xmm1 + call @RhpFltRemRev@8 ;; pops 8 bytes of stack + fstp dword ptr [esp] + movd xmm0, dword ptr [esp] + add esp, 4 + ret +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpDblRemRev_SSE2, 0 ; float dividend, float divisor + sub esp, 24 ;; 8 bytes of our stack, 16 bytes args + movq qword ptr [esp], xmm0 + movq qword ptr [esp+8], xmm1 + call @RhpDblRemRev@16 ;; pops 16 bytes of stack + fstp qword ptr [esp] + movq xmm0, qword ptr [esp] + add esp, 8 + ret +FASTCALL_ENDFUNC + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/GC.asm b/src/coreclr/src/nativeaot/Runtime/i386/GC.asm new file mode 100644 index 0000000000000..e34c980cc3cb4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/GC.asm @@ -0,0 +1,64 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Unmanaged helpers used by the managed System.GC class. +;; + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +;; DWORD getcpuid(DWORD arg, unsigned char result[16]) + +FASTCALL_FUNC getcpuid, 8 + + push ebx + push esi + mov esi, edx + mov eax, ecx + xor ecx, ecx + cpuid + mov [esi+ 0], eax + mov [esi+ 4], ebx + mov [esi+ 8], ecx + mov [esi+12], edx + pop esi + pop ebx + + ret + +FASTCALL_ENDFUNC + +;; The following function uses Deterministic Cache Parameter leafs to crack the cache hierarchy information on Prescott & Above platforms. +;; This function takes 3 arguments: +;; Arg1 is an input to ECX. Used as index to specify which cache level to return infoformation on by CPUID. +;; Arg2 is an input to EAX. For deterministic code enumeration, we pass in 4H in arg2. +;; Arg3 is a pointer to the return buffer +;; No need to check whether or not CPUID is supported because we have already called CPUID with success to come here. + +;; DWORD getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16]) + +FASTCALL_FUNC getextcpuid, 12 + + push ebx + push esi + mov ecx, ecx + mov eax, edx + cpuid + mov esi, [esp + 12] + mov [esi+ 0], eax + mov [esi+ 4], ebx + mov [esi+ 8], ecx + mov [esi+12], edx + pop esi + pop ebx + + ret + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/i386/GcProbe.asm new file mode 100644 index 0000000000000..f8af02620a30d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/GcProbe.asm @@ -0,0 +1,556 @@ +;; Licensed to the .NET Foundation under one or more agreements. 
+;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .xmm + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +DEFAULT_PROBE_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH +PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. +;; +;; Register state on entry: +;; EDX: thread pointer +;; +;; Register state on exit: +;; No changes +;; +ClearHijackState macro + mov dword ptr [edx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov dword ptr [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 +endm + +;; +;; The prolog for all GC suspension hijackes (normal and stress). Sets up an EBP frame, +;; fixes up the hijacked return address, and clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; EAX: not trashed or saved +;; EBP: new EBP frame with correct return address +;; ESP: points to saved scratch registers (ECX & EDX) +;; ECX: trashed +;; EDX: thread pointer +;; +HijackFixupProlog macro + push eax ; save a slot for the repaired return address + push ebp + mov ebp, esp + push ecx ; save scratch registers + push edx ; save scratch registers + + ;; edx <- GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + + ;; + ;; Fix the stack by pushing the original return address + ;; + mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [ebp + 4], ecx + + ClearHijackState +endm + +;; +;; Epilog for the normal and GC stress hijack functions. Restores scratch registers +;; and returns to the original return address. +;; +;; Register state on entry: +;; ESP: points to saved scratch registers +;; EBP: ebp frame +;; ECX, EDX: trashed +;; All other registers correct for return to the original return address. +;; +;; Register state on exit: +;; All registers restored as they were when the hijack was first reached. +;; +HijackFixupEpilog macro + pop edx + pop ecx + pop ebp + ret +endm + +;; +;; Sets up a PInvokeTranstionFrame with room for all registers. 
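+;; The frame layout, from the new ESP upwards, is: m_RIP, m_FramePointer, Thread*, the
+;; register bitmask (m_Flags), then the saved EBX, ESI, EDI, caller ESP and EAX.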
+;; +;; Register state on entry: +;; EDX: thread pointer +;; BITMASK_REG_OR_VALUE: register bitmask, PTTR_SAVE_ALL_PRESERVED at a minimum +;; EBP: ebp frame setup with correct return address +;; ESP: points to saved scratch registers +;; +;; Register state on exit: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBX: thread pointer +;; EAX: trashed +;; ESI, EDI, EBX, EAX all saved in the frame +;; +;; ECX is NOT trashed if BITMASK_REG_OR_VALUE is a literal value and not a register +;; +PushProbeFrame macro BITMASK_REG_OR_VALUE + push eax ; EAX + lea eax, [ebp + 8] ; get caller ESP + push eax ; ESP + push edi ; EDI + push esi ; ESI + push ebx ; EBX + push BITMASK_REG_OR_VALUE ; register bitmask +ifdef _DEBUG + mov eax, BITMASK_REG_OR_VALUE + and eax, DEFAULT_PROBE_SAVE_FLAGS + cmp eax, DEFAULT_PROBE_SAVE_FLAGS ; make sure we have at least the flags to match what the macro pushes + je @F + call RhDebugBreak +@@: +endif ;; _DEBUG + push edx ; Thread * + mov eax, [ebp + 0] ; find previous EBP value + push eax ; m_FramePointer + mov eax, [ebp + 4] ; get return address + push eax ; m_RIP + + mov ebx, edx ; save Thread pointer for later +endm + +;; +;; Pops off the PInvokeTransitionFrame setup in PushProbeFrame above, restoring all registers. +;; +;; Register state on entry: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; +;; Register state on exit: +;; ESP: points to saved scratch registers, PInvokeTransitionFrame removed +;; EBX: restored +;; ESI: restored +;; EDI: restored +;; EAX: restored +;; +PopProbeFrame macro + add esp, 4*4h + pop ebx + pop esi + pop edi + pop eax ; discard ESP + pop eax +endm + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBX: thread pointer +;; EBP: EBP frame +;; +;; Register state on exit: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBX: thread pointer +;; EBP: EBP frame +;; All other registers trashed +;; + +EXTERN _RhpWaitForGCNoAbort : PROC + +WaitForGCCompletion macro + test dword ptr [ebx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc + jnz @F + + mov ecx, esp + call _RhpWaitForGCNoAbort +@@: + +endm + +RhpThrowHwEx equ @RhpThrowHwEx@0 +extern RhpThrowHwEx : proc + +;; +;; Main worker for our GC probes. Do not call directly!! This assumes that HijackFixupProlog has been done. +;; Instead, go through RhpGcProbeHijack* or RhpGcStressHijack*. This waits for the +;; GC to complete then returns to the original return address. +;; +;; Register state on entry: +;; ECX: register bitmask +;; EDX: thread pointer +;; EBP: EBP frame +;; ESP: scratch registers pushed (ECX & EDX) +;; +;; Register state on exit: +;; All registers restored as they were when the hijack was first reached. 
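+;; If a thread abort was requested while this thread was suspended (PTFF_THREAD_ABORT set
+;; in the frame flags), the probe does not return normally: it raises
+;; STATUS_REDHAWK_THREAD_ABORT via RhpThrowHwEx at the hijacked return address instead.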
+;; +RhpGcProbe proc + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz SynchronousRendezVous + + HijackFixupEpilog + +SynchronousRendezVous: + PushProbeFrame ecx ; bitmask in ECX + + WaitForGCCompletion + + mov edx, [esp + OFFSETOF__PInvokeTransitionFrame__m_Flags] + ;; + ;; Restore preserved registers -- they may have been updated by GC + ;; + PopProbeFrame + + test edx, PTFF_THREAD_ABORT + jnz Abort + + HijackFixupEpilog +Abort: + mov ecx, STATUS_REDHAWK_THREAD_ABORT + pop edx + pop eax ;; ecx was pushed here, but we don't care for its value + pop ebp + pop edx ;; return address as exception RIP + jmp RhpThrowHwEx + +RhpGcProbe endp + +ifdef FEATURE_GC_STRESS +;; +;; Set the Thread state and invoke RedhawkGCInterface::StressGC(). +;; +;; Assumes EBX is the Thread pointer. +;; +;; Register state on entry: +;; EBX: thread pointer +;; EBP: EBP frame +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; +;; Register state on exit: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBP: EBP frame +;; All other registers trashed +;; +StressGC macro + mov [ebx + OFFSETOF__Thread__m_pHackPInvokeTunnel], esp + call REDHAWKGCINTERFACE__STRESSGC +endm + +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack. This performs the GC Stress +;; work and returns to the original return address. +;; +;; Register state on entry: +;; EDX: thread pointer +;; ECX: register bitmask +;; EBP: EBP frame +;; ESP: scratch registers pushed (ECX and EDX) +;; +;; Register state on exit: +;; All registers restored as they were when the hijack was first reached. +;; +RhpGcStressProbe proc + PushProbeFrame ecx ; bitmask in ECX + + StressGC + + ;; + ;; Restore preserved registers -- they may have been updated by GC + ;; + PopProbeFrame + + HijackFixupEpilog + +RhpGcStressProbe endp + +endif ;; FEATURE_GC_STRESS + +FASTCALL_FUNC RhpGcProbeHijackScalar, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + jmp RhpGcProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcProbeHijackObject, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcProbeHijackByref, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcProbe + +FASTCALL_ENDFUNC + +ifdef FEATURE_GC_STRESS +FASTCALL_FUNC RhpGcStressHijackScalar, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + jmp RhpGcStressProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcStressHijackObject, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcStressProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcStressHijackByref, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcStressProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpHijackForGcStress, 0 + push ebp + mov ebp, esp + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + + push edx + push ecx + push ebx + push eax + push esi + push edi + + mov eax, [ebp] + push eax ;; (caller) Ebp + lea eax, [ebp + 8] + push eax ;; Esp + mov eax, [ebp + 4] + push eax ;; Eip + + push esp ;; address of PAL_LIMITED_CONTEXT + call THREAD__HIJACKFORGCSTRESS + + ;; Note: we only restore the scratch registers here. No GC has occured, so restoring + ;; the callee saved ones is unnecessary. 
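+    ;; Discard the stack slots that do not need to be reloaded, then restore the scratch
+    ;; registers in the reverse order of the pushes at the top of this function.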
+ add esp, 14h + pop eax + pop ebx + pop ecx + pop edx + pop ebp + ret +FASTCALL_ENDFUNC +endif ;; FEATURE_GC_STRESS + +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: what ESP should be after the return address and arg space are removed. +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. +;; The stack still contains the return address and the arguments to the call. +;; +;; Register state on exit: +;; ESP: what it would be after a complete return to the caller. +;; +RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName +FASTCALL_FUNC funcName, 0 + cmp [esp], hijackFuncName + je RhpGCProbeForEHJump + +IF isStress EQ 1 + cmp [esp], stressFuncName + je RhpGCStressProbeForEHJump +ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov esp, edx ; The stack is now as if we have returned from the call. + push eax ; Push the handler as the return address. + ret + +FASTCALL_ENDFUNC +endm + + +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. +RTU_EH_JUMP_HELPER RhpEHJumpScalar, @RhpGcProbeHijackScalar@0, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpObject, @RhpGcProbeHijackObject@0, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpByref, @RhpGcProbeHijackByref@0, 0, 0 +ifdef FEATURE_GC_STRESS +RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, @RhpGcProbeHijackScalar@0, 1, @RhpGcStressHijackScalar@0 +RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, @RhpGcProbeHijackObject@0, 1, @RhpGcStressHijackObject@0 +RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, @RhpGcProbeHijackByref@0, 1, @RhpGcStressHijackByref@0 +endif + +;; +;; Macro to setup our EBP frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: scratch +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. 
+;; The stack is as if we have returned from the call +;; +;; Register state on exit: +;; ESP: ebp frame +;; EBP: ebp frame setup with space reserved for the repaired return address +;; EAX: reference to the exception object +;; ECX: scratch +;; +EHJumpProbeProlog macro + push eax ; save a slot for the repaired return address + push ebp ; setup an ebp frame to keep the stack nicely crawlable + mov ebp, esp + push eax ; save the handler address so we can jump to it later + mov eax, ecx ; move the ex object reference into eax so we can report it +endm + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the EBP frame, and make the +;; final jump to the handler for EH jump probe funcs. +;; +;; Register state on entry: +;; EAX: reference to the exception object +;; ESP: ebp frame +;; EBP: ebp frame setup with the correct return (handler) address +;; ECX: scratch +;; EDX: scratch +;; +;; Register state on exit: +;; ESP: correct for return to the caller +;; EBP: previous ebp frame +;; ECX: reference to the exception object +;; EDX: trashed +;; +EHJumpProbeEpilog macro + mov ecx, eax ; Put the EX obj ref back into ecx for the handler. + pop eax ; Recover the handler address. + pop ebp ; Pop the ebp frame we setup. + pop edx ; Pop the original return address, which we do not need. + push eax ; Push the handler as the return address. + ret +endm + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: what ESP should be after the return address and arg space are removed. +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. +;; The stack is as if we have returned from the call +;; +;; Register state on exit: +;; ESP: correct for return to the caller +;; EBP: previous ebp frame +;; ECX: reference to the exception object +;; +RhpGCProbeForEHJump proc + mov esp, edx ; The stack is now as if we have returned from the call. + EHJumpProbeProlog + + ;; edx <- GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + + ;; Fix the stack by pushing the original return address + mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [ebp + 4], ecx + + ClearHijackState + +ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @F + + call RhDebugBreak +@@: +endif ;; _DEBUG + + + PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF + WaitForGCCompletion + PopProbeFrame + + EHJumpProbeEpilog + +RhpGCProbeForEHJump endp + +ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: what ESP should be after the return address and arg space are removed. +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. +;; The stack is as if we have returned from the call +;; +;; Register state on exit: +;; ESP: correct for return to the caller +;; EBP: previous ebp frame +;; ECX: reference to the exception object +;; +RhpGCStressProbeForEHJump proc + mov esp, edx ; The stack is now as if we have returned from the call. 
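+    ;; From here on this mirrors RhpGCProbeForEHJump above, except that it invokes the GC stress
+    ;; helper instead of waiting for a thread suspension to complete.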
+ EHJumpProbeProlog + + ;; edx <- GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + + ;; Fix the stack by pushing the original return address + mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [ebp + 4], ecx + + ClearHijackState + + PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF + StressGC + PopProbeFrame + + EHJumpProbeEpilog + +RhpGCStressProbeForEHJump endp + +endif ;; FEATURE_GC_STRESS + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/i386/GetThread.asm new file mode 100644 index 0000000000000..b330406b9a098 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/GetThread.asm @@ -0,0 +1,31 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: none +;; +;; OUTPUT: EAX: Thread pointer +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpGetThread, 0 + push ecx + INLINE_GETTHREAD eax, ecx ; eax dest, ecx trash + pop ecx + ret +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.asm b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.asm new file mode 100644 index 0000000000000..f9599b1b8666e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.asm @@ -0,0 +1,3 @@ +;; TODO: Implement + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..6fee9002e86db --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.asm @@ -0,0 +1,101 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
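+
+;; Interop thunk helpers. RhCommonStub is the common stub that interop thunks transfer to: it
+;; receives the address of the thunk's two-pointer data block in eax, stores the first pointer
+;; (the context) in the ThunkParamSlot thread-local and tail-jumps to the second pointer (the
+;; target). The target can later retrieve the context via RhGetCurrentThunkContext.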
+ + +.586 +.model flat +option casemap:none +.code + +;; ----------------------------------------------------------------------------------------------------------- +;; standard macros +;; ----------------------------------------------------------------------------------------------------------- +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +__tls_array equ 2Ch ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 04h + +;; TLS variables +_TLS SEGMENT ALIAS(".tls$") + ThunkParamSlot DD 00000000H +_TLS ENDS + +ASSUME fs : NOTHING +EXTRN __tls_index:DWORD + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; RhCommonStub +;; +LEAF_ENTRY RhCommonStub, _TEXT + ;; There are arbitrary callers passing arguments with arbitrary signatures. + ;; Custom calling convention: + ;; eax: pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + + ;; make some scratch regs + push ecx + push edx + + mov ecx, [__tls_index] + mov edx, fs:[__tls_array] + mov ecx, [edx + ecx * POINTER_SIZE] + + ;; eax = address of context cell in thunk's data + ;; ecx = base address of TLS data + ;; edx = trashed + + ;; store thunk address in thread static + mov edx, [eax] + mov eax, [eax + POINTER_SIZE] ;; eax <- target slot data + mov [ecx + OFFSET ThunkParamSlot], edx ;; ThunkParamSlot <- context slot data + + ;; restore the regs we used + pop edx + pop ecx + + ;; jump to the target + jmp eax +LEAF_END RhCommonStub, _TEXT + + +;; +;; IntPtr RhGetCommonStubAddress() +;; +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + lea eax, [RhCommonStub] + ret +LEAF_END RhGetCommonStubAddress, _TEXT + + +;; +;; IntPtr RhGetCurrentThunkContext() +;; +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + mov ecx, [__tls_index] + mov edx, fs:[__tls_array] + mov ecx, [edx + ecx * POINTER_SIZE] + mov eax, [ecx + OFFSET ThunkParamSlot] ;; eax <- ThunkParamSlot + ret +LEAF_END RhGetCurrentThunkContext, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/MemClrForGC.asm b/src/coreclr/src/nativeaot/Runtime/i386/MemClrForGC.asm new file mode 100644 index 0000000000000..cb23c1ce1625f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/MemClrForGC.asm @@ -0,0 +1,148 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
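+
+;; memclr_for_gc: zeroes a block of memory for the GC. Blocks of 8KB or less are cleared entirely
+;; with rep stos; for larger blocks the portion beyond the first 8KB is cleared with non-temporal
+;; stores (movnti) when the OS reports SSE2 support, so that large clears do not flush the cache.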
+ + .586 + .xmm + .model flat + option casemap:none + + +EXTERN _IsProcessorFeaturePresent@4 : PROC + +PF_XMMI64_INSTRUCTIONS_AVAILABLE equ 10 + + .data +canUseSSE2 db 0 + + .code + +_memclr_for_gc@8 proc public + +; x86 version + +; we get the following parameters +; ecx = destination address +; edx = size to clear + + push ebx + push edi + + xor eax, eax + + ; load destination + mov edi,[esp+8+4] + + ; load size + mov ebx,[esp+8+8] + + ; check alignment of destination + test edi,3 + jnz alignDest +alignDone: + ; now destination is dword aligned + + ; compute number of bytes to clear non-temporally + ; we wish to clear the first 8k or so with rep stos, + ; anything above that non-temporally + + xor edx,edx + cmp ebx,8*1024 + jbe noNonTempClear + + ; can we use SSE2 instructions? + cmp canUseSSE2,0 + js noNonTempClear + jz computeCanUseSSE2 + +computeNonTempClear: + + ; compute the number of bytes above 8k + ; and round down to a multiple of 64 + mov edx,ebx + sub edx,8*1024 + and edx,not 63 + + ; compute remaining size to clear temporally + sub ebx,edx + +noNonTempClear: + ; do the temporal clear + mov ecx,ebx + shr ecx,2 + rep stosd + + ; do the non-temporal clear + test edx,edx + jne nonTempClearLoop + +nonTempClearDone: + + ; clear any remaining bytes + mov ecx,ebx + and ecx,3 + rep stosb + + pop edi + pop ebx + ret 8 + + ; this is the infrequent case, hence out of line +nonTempClearLoop: + movnti [edi+ 0],eax + movnti [edi+ 4],eax + movnti [edi+ 8],eax + movnti [edi+12],eax + + movnti [edi+16],eax + movnti [edi+20],eax + movnti [edi+24],eax + movnti [edi+28],eax + + movnti [edi+32],eax + movnti [edi+36],eax + movnti [edi+40],eax + movnti [edi+44],eax + + movnti [edi+48],eax + movnti [edi+52],eax + movnti [edi+56],eax + movnti [edi+60],eax + + add edi,64 + sub edx,64 + ja nonTempClearLoop + jmp nonTempClearDone + +alignDest: + test ebx,ebx + je alignDone +alignLoop: + mov [edi],al + add edi,1 + sub ebx,1 + jz alignDone + test edi,3 + jnz alignLoop + jmp alignDone + +computeCanUseSSE2: + ; we are not using the sse2 register set, + ; just sse2 instructions (movnti), + ; thus we just ask the OS about the usability of the instructions + ; OS bugs about saving/restoring registers like in early versions + ; of Vista etc. in the WoW shouldn't matter + + push PF_XMMI64_INSTRUCTIONS_AVAILABLE + call _IsProcessorFeaturePresent@4 + mov ecx,eax + xor eax,eax ; reset eax to 0 + test ecx,ecx + mov canUseSSE2,1 + jne computeNonTempClear + mov canUseSSE2,-1 + xor edx,edx + jmp noNonTempClear + +_memclr_for_gc@8 endp + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.S new file mode 100644 index 0000000000000..6dee0d452b0a4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.S @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// *********************************************************************/ +// JIT_StackProbe +// +// Purpose: +// the helper will access ("probe") a word on each page of the stack +// starting with the page right beneath esp down to the one pointed to by eax. +// The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +// The call to the helper will be emitted by JIT in the function prolog when large (larger than 0x3000 bytes) stack frame is required. 
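+//
+// Illustrative C-style sketch of the probe loop below (assumes 4K pages; the real code works
+// directly on esp and restores it from ebp afterwards):
+//
+//   char* p = (char*)(esp & ~(PAGE_SIZE - 1));   // lowest address on the last probed page
+//   do {
+//       p -= PAGE_SIZE;
+//       touch(p);                                // access one word on the page
+//   } while (p > (char*)eax);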
+// +// NOTE: this helper will modify a value of esp and must establish the frame pointer. +// NOTE: On Linux we must advance the stack pointer as we probe - it is not allowed to access 65535 bytes below esp. +// +#define PAGE_SIZE 0x1000 +NESTED_ENTRY JIT_StackProbe, _TEXT, NoHandler + // On entry: + // eax - the lowest address of the stack frame being allocated (i.e. [InitialSp - FrameSize]) + // + // NOTE: this helper will probe at least one page below the one pointed by esp. + PROLOG_BEG + PROLOG_END + + and esp, -PAGE_SIZE // esp points to the **lowest address** on the last probed page + // This is done to make the loop end condition simpler. + +LOCAL_LABEL(ProbeLoop): + sub esp, PAGE_SIZE // esp points to the lowest address of the **next page** to probe + test [esp], eax // esp points to the lowest address on the **last probed** page + cmp esp, eax + jg LOCAL_LABEL(ProbeLoop) // if esp > eax, then we need to probe at least one more page. + + EPILOG_BEG + mov esp, ebp + EPILOG_END + ret + +NESTED_END JIT_StackProbe, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.asm new file mode 100644 index 0000000000000..ed06082e96896 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.asm @@ -0,0 +1,301 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +EXTERN @GetClasslibCCtorCheck@4 : PROC +EXTERN _memcpy : PROC +EXTERN _memcpyGCRefs : PROC +EXTERN _memcpyGCRefsWithWriteBarrier : PROC +EXTERN _memcpyAnyWithWriteBarrier : PROC + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; eax : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; +FASTCALL_FUNC RhpCheckCctor, 4 + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [eax + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor__SlowPath + ret + +RhpCheckCctor__SlowPath: + mov edx, eax ; RhpCheckCctor2 takes the static class construction context pointer in the edx register + jmp @RhpCheckCctor2@4 +FASTCALL_ENDFUNC + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; eax : Value that must be preserved in this register across the cctor check. +;; edx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than eax may be trashed and the condition codes may also be trashed. +;; +FASTCALL_FUNC RhpCheckCctor2, 4 + + ;; Check the m_initialized field of the context. 
The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [edx + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor2__SlowPath + ret + +;; Input: +;; eax : Value that must be preserved in this register across the cctor check. +;; edx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than eax may be trashed and the condition codes may also be trashed. +;; +RhpCheckCctor2__SlowPath: + ;; Call a C++ helper to retrieve the address of the classlib callback. We need to preserve the context + ;; structure address in eax since it's needed for the actual call. + push ebx + push esi + mov ebx, edx ; save cctor context pointer + mov esi, eax ; save preserved return value + + ;; The caller's return address is passed as the argument to the helper; it's an address in the module + ;; and is used by the helper to locate the classlib. + mov ecx, [esp + 8] ; + 8 to skip past the saved ebx and esi + + call @GetClasslibCCtorCheck@4 + + ;; Eax now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in ebx. Clean up and tail call to the classlib callback so we're not on + ;; the stack should a GC occur (so we don't need to worry about transition frames). + mov edx, ebx + mov ecx, esi + pop esi + pop ebx + ;; Tail-call the classlib cctor check function. Note that the incoming eax value is moved to ecx + ;; and the classlib cctor check function is required to return that value, so that eax is preserved + ;; across a RhpCheckCctor call. + jmp eax + +FASTCALL_ENDFUNC + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +_RhpCopyMultibyteNoGCRefs PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
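+    ; Note that only the first byte of dest and src is probed here; the labels below mark the
+    ; exact instruction addresses the EH subsystem recognizes as these well-known AV locations.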
+ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to plain-old-memcpy + jmp _memcpy + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyMultibyteNoGCRefs ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +_RhpCopyMultibyte PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to the GC-safe memcpy implementation + ; NOTE: this is also a __cdecl function + jmp _memcpyGCRefs + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyMultibyte ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; +_RhpCopyMultibyteWithWriteBarrier PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
+ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to the GC-safe memcpy implementation + ; NOTE: this is also a __cdecl function + jmp _memcpyGCRefsWithWriteBarrier + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyMultibyteWithWriteBarrier ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers +;; +_RhpCopyAnyWithWriteBarrier PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to the GC-safe memcpy implementation + ; NOTE: this is also a __cdecl function + jmp _memcpyAnyWithWriteBarrier + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyAnyWithWriteBarrier ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; The following helper will access ("probe") a word on each page of the stack +; starting with the page right beneath esp down to the one pointed to by eax. +; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +; The call to the helper will be emitted by JIT in the function prolog when large (larger than 0x3000 bytes) stack frame is required. +; +; NOTE: this helper will modify a value of esp and must establish the frame pointer. +PAGE_SIZE equ 1000h + +_RhpStackProbe PROC public + ; On entry: + ; eax - the lowest address of the stack frame being allocated (i.e. [InitialSp - FrameSize]) + ; + ; NOTE: this helper will probe at least one page below the one pointed by esp. + push ebp + mov ebp, esp + + and esp, -PAGE_SIZE ; esp points to the **lowest address** on the last probed page + ; This is done to make the loop end condition simpler. +ProbeLoop: + sub esp, PAGE_SIZE ; esp points to the lowest address of the **next page** to probe + test [esp], eax ; esp points to the lowest address on the **last probed** page + cmp esp, eax + jg ProbeLoop ; if esp > eax, then we need to probe at least one more page. 
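+    ; Every page from the one just below the caller's esp down to the one containing eax has now
+    ; been probed; restore esp from the frame pointer and return.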
+ + mov esp, ebp + pop ebp + ret + +_RhpStackProbe ENDP + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.asm new file mode 100644 index 0000000000000..947d3c1f26277 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.asm @@ -0,0 +1,219 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +extern RhpReversePInvokeBadTransition : proc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +_RhpWaitForSuspend proc public + push ebp + mov ebp, esp + push eax + push ecx + push edx + + call RhpWaitForSuspend2 + + pop edx + pop ecx + pop eax + pop ebp + ret +_RhpWaitForSuspend endp + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort +;; +;; +;; INPUT: ECX: transition frame +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +_RhpWaitForGCNoAbort proc public + push ebp + mov ebp, esp + push eax + push edx + push ebx + push esi + + mov esi, [ecx + OFFSETOF__PInvokeTransitionFrame__m_pThread] + + test dword ptr [esi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jnz Done + + ; passing transition frame pointer in ecx + call RhpWaitForGC2 + +Done: + pop esi + pop ebx + pop edx + pop eax + pop ebp + ret +_RhpWaitForGCNoAbort endp + +RhpThrowHwEx equ @RhpThrowHwEx@0 +EXTERN RhpThrowHwEx : PROC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC +;; +;; +;; INPUT: ECX: transition frame +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +_RhpWaitForGC proc public + push ebp + mov ebp, esp + push ebx + + mov ebx, ecx + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jz NoWait + + call _RhpWaitForGCNoAbort +NoWait: + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz Done + test dword ptr [ebx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jz Done + + mov ecx, STATUS_REDHAWK_THREAD_ABORT + pop ebx + pop ebp + pop edx ; return address as exception RIP + jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception +Done: + pop ebx + pop ebp + ret +_RhpWaitForGC endp + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; IN: EAX: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to 
avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpReversePInvoke, 0 + push ecx ; save arg regs -- we could omit this if we knew the calling convention wasn't fastcall. + push edx ; ... + + ;; edx = GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + mov [eax + 4], edx ; save thread pointer for RhpReversePInvokeReturn + + ; edx = thread + ; eax = prev save slot + ; ecx = scratch + + test dword ptr [edx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_Attached + jz AttachThread + +ThreadAttached: + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + cmp dword ptr [edx + OFFSETOF__Thread__m_pTransitionFrame], 0 + je CheckBadTransition + + ; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ; whenever we might attempt to hijack this thread. + mov ecx, [edx + OFFSETOF__Thread__m_pTransitionFrame] + mov [eax], ecx + +ReverseRetry: + mov dword ptr [edx + OFFSETOF__Thread__m_pTransitionFrame], 0 + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz ReverseTrapReturningThread + +AllDone: + pop edx ; restore arg reg + pop ecx ; restore arg reg + ret + +CheckBadTransition: + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
+ test dword ptr [edx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jz BadTransition + + ;; zero-out our 'previous transition frame' save slot + mov dword ptr [eax], 0 + + ;; nothing more to do + jmp AllDone + +ReverseTrapReturningThread: + ;; put the previous frame back (sets us back to preemptive mode) + mov ecx, [eax] + mov [edx + OFFSETOF__Thread__m_pTransitionFrame], ecx + +AttachThread: + mov ecx, eax ; arg <- address of reverse pinvoke frame + call RhpReversePInvokeAttachOrTrapThread2 + jmp AllDone + +BadTransition: + pop edx + pop ecx + mov ecx, dword ptr [esp] ; arg <- return address + jmp RhpReversePInvokeBadTransition +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: ECX: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpReversePInvokeReturn, 0 + push edx ; save return value + + mov edx, [ecx + 4] ; get Thread pointer + mov ecx, [ecx + 0] ; get previous M->U transition frame + + mov [edx + OFFSETOF__Thread__m_pTransitionFrame], ecx + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + pop edx ; restore return value + jnz _RhpWaitForSuspend + ret + +FASTCALL_ENDFUNC + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.asm new file mode 100644 index 0000000000000..8b1ba538eb169 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.asm @@ -0,0 +1,133 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +EXTERN RhpCidResolve : PROC +EXTERN _RhpUniversalTransition_DebugStepTailCall@0 : PROC + + +;; Macro that generates code to check a single cache entry. +CHECK_CACHE_ENTRY macro entry +NextLabel textequ @CatStr( Attempt, %entry+1 ) + cmp ebx, [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8))] + jne @F + pop ebx + jmp dword ptr [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8) + 4)] +@@: +endm + + +;; Macro that generates a stub consuming a cache with the given number of entries. +DEFINE_INTERFACE_DISPATCH_STUB macro entries + +StubName textequ @CatStr( _RhpInterfaceDispatch, entries ) + + StubName proc public + + ;; Check the instance here to catch null references. We're going to touch it again below (to cache + ;; the EEType pointer), but that's after we've pushed ebx below, and taking an A/V there will + ;; mess up the stack trace for debugging. We also don't have a spare scratch register (eax holds + ;; the cache pointer and the push of ebx below is precisely so we can access a second register + ;; to hold the EEType pointer). 
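+    ;;
+    ;; Illustrative outline of the generated stub (C-style pseudocode; each 8-byte cache entry
+    ;; holds the expected type at offset 0 and the target code address at offset 4):
+    ;;
+    ;;   if (ecx == NULL) goto RhpInterfaceDispatchNullReference;
+    ;;   cache = cell->m_pCache;
+    ;;   type  = *(MethodTable**)ecx;
+    ;;   for (i = 0; i < entries; i++)
+    ;;       if (cache->m_rgEntries[i].type == type) goto cache->m_rgEntries[i].target;
+    ;;   goto RhpInterfaceDispatchSlow;   // with eax pointed back at the cell via m_pCell
+    ;;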
+ test ecx, ecx + je RhpInterfaceDispatchNullReference + + ;; eax currently contains the indirection cell address. We need to update it to point to the cache + ;; block instead. + mov eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Cache pointer is already loaded in the only scratch register we have so far, eax. We need + ;; another scratch register to hold the instance type so save the value of ebx and use that. + push ebx + + ;; Load the EEType from the object instance in ebx. + mov ebx, [ecx] + +CurrentEntry = 0 + while CurrentEntry lt entries + CHECK_CACHE_ENTRY %CurrentEntry +CurrentEntry = CurrentEntry + 1 + endm + + ;; eax currently contains the cache block. We need to point it back to the + ;; indirection cell using the back pointer in the cache block + mov eax, [eax + OFFSETOF__InterfaceDispatchCache__m_pCell] + pop ebx + jmp RhpInterfaceDispatchSlow + + StubName endp + + endm ;; DEFINE_INTERFACE_DISPATCH_STUB + + +;; Define all the stub routines we currently need. +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +;; Shared out of line helper used on cache misses. +RhpInterfaceDispatchSlow proc +;; eax points at InterfaceDispatchCell + + ;; Setup call to Universal Transition thunk + push ebp + mov ebp, esp + push eax ; First argument (Interface Dispatch Cell) + lea eax, [RhpCidResolve] + push eax ; Second argument (RhpCidResolve) + + ;; Jump to Universal Transition + jmp _RhpUniversalTransition_DebugStepTailCall@0 +RhpInterfaceDispatchSlow endp + +;; Out of line helper used when we try to interface dispatch on a null pointer. Sets up the stack so the +;; debugger gives a reasonable stack trace. +RhpInterfaceDispatchNullReference proc public + push ebp + mov ebp, esp + mov ebx, [ecx] ;; This should A/V + int 3 +RhpInterfaceDispatchNullReference endp + +;; Stub dispatch routine for dispatch to a vtable slot +_RhpVTableOffsetDispatch proc public + ;; eax currently contains the indirection cell address. We need to update it to point to the vtable offset (which is in the m_pCache field) + mov eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; add the vtable offset to the EEType pointer + add eax, [ecx] + + ;; Load the target address of the vtable into eax + mov eax, [eax] + + ;; tail-jump to the target + jmp eax +_RhpVTableOffsetDispatch endp + + +;; Initial dispatch on an interface when we don't have a cache yet. +_RhpInitialInterfaceDispatch proc public + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + + jmp RhpInterfaceDispatchSlow + +_RhpInitialInterfaceDispatch endp + + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/i386/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..868f48837597f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/ThunkPoolThunks.asm @@ -0,0 +1,297 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
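+
+;; Thunk pool stubs. The pool is laid out as pairs of 4K pages: a page of tiny thunk stubs whose
+;; matching data page is mapped immediately after it. Each stub computes the address of its own
+;; two-dword data block (context + target) in the following page and jumps through the common-stub
+;; pointer stored in the last dword of that data page.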
+ +.586 +.model flat +option casemap:none +.code + +include AsmMacros.inc + +;; ----------------------------------------------------------------------------------------------------------- +;; standard macros +;; ----------------------------------------------------------------------------------------------------------- +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + public Name + Name proc +endm + +NAMED_LEAF_ENTRY macro Name, Section, SectionAlias + Section segment para alias(SectionAlias) 'CODE' + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +NAMED_READONLY_DATA_SECTION macro Section, SectionAlias + Section segment para alias(SectionAlias) read 'DATA' + DD 0 + Section ends +endm + +NAMED_READWRITE_DATA_SECTION macro Section, SectionAlias + Section segment para alias(SectionAlias) read write 'DATA' + DD 0 + Section ends +endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 20h ;; 5-byte call, 1 byte pop, 6-byte lea, 6-byte jmp, 14 bytes of padding +THUNK_DATASIZE equ 08h ;; 2 dwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 078h ;; 120 thunks per page + +PAGE_SIZE equ 01000h ;; 4K +POINTER_SIZE equ 04h + + +GET_CURRENT_IP macro + ALIGN 10h ;; make sure we align to 16-byte boundary for CFG table + call @F + @@: pop eax +endm + +LOAD_DATA_ADDRESS macro groupIndex, index + ;; start : eax points to current instruction of the current thunk + ;; set eax to begining of data page : eax <- [eax - (size of the call instruction + (THUNK_CODESIZE * current thunk's index)) + PAGE_SIZE] + ;; fix offset of the data : eax <- eax + (THUNK_DATASIZE * current thunk's index) + lea eax,[eax - (5 + groupIndex * THUNK_CODESIZE * 10 + THUNK_CODESIZE * index) + PAGE_SIZE + (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index)] +endm + +JUMP_TO_COMMON macro groupIndex, index + ;; start : eax points to current thunk's data block + ;; re-point eax to begining of data page : eax <- [eax - (THUNK_DATASIZE * current thunk's index)] + ;; jump to the location pointed at by the last dword in the data page : jump [eax + PAGE_SIZE - POINTER_SIZE] + jmp dword ptr[eax - (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index) + PAGE_SIZE - POINTER_SIZE] +endm + +TenThunks macro groupIndex + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (last dword + ;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two dword values: + ;; - Context: some value given to the thunk as context (passed in eax). Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
+ + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,0 + JUMP_TO_COMMON groupIndex,0 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,1 + JUMP_TO_COMMON groupIndex,1 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,2 + JUMP_TO_COMMON groupIndex,2 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,3 + JUMP_TO_COMMON groupIndex,3 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,4 + JUMP_TO_COMMON groupIndex,4 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,5 + JUMP_TO_COMMON groupIndex,5 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,6 + JUMP_TO_COMMON groupIndex,6 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,7 + JUMP_TO_COMMON groupIndex,7 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,8 + JUMP_TO_COMMON groupIndex,8 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,9 + JUMP_TO_COMMON groupIndex,9 +endm + +THUNKS_PAGE_BLOCK macro + TenThunks 0 + TenThunks 1 + TenThunks 2 + TenThunks 3 + TenThunks 4 + TenThunks 5 + TenThunks 6 + TenThunks 7 + TenThunks 8 + TenThunks 9 + TenThunks 10 + TenThunks 11 +endm + +;; +;; The first thunks section should be 64K aligned because it can get +;; mapped multiple times in memory, and mapping works on allocation +;; granularity boundaries (we don't want to map more than what we need) +;; +;; The easiest way to do so is by having the thunks section at the +;; first 64K aligned virtual address in the binary. We provide a section +;; layout file to the linker to tell it how to layout the thunks sections +;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) +;; +;; The PE spec says images cannot have gaps between sections (other +;; than what is required by the section alignment value in the header), +;; therefore we need a couple of padding data sections (otherwise the +;; OS will not load the image). 
+;; + +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, ".pad0" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, ".pad1" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, ".pad2" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, ".pad3" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, ".pad4" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, ".pad5" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, ".pad6" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, ".pad7" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, ".pad8" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, ".pad9" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, ".pad10" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, ".pad11" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, ".pad12" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, ".pad13" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, ".pad14" + +;; +;; Thunk Stubs +;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: +;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs +;; - ndp\rh\src\tools\rhbind\zapimage.h +;; +NAMED_LEAF_ENTRY ThunkPool, TKS0, ".tks0" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool, TKS0 + +NAMED_READWRITE_DATA_SECTION ThunkData0, ".tkd0" + +NAMED_LEAF_ENTRY ThunkPool1, TKS1, ".tks1" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool1, TKS1 + +NAMED_READWRITE_DATA_SECTION ThunkData1, ".tkd1" + +NAMED_LEAF_ENTRY ThunkPool2, TKS2, ".tks2" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool2, TKS2 + +NAMED_READWRITE_DATA_SECTION ThunkData2, ".tkd2" + +NAMED_LEAF_ENTRY ThunkPool3, TKS3, ".tks3" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool3, TKS3 + +NAMED_READWRITE_DATA_SECTION ThunkData3, ".tkd3" + +NAMED_LEAF_ENTRY ThunkPool4, TKS4, ".tks4" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool4, TKS4 + +NAMED_READWRITE_DATA_SECTION ThunkData4, ".tkd4" + +NAMED_LEAF_ENTRY ThunkPool5, TKS5, ".tks5" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool5, TKS5 + +NAMED_READWRITE_DATA_SECTION ThunkData5, ".tkd5" + +NAMED_LEAF_ENTRY ThunkPool6, TKS6, ".tks6" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool6, TKS6 + +NAMED_READWRITE_DATA_SECTION ThunkData6, ".tkd6" + +NAMED_LEAF_ENTRY ThunkPool7, TKS7, ".tks7" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool7, TKS7 + +NAMED_READWRITE_DATA_SECTION ThunkData7, ".tkd7" + + +;; +;; IntPtr RhpGetThunksBase() +;; +FASTCALL_FUNC RhpGetThunksBase, 0 + ;; Return the address of the first thunk pool to the caller (this is really the base address) + lea eax, [ThunkPool] + ret +FASTCALL_ENDFUNC + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; int RhpGetNumThunksPerBlock() +;; +FASTCALL_FUNC RhpGetNumThunksPerBlock, 0 + mov eax, THUNK_POOL_NUM_THUNKS_PER_PAGE + ret +FASTCALL_ENDFUNC + +;; +;; int RhpGetThunkSize() +;; +FASTCALL_FUNC RhpGetThunkSize, 0 + mov eax, THUNK_CODESIZE + ret +FASTCALL_ENDFUNC + +;; +;; int RhpGetNumThunkBlocksPerMapping() +;; +FASTCALL_FUNC RhpGetNumThunkBlocksPerMapping, 0 + mov eax, 8 + ret +FASTCALL_ENDFUNC + +;; +;; int RhpGetThunkBlockSize +;; +FASTCALL_FUNC RhpGetThunkBlockSize, 0 + mov eax, PAGE_SIZE * 2 + ret +FASTCALL_ENDFUNC + +;; +;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) +;; +FASTCALL_FUNC RhpGetThunkDataBlockAddress, 4 + mov eax, ecx + mov ecx, PAGE_SIZE - 1 + not ecx + and eax, ecx + add eax, PAGE_SIZE + ret +FASTCALL_ENDFUNC + +;; +;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) +;; +FASTCALL_FUNC RhpGetThunkStubsBlockAddress, 4 
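+    ;; Round the data block address down to its page base, then step back one page to reach the
+    ;; corresponding thunk stubs page (stub and data pages are laid out in pairs).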
+ mov eax, ecx + mov ecx, PAGE_SIZE - 1 + not ecx + and eax, ecx + sub eax, PAGE_SIZE + ret +FASTCALL_ENDFUNC + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.asm new file mode 100644 index 0000000000000..b425c7d17d80c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.asm @@ -0,0 +1,101 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +ifdef FEATURE_DYNAMIC_CODE + +;; +;; Defines an assembly thunk used to make a transition from managed code to a callee, +;; then (based on the return value from the callee), either returning or jumping to +;; a new location while preserving the input arguments. The usage of this thunk also +;; ensures arguments passed are properly reported. +;; +;; TODO: This code currently only tailcalls, and does not return. +;; +;; Inputs: +;; ecx, edx, stack space three pops down: arguments as normal +;; first register sized fields on the stack is the location of the target code +;; the UniversalTransitionThunk will call +;; second register sized field on the stack is the parameter to the target function +;; followed by the return address of the whole method. (This method cannot be called +;; via a call instruction, it must be jumped to.) The fake entrypoint is in place to +;; convince the stack walker this is a normal framed function. +;; +;; NOTE! FOR CORRECTNESS THIS FUNCTION REQUIRES THAT ALL NON-LEAF MANAGED FUNCTIONS HAVE +;; FRAME POINTERS, OR THE STACK WALKER CAN'T STACKWALK OUT OF HERE +;; + +; +; Frame layout is: +; +; {StackPassedArgs} ChildSP+018 CallerSP+000 +; {CallerRetaddr} ChildSP+014 CallerSP-004 +; {CallerEBP} ChildSP+010 CallerSP-008 +; {ReturnBlock (0x8 bytes)} ChildSP+008 CallerSP-010 +; -- On input (i.e., when control jumps to RhpUniversalTransition), the low 4 bytes of +; the ReturnBlock area holds the address of the callee and the high 4 bytes holds the +; extra argument to pass to the callee. +; {IntArgRegs (edx,ecx) (0x8 bytes)} ChildSP+000 CallerSP-018 +; {CalleeRetaddr} ChildSP-004 CallerSP-01c +; +; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +; must be updated as well. +; +; NOTE: The callee receives a pointer to the base of the pushed IntArgRegs, and the callee +; has knowledge of the exact layout of the entire frame. +; +; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +; everything between the base of the IntArgRegs and the top of the StackPassedArgs. +; + +UNIVERSAL_TRANSITION macro FunctionName + +FASTCALL_FUNC Rhp&FunctionName&_FAKE_ENTRY, 0 + ; Set up an ebp frame + push ebp + mov ebp, esp + push eax + push eax +ALTERNATE_ENTRY Rhp&FunctionName&@0 + push ecx + push edx + + ; + ; Call out to the target, while storing and reporting arguments to the GC. 
+ ; + mov eax, [ebp-8] ; Get the address of the callee + mov edx, [ebp-4] ; Get the extra argument to pass to the callee + lea ecx, [ebp-10h] ; Get pointer to edx value pushed above + call eax + + EXPORT_POINTER_TO_ADDRESS _PointerToReturnFrom&FunctionName + + ; We cannot make the label public as that tricks DIA stackwalker into thinking + ; it's the beginning of a method. For this reason we export an auxiliary variable + ; holding the address instead. + + pop edx + pop ecx + add esp, 8 + pop ebp + jmp eax + +FASTCALL_ENDFUNC + + endm + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +endif + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.asm new file mode 100644 index 0000000000000..0f6b3f1ba8ea5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.asm @@ -0,0 +1,266 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + + .xmm + .model flat + option casemap:none + .code + +include AsmMacros.inc + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +ifdef WRITE_BARRIER_CHECK + +g_GCShadow TEXTEQU +g_GCShadowEnd TEXTEQU +INVALIDGCVALUE EQU 0CCCCCCCDh + +EXTERN g_GCShadow : DWORD +EXTERN g_GCShadowEnd : DWORD + +UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG + + ;; If g_GCShadow is 0, don't perform the check. 
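+
+    ;; Illustrative outline (C-style pseudocode; the assembly below additionally preserves DESTREG
+    ;; across the computation):
+    ;;
+    ;;   if (g_GCShadow == 0) return;                      // shadow heap checking not enabled
+    ;;   if (dest < G_LOWEST_ADDRESS) return;              // dest is not in the tracked range
+    ;;   shadow = g_GCShadow + (dest - G_LOWEST_ADDRESS);
+    ;;   if (shadow > g_GCShadowEnd) return;
+    ;;   *shadow = ref;
+    ;;   if (*dest != ref)                                 // raced with another heap writer
+    ;;       *shadow = INVALIDGCVALUE;                     // disable checking for this slot
+    ;;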
+ cmp g_GCShadow, 0 + je &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG& + + ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). + push DESTREG + + ;; Transform DESTREG into the equivalent address in the shadow heap. + sub DESTREG, G_LOWEST_ADDRESS + jb &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG& + add DESTREG, [g_GCShadow] + cmp DESTREG, [g_GCShadowEnd] + ja &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG& + + ;; Update the shadow heap. + mov [DESTREG], REFREG + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. This + ;; read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to + ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock + ;; prefix). + xchg [esp], DESTREG + cmp [DESTREG], REFREG + jne &BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG& + + ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the + ;; location) pushed. Need to discard this push before we are done. + add esp, 4 + jmp &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG& + +&BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG&: + ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + + ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + ;; variant that doesn't implicitly specify the lock prefix. + xchg [esp], DESTREG + mov dword ptr [DESTREG], INVALIDGCVALUE + +&BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG&: + ;; Restore original DESTREG value from the stack. + pop DESTREG + +&BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG&: +endm + +else ; WRITE_BARRIER_CHECK + +UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG +endm + +endif ; WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). +DEFINE_WRITE_BARRIER macro DESTREG, REFREG + +;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +;; location is in one of the other general registers determined by the value of REFREG. +FASTCALL_FUNC RhpAssignRef&REFREG&, 0 + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + @RhpAssignRef@0 label proc + PUBLIC @RhpAssignRef@0 + ALTERNATE_ENTRY RhpAssignRefAVLocation + endif + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov dword ptr [DESTREG], REFREG + + ;; Update the shadow copy of the heap with the same value (if enabled). 
+ UPDATE_GC_SHADOW RhpAssignRef, DESTREG, REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp REFREG, [G_EPHEMERAL_LOW] + jb WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG& + cmp REFREG, [G_EPHEMERAL_HIGH] + jae WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG& + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. + shr DESTREG, 10 + add DESTREG, [G_CARD_TABLE] + cmp byte ptr [DESTREG], 0FFh + jne WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG& + +WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG&: + ret + +;; We get here if it's necessary to update the card table. +WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG&: + mov byte ptr [DESTREG], 0FFh + ret +FASTCALL_ENDFUNC +endm + +RET4 macro + ret 4 +endm + +DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, DESTREG, REFREG, RETINST + + ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance), + ;; in which case no write barrier is required. + cmp DESTREG, [G_LOWEST_ADDRESS] + jb &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + cmp DESTREG, [G_HIGHEST_ADDRESS] + jae &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, DESTREG, REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp REFREG, [G_EPHEMERAL_LOW] + jb &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + cmp REFREG, [G_EPHEMERAL_HIGH] + jae &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. + shr DESTREG, 10 + add DESTREG, [G_CARD_TABLE] + cmp byte ptr [DESTREG], 0FFh + jne &BASENAME&_UpdateCardTable_&DESTREG&_&REFREG& + +&BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&: + RETINST + +;; We get here if it's necessary to update the card table. +&BASENAME&_UpdateCardTable_&DESTREG&_&REFREG&: + mov byte ptr [DESTREG], 0FFh + RETINST + +endm + + +;; This macro is very much like the one above except that it generates a variant of the function which also +;; checks whether the destination is actually somewhere within the GC heap. +DEFINE_CHECKED_WRITE_BARRIER macro DESTREG, REFREG + +;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into +;; that location is in one of the other general registers determined by the value of REFREG. 
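+;;
+;; As a purely illustrative example of the card table arithmetic these barriers use (the numbers
+;; below are arbitrary and not taken from a real heap): because the destination address is shifted
+;; right by 10, each card byte covers a 1KB (400h byte) span of the heap. A store to destination
+;; address 12345678h therefore marks card byte [G_CARD_TABLE + 48D15h], which covers heap addresses
+;; 12345400h through 123457FFh.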
+ +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +FASTCALL_FUNC RhpCheckedAssignRef&REFREG&, 0 + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + @RhpCheckedAssignRef@0 label proc + PUBLIC @RhpCheckedAssignRef@0 + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + endif + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov dword ptr [DESTREG], REFREG + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, DESTREG, REFREG, ret + +FASTCALL_ENDFUNC + +endm + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is EDX. +DEFINE_CHECKED_WRITE_BARRIER ECX, EDX +DEFINE_WRITE_BARRIER ECX, EDX + +;; Need some more write barriers to run CLR compiled MDIL on Redhawk - commented out for now +;; DEFINE_WRITE_BARRIER EDX, EAX +;; DEFINE_WRITE_BARRIER EDX, ECX +;; DEFINE_WRITE_BARRIER EDX, EBX +;; DEFINE_WRITE_BARRIER EDX, ESI +;; DEFINE_WRITE_BARRIER EDX, EDI +;; DEFINE_WRITE_BARRIER EDX, EBP + +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EAX +;; DEFINE_CHECKED_WRITE_BARRIER EDX, ECX +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EBX +;; DEFINE_CHECKED_WRITE_BARRIER EDX, ESI +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EDI +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EBP + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at @RhpCheckedLockCmpXchgAVLocation@0 +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +;; pass third argument in EAX +FASTCALL_FUNC RhpCheckedLockCmpXchg +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + lock cmpxchg [ecx], edx + jne RhpCheckedLockCmpXchg_NoBarrierRequired_ECX_EDX + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, ECX, EDX, ret + +FASTCALL_ENDFUNC + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at @RhpCheckedXchgAVLocation@0 +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +FASTCALL_FUNC RhpCheckedXchg, 0 + + ;; Setup eax with the new object for the exchange, that way it will automatically hold the correct result + ;; afterwards and we can leave edx unaltered ready for the GC write barrier below. + mov eax, edx +ALTERNATE_ENTRY RhpCheckedXchgAVLocation + xchg [ecx], eax + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, ECX, EDX, ret + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/inc/CommonTypes.h b/src/coreclr/src/nativeaot/Runtime/inc/CommonTypes.h new file mode 100644 index 0000000000000..43da9a17d19a8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/CommonTypes.h @@ -0,0 +1,53 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __COMMON_TYPES_H__ +#define __COMMON_TYPES_H__ + +#include +#include +#include +#include + +using std::nothrow; +using std::size_t; +using std::uintptr_t; +using std::intptr_t; + +// +// These type names are chosen to match the C# types +// +typedef int8_t Int8; +typedef int16_t Int16; +typedef int32_t Int32; +typedef int64_t Int64; +typedef uint8_t UInt8; +typedef uint16_t UInt16; +typedef uint32_t UInt32; +typedef uint64_t UInt64; +typedef intptr_t IntNative; // intentional deviation from C# IntPtr +typedef uintptr_t UIntNative; // intentional deviation from C# UIntPtr +typedef wchar_t WCHAR; +typedef void * HANDLE; + +typedef unsigned char Boolean; +#define Boolean_false 0 +#define Boolean_true 1 + +typedef UInt32 UInt32_BOOL; // windows 4-byte BOOL, 0 -> false, everything else -> true +#define UInt32_FALSE 0 +#define UInt32_TRUE 1 + +#define UInt16_MAX ((UInt16)0xffffU) +#define UInt16_MIN ((UInt16)0x0000U) + +#define UInt32_MAX ((UInt32)0xffffffffU) +#define UInt32_MIN ((UInt32)0x00000000U) + +#define Int32_MAX ((Int32)0x7fffffff) +#define Int32_MIN ((Int32)0x80000000) + +#define UInt64_MAX ((UInt64)0xffffffffffffffffUL) +#define UInt64_MIN ((UInt64)0x0000000000000000UL) + +#endif // __COMMON_TYPES_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/DebugEvents.h b/src/coreclr/src/nativeaot/Runtime/inc/DebugEvents.h new file mode 100644 index 0000000000000..657a23c2d0009 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/DebugEvents.h @@ -0,0 +1,68 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// ----------------------------------------------------------------------------------------------------------- +// This defines the payload of debug events that are emited by Redhawk runtime and +// received by the debugger. These payloads are referenced by 1st chance SEH exceptions + + +// ----------------------------------------------------------------------------------------------------------- +// This version of holder does not have a default constructor. +#ifndef __DEBUG_EVENTS_H_ +#define __DEBUG_EVENTS_H_ + +// Special Exception code for RH to communicate to debugger +// RH will raise this exception to communicate managed debug events. +// Exception codes can't use bit 0x10000000, that's reserved by OS. +// NOTE: This is intentionally different than CLR's exception code (0x04242420) +// Perhaps it is because now we are in building 40? Who would know +#define CLRDBG_NOTIFICATION_EXCEPTION_CODE ((int) 0x04040400) + +// This is exception argument 0 included in debugger notification events. +// The debugger uses this as a sanity check. +// This could be very volatile data that changes between builds. +// NOTE: Again intentionally different than CLR's checksum (0x31415927) +// It doesn't have to be, but if anyone is manually looking at these +// exception payloads I am trying to make it obvious that they aren't +// the same. 
+#define CLRDBG_EXCEPTION_DATA_CHECKSUM ((int) 0x27182818) + +typedef enum +{ + DEBUG_EVENT_TYPE_INVALID = 0, + DEBUG_EVENT_TYPE_LOAD_MODULE = 1, + DEBUG_EVENT_TYPE_UNLOAD_MODULE = 2, + DEBUG_EVENT_TYPE_EXCEPTION_THROWN = 3, + DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER = 4, + DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND = 5, + DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED = 6, + DEBUG_EVENT_TYPE_CUSTOM = 7, + DEBUG_EVENT_TYPE_MAX = 8 +} DebugEventType; + +typedef unsigned int ULONG32; + +struct DebugEventPayload +{ + DebugEventType type; + union + { + struct + { + CORDB_ADDRESS pModuleHeader; //ModuleHeader* + } ModuleLoadUnload; + struct + { + CORDB_ADDRESS ip; + CORDB_ADDRESS sp; + } Exception; + struct + { + CORDB_ADDRESS payload; + ULONG32 length; + } Custom; + }; +}; + + +#endif // __DEBUG_EVENTS_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/DebugMacrosExt.h b/src/coreclr/src/nativeaot/Runtime/inc/DebugMacrosExt.h new file mode 100644 index 0000000000000..54622f5a45c8a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/DebugMacrosExt.h @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +//***************************************************************************** +// DebugMacrosExt.h +// +// Simple debugging macros that take no dependencies on CLR headers. +// This header can be used from outside the CLR. +// +//***************************************************************************** + +#ifndef __DebugMacrosExt_h__ +#define __DebugMacrosExt_h__ + +#if !defined(_DEBUG_IMPL) && defined(_DEBUG) && !defined(DACCESS_COMPILE) +#define _DEBUG_IMPL 1 +#endif + +#ifdef _DEBUG +// A macro to execute a statement only in _DEBUG. +#define DEBUG_STMT(stmt) stmt +#define INDEBUG(x) x +#define INDEBUG_COMMA(x) x, +#define COMMA_INDEBUG(x) ,x +#define NOT_DEBUG(x) +#else +#define DEBUG_STMT(stmt) +#define INDEBUG(x) +#define INDEBUG_COMMA(x) +#define COMMA_INDEBUG(x) +#define NOT_DEBUG(x) x +#endif + + +#ifdef _DEBUG_IMPL +#define INDEBUGIMPL(x) x +#define INDEBUGIMPL_COMMA(x) x, +#define COMMA_INDEBUGIMPL(x) ,x +#else +#define INDEBUGIMPL(x) +#define INDEBUGIMPL_COMMA(x) +#define COMMA_INDEBUGIMPL(x) +#endif + + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/src/nativeaot/Runtime/inc/ModuleHeaders.h new file mode 100644 index 0000000000000..83c8c608e8499 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/ModuleHeaders.h @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Please keep the data structures in this file in sync with the managed version at +// src/Common/src/Internal/Runtime/ModuleHeaders.cs +// + +struct ReadyToRunHeaderConstants +{ + static const uint32_t Signature = 0x00525452; // 'RTR' + + static const uint32_t CurrentMajorVersion = 4; + static const uint32_t CurrentMinorVersion = 0; +}; + +struct ReadyToRunHeader +{ + uint32_t Signature; // ReadyToRunHeaderConstants.Signature + uint16_t MajorVersion; + uint16_t MinorVersion; + + uint32_t Flags; + + uint16_t NumberOfSections; + uint8_t EntrySize; + uint8_t EntryType; + + // Array of sections follows. +}; + +// +// ReadyToRunSectionType IDs are used by the runtime to look up specific global data sections +// from each module linked into the final binary. 
New sections should be added at the bottom +// of the enum and deprecated sections should not be removed to preserve ID stability. +// +// Eventually this will be reconciled with ReadyToRunSectionType from +// https://github.com/dotnet/coreclr/blob/master/src/inc/readytorun.h +// +enum class ReadyToRunSectionType +{ + StringTable = 200, + GCStaticRegion = 201, + ThreadStaticRegion = 202, + InterfaceDispatchTable = 203, + TypeManagerIndirection = 204, + EagerCctor = 205, + FrozenObjectRegion = 206, + GCStaticDesc = 207, + ThreadStaticOffsetRegion = 208, + ThreadStaticGCDescRegion = 209, + ThreadStaticIndex = 210, + LoopHijackFlag = 211, + ImportAddressTables = 212, + + // Sections 300 - 399 are reserved for RhFindBlob backwards compatibility + ReadonlyBlobRegionStart = 300, + ReadonlyBlobRegionEnd = 399, +}; + +enum class ModuleInfoFlags +{ + HasEndPointer = 0x1, +}; diff --git a/src/coreclr/src/nativeaot/Runtime/inc/OptionalFieldDefinitions.h b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFieldDefinitions.h new file mode 100644 index 0000000000000..a76c31e83496d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFieldDefinitions.h @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is designed to be included multiple times with different definitions of the +// DEFINE_INLINE_OPTIONAL_FIELD macro in order to build data structures +// related to each type of EEType optional field we support (see OptionalFields.h for details). +// + +// The order of definition of the fields is somewhat important: for types that require multiple optional +// fields the fields are laid out in the order of definition. Thus access to the fields defined first will be +// slightly faster than the later fields. + +#ifndef DEFINE_INLINE_OPTIONAL_FIELD +#error Must define DEFINE_INLINE_OPTIONAL_FIELD before including this file +#endif + +// Field name Field type +DEFINE_INLINE_OPTIONAL_FIELD (RareFlags, UInt32) +DEFINE_INLINE_OPTIONAL_FIELD (DispatchMap, UInt32) +DEFINE_INLINE_OPTIONAL_FIELD (ValueTypeFieldPadding, UInt32) +DEFINE_INLINE_OPTIONAL_FIELD (NullableValueOffset, UInt8) + +#undef DEFINE_INLINE_OPTIONAL_FIELD diff --git a/src/coreclr/src/nativeaot/Runtime/inc/OptionalFields.h b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFields.h new file mode 100644 index 0000000000000..e90c34490aad9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFields.h @@ -0,0 +1,202 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Support for optional fields attached out-of-line to EETypes (or any other data structure for that matter). +// These should be used for attributes that exist for only a small subset of EETypes or are accessed only +// rarely. The idea is to avoid bloating the size of the most common EETypes and to move some of the colder +// data out-of-line to improve the density of the hot data. The basic idea is that the EEType contains a +// pointer to an OptionalFields structure (which may be NULL) and that structure contains a somewhat +// compressed version of the optional fields. +// +// For each OptionalFields instance we encode only the fields that are present so that the structure is as +// small as possible while retaining reasonable access costs. 
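+//
+// As a rough usage sketch (the pointer below is made up for illustration; it would typically come
+// from the owning EEType and may be NULL), a field is read through the typed accessors generated
+// at the bottom of this header, passing the value to fall back on when the field was not encoded
+// for that type:
+//
+//      OptionalFields * pOptionalFields = /* fetched from an EEType, may be NULL */;
+//      UInt32 rareFlags = pOptionalFields->GetRareFlags(/* defaultValue */ 0);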
+// +// This implies some tricky tradeoffs: +// * The more we compress the data the greater the access costs in terms of CPU. +// * More effective compression schemes tend to lead to the payload data being unaligned. This itself can +// result in overhead but on some architectures it's worse than that and the unaligned nature of the data +// requires special handling in client code. Generally it would be more robust and clean not to leak out +// such requirements to our callers. For small fields we can imagine copying the data into aligned storage +// (and indeed that might be a natural part of the decompression process). It might be more problematic for +// larger data items. +// +// In order to get the best of both worlds we employ a hybrid approach. Small values (typically single small +// integers) get encoded inline in a compressed format. Decoding them will automatically copy them into +// aligned storage. Larger values (such as complex data structures) will be stored out-of-line, naturally +// aligned and uncompressed (at least by this layer of the software). The entry in the optional field record +// will instead contain a reference to this out-of-line structure. +// +// Pointers are large (especially on 64-bit) and incur overhead in terms of base relocs and complexity (since +// the locations requiring relocs may not be aligned). To mitigate this we can encode references to these +// out-of-line records as deltas from a base address and by carefully ordering the layout of the out-of-line +// records we can share the same base address amongst multiple OptionalFields structures. +// +// Taking this to one end of the logical extreme we could store a single base address such as the module base +// address and encode all OptionalFields references as offsets from this; basically RVAs. This is cheap in the +// respect that we only need one base address (and associated reloc) but the majority of OptionalFields +// references will encode as fairly large deltas. As we'll touch on later our mechanism for compressing inline +// values in OptionalRecords is based on discarding insignificant leading zero bits; i.e. we encode small +// integers more effectively. So ideally we want to store multiple base addresses so we can lower the average +// encoding cost of the deltas. +// +// An additional concern is how these base addresses are located. Take the module base address example: we +// have no direct means of locating this based on an OptionalFields (or even the EEType that owns it). To +// obtain this value we're likely to have to perform some operation akin to a range lookup and there are +// interesting edge cases such as EETypes for generic types, which don't reside in modules. +// +// The approach taken here addresses several of the concerns above. The algorithm stores base addresses +// interleaved with the OptionalFields. They are located at well-known locations by aligning their addresses +// to a specific value (we can tune this but assume for the purposes of this explanation that the value is 64 +// bytes). This implies that the address requiring a base reloc is always aligned plus it can be located +// cheaply from an OptionalFields address by masking off the low-order bits of that address. +// +// As OptionalFields are added any out-of-line data they reference is stored linearly in the same order (this +// does imply that all out-of-line records must live in the same section and thus must have the same access +// attributes). 
This provides locality: adjacent OptionalFields may encode deltas to different out-of-line +// records but since the out-of-line records are adjacent (or nearly so) as well, both deltas will be about +// the same size. Once we've filled in the space between stored base addresses (some padding might be needed +// near the end where a full OptionalField won't fit, but this should be small given good compression of +// OptionalFields) then we write out a new base address. This is chosen based on the first out-of-line record +// referenced by the next OptionalField (i.e. it will make the first delta zero and keep the subsequent ones +// small). +// +// Consider the following example where for the sake of simplicity we assume each OptionalFields structure has +// precisely one out-of-line reference: +// +// +-----------------+ Out-of-line Records +// | Base Address |----------------------> +--------------------+ +// +-----------------+ | #1 | +// | OptionalFields | +--------------------+ +// | Record #1 | | #2 | +// | | | | +// +-----------------+ +--------------------+ +// | OptionalFields | | #3 | +// | Record #2 | /------------> +--------------------+ +// | | / | #4 | +// +-----------------+ / | | +// | OptionalFields | / | | +// | Record #3 | / +--------------------+ +// | | / | #5 | +// +-----------------+ / | | +// | Padding | / +--------------------+ +// +-----------------+ / : : +// | Base Address |- +// +-----------------+ +// | OptionalFields | +// | Record #4 | +// | | +// +-----------------+ +// | OptionalFields | +// | Record #5 | +// : : +// +// Each optional field uses the base address defined above it (at the lower memory address determined by +// masking off the alignment bits). No matter which out-of-line records they reference the deltas will be as +// small as we can make them. +// +// Lowering the alignment requirement introduces more base addresses and as a result also lowers the number of +// OptionalFields that share the same base address, leading to smaller encodings for out-of-line deltas. But +// at the same time it increases the number of pointers (and associated base relocs) that we must store. +// Additionally the compression of the deltas is not completely linear: certain ranges of delta magnitude will +// result in exactly the same storage being used when compressed. See the details of the delta encoding below +// to see how we can use this to our advantage when tuning the alignment of base addresses. +// +// We optimize the case where OptionalFields structs don't contain any out-of-line references. We collect +// those together and emit them in a single run with no interleaved base addresses. +// +// The OptionalFields record encoding itself is a byte stream representing one or more fields. The first byte +// is a field header: it contains a field type tag in the low-order 7 bits (giving us 128 possible field +// types) and the most significant bit indicates whether this is the last field of the structure. The field +// value (a 32-bit unsigned number) is encoded using the existing VarInt support which encodes the value in +// byte chunks taking between 1 and 5 bytes to do so. +// +// If the field value is out-of-line we decode the delta from the base address in much the same way as for +// inline field values. Before adding the delta to the base address, however, we scale it based on the natural +// alignment of the out-of-line data record it references. 
Since the out-of-line data is aligned on the same +// basis this scaling avoids encoding bits that will always be zero and thus allows us to reference a greater +// range of memory with a delta that encodes using less bytes. +// +// The value compression algorithm above gives us the non-linearity of compression referenced earlier. 32-bit +// values will encode in a given number of bytes based on the having a given number of significant +// (non-leading zero) bits: +// 5 bytes : 25 - 32 significant bits +// 4 bytes : 18 - 24 significant bits +// 3 bytes : 11 - 17 significant bits +// 2 bytes : 4 - 10 significant bits +// 1 byte : 0 - 3 significant bits +// +// We can use this to our advantage when choosing an alignment at which to store base addresses. Assuming that +// most out-of-line data will have an alignment requirement of at least 4 bytes we note that the 2 byte +// encoding already gives us an addressable range of 2^10 * 4 == 4KB which is likely to be enough for the vast +// majority of cases. That is we can raise the granularity of base addresses until the average amount of +// out-of-line data addressed begins to approach 4KB which lowers the cost of storing the base addresses while +// not impacting the encoding size of deltas at all (there's no point in storing base addresses more +// frequently because it won't make the encodings of deltas any smaller). +// +// Trying to tune for one byte deltas all the time is probably not worth it. The addressability range (again +// assuming 4 byte alignment) is only 32 bytes and unless we start storing a lot of small data structures +// out-of-line tuning for this will involve placing the base addresses very frequently and our costs will be +// dominated by the size of the base address pointers and their relocs. +// + +// Define enumeration of optional field tags. +enum OptionalFieldTag +{ +#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) OFT_##_name, +#include "OptionalFieldDefinitions.h" + OFT_Count // Number of field types we support +}; + +// Array that indicates whether a given field type is inline (true) or out-of-line (false). +static bool g_rgOptionalFieldTypeIsInline[OFT_Count] = { +#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) true, +#include "OptionalFieldDefinitions.h" +}; + +// Various random global constants we can tweak for performance tuning. +enum OptionalFieldConstants +{ + // Constants determining how often we interleave a "header" containing a base address for out-of-line + // records into the stream of OptionalFields structures. These will occur at some power of 2 alignment of + // memory address. The alignment must at least exceed that of a pointer (since we'll store a pointer in + // the header and we need room for at least one OptionalFields record between each header). As the + // alignment goes up we store less headers but may impose a larger one-time padding cost at the start of + // the optional fields memory block as well as increasing the average encoding size for out-of-line record + // deltas in each optional field record. + // + // Note that if you change these constants you must be sure to modify the alignment of the optional field + // virtual section in ZapImage.cpp as well as ensuring the alignment of the containing physical section is + // at least as high (this latter cases matters for the COFF output case only, when we're generating PE + // images directly the physical section will get page alignment). 
+ OFC_HeaderAlignmentShift = 7, + OFC_HeaderAlignmentBytes = 1 << OFC_HeaderAlignmentShift, + OFC_HeaderAlignmentMask = OFC_HeaderAlignmentBytes - 1, +}; + +typedef DPTR(class OptionalFields) PTR_OptionalFields; +typedef DPTR(PTR_OptionalFields) PTR_PTR_OptionalFields; + +class OptionalFields +{ +public: + // Define accessors for each field type. +#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) \ + _type Get##_name(_type defaultValue) \ + { \ + return (_type)GetInlineField(OFT_##_name, (UInt32)defaultValue); \ + } + +#include "OptionalFieldDefinitions.h" + +private: + // Reads a field value (or the basis for an out-of-line record delta) starting from the first byte after + // the field header. Advances the field location to the start of the next field. + static OptionalFieldTag DecodeFieldTag(PTR_UInt8 * ppFields, bool *pfLastField); + + // Reads a field value (or the basis for an out-of-line record delta) starting from the first byte of a + // field description. Advances the field location to the start of the next field. + static UInt32 DecodeFieldValue(PTR_UInt8 * ppFields); + + UInt32 GetInlineField(OptionalFieldTag eTag, UInt32 uiDefaultValue); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/inc/TargetPtrs.h b/src/coreclr/src/nativeaot/Runtime/inc/TargetPtrs.h new file mode 100644 index 0000000000000..846e5054727a4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/TargetPtrs.h @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#ifndef _TARGETPTRS_H_ +#define _TARGETPTRS_H_ + +typedef DPTR(class EEType) PTR_EEType; +typedef SPTR(struct StaticGcDesc) PTR_StaticGcDesc; + +#ifdef TARGET_AMD64 +typedef UInt64 UIntTarget; +#elif defined(TARGET_X86) +typedef UInt32 UIntTarget; +#elif defined(TARGET_ARM) +typedef UInt32 UIntTarget; +#elif defined(TARGET_ARM64) +typedef UInt64 UIntTarget; +#elif defined(TARGET_WASM) +typedef UInt32 UIntTarget; +#else +#error unexpected target architecture +#endif + +typedef PTR_UInt8 TgtPTR_UInt8; +typedef PTR_UInt32 TgtPTR_UInt32; +typedef void * TgtPTR_Void; +typedef PTR_EEType TgtPTR_EEType; +typedef class Thread * TgtPTR_Thread; +typedef struct CORINFO_Object * TgtPTR_CORINFO_Object; +typedef PTR_StaticGcDesc TgtPTR_StaticGcDesc; + +#endif // !_TARGETPTRS_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/daccess.h b/src/coreclr/src/nativeaot/Runtime/inc/daccess.h new file mode 100644 index 0000000000000..5f142ff056038 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/daccess.h @@ -0,0 +1,2387 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +//***************************************************************************** +// File: daccess.h +// +// Support for external access of runtime data structures. These +// macros and templates hide the details of pointer and data handling +// so that data structures and code can be compiled to work both +// in-process and through a special memory access layer. +// +// This code assumes the existence of two different pieces of code, +// the target, the runtime code that is going to be examined, and +// the host, the code that's doing the examining. Access to the +// target is abstracted so the target may be a live process on the +// same machine, a live process on a different machine, a dump file +// or whatever. No assumptions should be made about accessibility +// of the target. 
+// +// This code assumes that the data in the target is static. Any +// time the target's data changes, the interfaces must be reset so +// that potentially stale data is discarded. +// +// This code is intended for read access and there is no +// way to write data back currently. +// +// DAC-ized code: +// - is read-only (non-invasive). So DACized codepaths cannot trigger a GC. +// - has no Thread* object. In reality, DAC-ized codepaths are +// ReadProcessMemory calls from out-of-process. Conceptually, they +// are like a pure-native (preemptive) thread. +// +// This means that in particular, you cannot DACize a GCTRIGGERS function. +// Neither can you DACize a function that throws if doing so would involve +// allocating a new exception object. There may be +// exceptions to these rules if you can guarantee that the DACized +// part of the code path cannot cause a garbage collection (see +// EditAndContinueModule::ResolveField for an example). +// If you need to DACize a function that may trigger +// a GC, it is probably best to refactor the function so that the DACized +// part of the code path is in a separate function. For instance, +// functions with GetOrCreate() semantics are hard to DAC-ize because +// the Create portion is inherently invasive. Instead, consider refactoring +// into a GetOrFail() function that DAC can call, and then make GetOrCreate() +// a wrapper around that. + +// +// This code works by hiding the details of access to target memory. +// Access is divided into two types: +// 1. DPTR - access to a piece of data. +// 2. VPTR - access to a class with a vtable. The class can only have +// a single vtable pointer at the beginning of the class instance. +// Things only need to be declared as VPTRs when it is necessary to +// call virtual functions in the host. In that case the access layer +// must do extra work to provide a host vtable for the object when +// it is retrieved so that virtual functions can be called. +// +// When compiling with DACCESS_COMPILE the macros turn into templates +// which replace pointers with smart pointers that know how to fetch +// data from the target process and provide a host process version of it. +// Normal data structure access will transparently receive a host copy +// of the data and proceed, so code such as +// typedef DPTR(Class) PTR_Class; +// PTR_Class cls; +// int val = cls->m_Int; +// will work without modification. The appropriate operators are overloaded +// to provide transparent access, such as the -> operator in this case. +// Note that the convention is to create an appropriate typedef for +// each type that will be accessed. This hides the particular details +// of the type declaration and makes the usage look more like regular code. +// +// The ?PTR classes also have an implicit base type cast operator to +// produce a host-pointer instance of the given type. For example +// Class* cls = PTR_Class(addr); +// works by implicit conversion from the PTR_Class created by wrapping +// to a host-side Class instance. Again, this means that existing code +// can work without modification. +// +// Code Example: +// +// typedef struct _rangesection +// { +// PTR_IJitManager pjit; +// PTR_RangeSection pright; +// PTR_RangeSection pleft; +// ... Other fields omitted ...
+// } RangeSection; +// +// RangeSection* pRS = m_RangeTree; +// +// while (pRS != NULL) +// { +// if (currentPC < pRS->LowAddress) +// pRS=pRS->pleft; +// else if (currentPC > pRS->HighAddress) +// pRS=pRS->pright; +// else +// { +// return pRS->pjit; +// } +// } +// +// This code does not require any modifications. The global reference +// provided by m_RangeTree will be a host version of the RangeSection +// instantiated by conversion. The references to pRS->pleft and +// pRS->pright will refer to DPTRs due to the modified declaration. +// In the assignment statement the compiler will automatically use +// the implicit conversion from PTR_RangeSection to RangeSection*, +// causing a host instance to be created. Finally, if an appropriate +// section is found the use of pRS->pjit will cause an implicit +// conversion from PTR_IJitManager to IJitManager. The VPTR code +// will look at target memory to determine the actual derived class +// for the JitManager and instantiate the right class in the host so +// that host virtual functions can be used just as they would in +// the target. +// +// There are situations where code modifications are required, though. +// +// 1. Any time the actual value of an address matters, such as using +// it as a search key in a tree, the target address must be used. +// +// An example of this is the RangeSection tree used to locate JIT +// managers. A portion of this code is shown above. Each +// RangeSection node in the tree describes a range of addresses +// managed by the JitMan. These addresses are just being used as +// values, not to dereference through, so there are not DPTRs. When +// searching the range tree for an address the address used in the +// search must be a target address as that's what values are kept in +// the RangeSections. In the code shown above, currentPC must be a +// target address as the RangeSections in the tree are all target +// addresses. Use dac_cast to retrieve the target address +// of a ?PTR, as well as to convert a host address to the +// target address used to retrieve that particular instance. Do not +// use dac_cast with any raw target pointer types (such as BYTE*). +// +// 2. Any time an address is modified, such as by address arithmetic, +// the arithmetic must be performed on the target address. +// +// When a host instance is created it is created for the type in use. +// There is no particular relation to any other instance, so address +// arithmetic cannot be used to get from one instance to any other +// part of memory. For example +// char* Func(Class* cls) +// { +// // String follows the basic Class data. +// return (char*)(cls + 1); +// } +// does not work with external access because the Class* used would +// have retrieved only a Class worth of data. There is no string +// following the host instance. Instead, this code should use +// dac_cast to get the target address of the Class +// instance, add sizeof(*cls) and then create a new ?PTR to access +// the desired data. Note that the newly retrieved data will not +// be contiguous with the Class instance, so address arithmetic +// will still not work. 
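+//
+// A corrected version of the function above might look roughly like the following (PTR_READ is
+// described later in this header; the length cbString is assumed to be known to the caller and
+// its name is made up for the example):
+//
+//      char* Func(Class* cls)
+//      {
+//          // Recover the target address of the instance and step past it to the string data.
+//          TADDR strAddr = dac_cast<TADDR>(cls) + sizeof(*cls);
+//          // Marshal the string bytes into a fresh host-side copy.
+//          return (char*)PTR_READ(strAddr, cbString);
+//      }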
+// +// Previous Code: +// +// BOOL IsTarget(LPVOID ip) +// { +// StubCallInstrs* pStubCallInstrs = GetStubCallInstrs(); +// +// if (ip == (LPVOID) &(pStubCallInstrs->m_op)) +// { +// return TRUE; +// } +// +// Modified Code: +// +// BOOL IsTarget(LPVOID ip) +// { +// StubCallInstrs* pStubCallInstrs = GetStubCallInstrs(); +// +// if ((TADDR)ip == dac_cast(pStubCallInstrs) + +// (TADDR)offsetof(StubCallInstrs, m_op)) +// { +// return TRUE; +// } +// +// The parameter ip is a target address, so the host pStubCallInstrs +// cannot be used to derive an address from. The member & reference +// has to be replaced with a conversion from host to target address +// followed by explicit offsetting for the field. +// +// PTR_HOST_MEMBER_TADDR is a convenience macro that encapsulates +// these two operations, so the above code could also be: +// +// if ((TADDR)ip == +// PTR_HOST_MEMBER_TADDR(StubCallInstrs, pStubCallInstrs, m_op)) +// +// 3. Any time the amount of memory referenced through an address +// changes, such as by casting to a different type, a new ?PTR +// must be created. +// +// Host instances are created and stored based on both the target +// address and size of access. The access code has no way of knowing +// all possible ways that data will be retrieved for a given address +// so if code changes the way it accesses through an address a new +// ?PTR must be used, which may lead to a difference instance and +// different host address. This means that pointer identity does not hold +// across casts, so code like +// Class* cls = PTR_Class(addr); +// Class2* cls2 = PTR_Class2(addr); +// return cls == cls2; +// will fail because the host-side instances have no relation to each +// other. That isn't a problem, since by rule #1 you shouldn't be +// relying on specific host address values. +// +// Previous Code: +// +// return (ArrayClass *) m_pMethTab->GetClass(); +// +// Modified Code: +// +// return PTR_ArrayClass(m_pMethTab->GetClass()); +// +// The ?PTR templates have an implicit conversion from a host pointer +// to a target address, so the cast above constructs a new +// PTR_ArrayClass by implicitly converting the host pointer result +// from GetClass() to its target address and using that as the address +// of the new PTR_ArrayClass. As mentioned, the actual host-side +// pointer values may not be the same. +// +// Host pointer identity can be assumed as long as the type of access +// is the same. In the example above, if both accesses were of type +// Class then the host pointer will be the same, so it is safe to +// retrieve the target address of an instance and then later get +// a new host pointer for the target address using the same type as +// the host pointer in that case will be the same. This is enabled +// by caching all of the retrieved host instances. This cache is searched +// by the addr:size pair and when there's a match the existing instance +// is reused. This increases performance and also allows simple +// pointer identity to hold. It does mean that host memory grows +// in proportion to the amount of target memory being referenced, +// so retrieving extraneous data should be avoided. +// The host-side data cache grows until the Flush() method is called, +// at which point all host-side data is discarded. No host +// instance pointers should be held across a Flush(). +// +// Accessing into an object can lead to some unusual behavior. For +// example, the SList class relies on objects to contain an SLink +// instance that it uses for list maintenance. 
This SLink can be +// embedded anywhere in the larger object. The SList access is always +// purely to an SLink, so when using the access layer it will only +// retrieve an SLink's worth of data. The SList template will then +// do some address arithmetic to determine the start of the real +// object and cast the resulting pointer to the final object type. +// When using the access layer this results in a new ?PTR being +// created and used, so a new instance will result. The internal +// SLink instance will have no relation to the new object instance +// even though in target address terms one is embedded in the other. +// The assumption of data stability means that this won't cause +// a problem, but care must be taken with the address arithmetic, +// as layed out in rules #2 and #3. +// +// 4. Global address references cannot be used. Any reference to a +// global piece of code or data, such as a function address, global +// variable or class static variable, must be changed. +// +// The external access code may load at a different base address than +// the target process code. Global addresses are therefore not +// meaningful and must be replaced with something else. There isn't +// a single solution, so replacements must be done on a case-by-case +// basis. +// +// The simplest case is a global or class static variable. All +// declarations must be replaced with a special declaration that +// compiles into a modified accessor template value when compiled for +// external data access. Uses of the variable automatically are fixed +// up by the template instance. Note that assignment to the global +// must be independently ifdef'ed as the external access layer should +// not make any modifications. +// +// Macros allow for simple declaration of a class static and global +// values that compile into an appropriate templated value. +// +// Previous Code: +// +// static RangeSection* m_RangeTree; +// RangeSection* ExecutionManager::m_RangeTree; +// +// extern ThreadStore* g_pThreadStore; +// ThreadStore* g_pThreadStore = &StaticStore; +// class SystemDomain : public BaseDomain { +// ... +// ArrayListStatic m_appDomainIndexList; +// ... +// } +// +// SystemDomain::m_appDomainIndexList; +// +// extern DWORD gThreadTLSIndex; +// +// DWORD gThreadTLSIndex = TLS_OUT_OF_INDEXES; +// +// Modified Code: +// +// typedef DPTR(RangeSection) PTR_RangeSection; +// SPTR_DECL(RangeSection, m_RangeTree); +// SPTR_IMPL(RangeSection, ExecutionManager, m_RangeTree); +// +// typedef DPTR(ThreadStore) PTR_ThreadStore +// GPTR_DECL(ThreadStore, g_pThreadStore); +// GPTR_IMPL_INIT(ThreadStore, g_pThreadStore, &StaticStore); +// +// class SystemDomain : public BaseDomain { +// ... +// SVAL_DECL(ArrayListStatic; m_appDomainIndexList); +// ... +// } +// +// SVAL_IMPL(ArrayListStatic, SystemDomain, m_appDomainIndexList); +// +// GVAL_DECL(DWORD, gThreadTLSIndex); +// +// GVAL_IMPL_INIT(DWORD, gThreadTLSIndex, TLS_OUT_OF_INDEXES); +// +// When declaring the variable, the first argument declares the +// variable's type and the second argument declares the variable's +// name. When defining the variable the arguments are similar, with +// an extra class name parameter for the static class variable case. +// If an initializer is needed the IMPL_INIT macro should be used. +// +// Things get slightly more complicated when declaring an embedded +// array. In this case the data element is not a single element and +// therefore cannot be represented by a ?PTR. 
In the case of a global +// array, you should use the GARY_DECL and GARY_IMPL macros. +// We durrently have no support for declaring static array data members +// or initialized arrays. Array data members that are dynamically allocated +// need to be treated as pointer members. To reference individual elements +// you must use pointer arithmetic (see rule 2 above). An array declared +// as a local variable within a function does not need to be DACized. +// +// +// All uses of ?VAL_DECL must have a corresponding entry given in the +// DacGlobals structure in src\inc\dacvars.h. For SVAL_DECL the entry +// is class__name. For GVAL_DECL the entry is dac__name. You must add +// these entries in dacvars.h using the DEFINE_DACVAR macro. Note that +// these entries also are used for dumping memory in mini dumps and +// heap dumps. If it's not appropriate to dump a variable, (e.g., +// it's an array or some other value that is not important to have +// in a minidump) a second macro, DEFINE_DACVAR_NO_DUMP, will allow +// you to make the required entry in the DacGlobals structure without +// dumping its value. +// +// For convenience, here is a list of the various variable declaration and +// initialization macros: +// SVAL_DECL(type, name) static non-pointer data class MyClass +// member declared within { +// the class declaration // static int i; +// SVAL_DECL(int, i); +// } +// +// SVAL_IMPL(type, cls, name) static non-pointer data // int MyClass::i; +// member defined outside SVAL_IMPL(int, MyClass, i); +// the class declaration +// +// SVAL_IMPL_INIT(type, cls, static non-pointer data // int MyClass::i = 0; +// name, val) member defined and SVAL_IMPL_INIT(int, MyClass, i, 0); +// initialized outside the +// class declaration +// ------------------------------------------------------------------------------------------------ +// SPTR_DECL(type, name) static pointer data class MyClass +// member declared within { +// the class declaration // static int * pInt; +// SPTR_DECL(int, pInt); +// } +// +// SPTR_IMPL(type, cls, name) static pointer data // int * MyClass::pInt; +// member defined outside SPTR_IMPL(int, MyClass, pInt); +// the class declaration +// +// SPTR_IMPL_INIT(type, cls, static pointer data // int * MyClass::pInt = NULL; +// name, val) member defined and SPTR_IMPL_INIT(int, MyClass, pInt, NULL); +// initialized outside the +// class declaration +// ------------------------------------------------------------------------------------------------ +// GVAL_DECL(type, name) extern declaration of // extern int g_i +// global non-pointer GVAL_DECL(int, g_i); +// variable +// +// GVAL_IMPL(type, name) declaration of a // int g_i +// global non-pointer GVAL_IMPL(int, g_i); +// variable +// +// GVAL_IMPL_INIT (type, declaration and // int g_i = 0; +// name, initialization of a GVAL_IMPL_INIT(int, g_i, 0); +// val) global non-pointer +// variable +// ****Note**** +// If you use GVAL_? to declare a global variable of a structured type and you need to +// access a member of the type, you cannot use the dot operator. Instead, you must take the +// address of the variable and use the arrow operator. 
For example: +// struct MyStruct +// { +// int x; +// char ch; +// }; +// GVAL_IMPL(MyStruct, g_myStruct); +// int i = (&g_myStruct)->x; +// ------------------------------------------------------------------------------------------------ +// GPTR_DECL(type, name) extern declaration of // extern int * g_pInt +// global pointer GPTR_DECL(int, g_pInt); +// variable +// +// GPTR_IMPL(type, name) declaration of a // int * g_pInt +// global pointer GPTR_IMPL(int, g_pInt); +// variable +// +// GPTR_IMPL_INIT (type, declaration and // int * g_pInt = 0; +// name, initialization of a GPTR_IMPL_INIT(int, g_pInt, NULL); +// val) global pointer +// variable +// ------------------------------------------------------------------------------------------------ +// GARY_DECL(type, name) extern declaration of // extern int g_rgIntList[MAX_ELEMENTS]; +// a global array GARY_DECL(int, g_rgIntList, MAX_ELEMENTS); +// variable +// +// GARY_IMPL(type, name) declaration of a // int g_rgIntList[MAX_ELEMENTS]; +// global array GARY_IMPL(int, g_rgIntList, MAX_ELEMENTS); +// variable +// +// +// Certain pieces of code, such as the stack walker, rely on identifying +// an object from its vtable address. As the target vtable addresses +// do not necessarily correspond to the vtables used in the host, these +// references must be translated. The access layer maintains translation +// tables for all classes used with VPTR and can return the target +// vtable pointer for any host vtable in the known list of VPTR classes. +// +// ----- Errors: +// +// All errors in the access layer are reported via exceptions. The +// formal access layer methods catch all such exceptions and turn +// them into the appropriate error, so this generally isn't visible +// to users of the access layer. +// +// ----- DPTR Declaration: +// +// Create a typedef for the type with typedef DPTR(type) PTR_type; +// Replace type* with PTR_type. +// +// ----- VPTR Declaration: +// +// VPTR can only be used on classes that have a single vtable +// pointer at the beginning of the object. This should be true +// for a normal single-inheritance object. +// +// All of the classes that may be instantiated need to be identified +// and marked. In the base class declaration add either +// VPTR_BASE_VTABLE_CLASS if the class is abstract or +// VPTR_BASE_CONCRETE_VTABLE_CLASS if the class is concrete. In each +// derived class add VPTR_VTABLE_CLASS. If you end up with compile or +// link errors for an unresolved method called VPtrSize you missed a +// derived class declaration. +// +// As described above, dac can only handle classes with a single +// vtable. However, there's a special case for multiple inheritance +// situations when only one of the classes is needed for dac. If +// the base class needed is the first class in the derived class's +// layout then it can be used with dac via the VPTR_MULTI_CLASS +// macros. Use with extreme care. +// +// All classes to be instantiated must be listed in src\inc\vptr_list.h. +// +// Create a typedef for the type with typedef VPTR(type) PTR_type; +// When using a VPTR, replace Class* with PTR_Class. +// +// ----- Specific Macros: +// +// PTR_TO_TADDR(ptr) +// Retrieves the raw target address for a ?PTR. +// See code:dac_cast for the preferred alternative +// +// PTR_HOST_TO_TADDR(host) +// Given a host address of an instance produced by a ?PTR reference, +// return the original target address. The host address must +// be an exact match for an instance.
+// See code:dac_cast for the preferred alternative +// +// PTR_HOST_INT_TO_TADDR(host) +// Given a host address which resides somewhere within an instance +// produced by a ?PTR reference (a host interior pointer) return the +// corresponding target address. This is useful for evaluating +// relative pointers (e.g. RelativePointer) where calculating the +// target address requires knowledge of the target address of the +// relative pointer field itself. This lookup is slower than that for +// a non-interior host pointer so use it sparingly. +// +// VPTR_HOST_VTABLE_TO_TADDR(host) +// Given the host vtable pointer for a known VPTR class, return +// the target vtable pointer. +// +// PTR_HOST_MEMBER_TADDR(type, host, memb) +// Retrieves the target address of a host instance pointer and +// offsets it by the given member's offset within the type. +// +// PTR_HOST_INT_MEMBER_TADDR(type, host, memb) +// As above but will work for interior host pointers (see the +// description of PTR_HOST_INT_TO_TADDR for an explanation of host +// interior pointers). +// +// PTR_READ(addr, size) +// Reads a block of memory from the target and returns a host +// pointer for it. Useful for reading blocks of data from the target +// whose size is only known at runtime, such as raw code for a jitted +// method. If the data being read is actually an object, use SPTR +// instead to get better type semantics. +// +// DAC_EMPTY() +// DAC_EMPTY_ERR() +// DAC_EMPTY_RET(retVal) +// DAC_UNEXPECTED() +// Provides an empty method implementation when compiled +// for DACCESS_COMPILE. For example, use to stub out methods needed +// for vtable entries but otherwise unused. +// +// These macros are designed to turn into normal code when compiled +// without DACCESS_COMPILE. +// +//***************************************************************************** +// See code:EEStartup#TableOfContents for EE overview + +#ifndef __daccess_h__ +#define __daccess_h__ + +#ifndef __in +#include +#endif + +#define DACCESS_TABLE_RESOURCE L"COREXTERNALDATAACCESSRESOURCE" + +#include "type_traits.hpp" + +#ifdef DACCESS_COMPILE + +#include "safemath.h" + +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +typedef UInt64 UIntTarget; +#elif defined(TARGET_X86) +typedef UInt32 UIntTarget; +#elif defined(TARGET_ARM) +typedef UInt32 UIntTarget; +#else +#error unexpected target architecture +#endif + +// +// This version of things wraps pointer access in +// templates which understand how to retrieve data +// through an access layer. In this case no assumptions +// can be made that the current compilation processor or +// pointer types match the target's processor or pointer types. +// + +// Define TADDR as a non-pointer value so use of it as a pointer +// will not work properly. Define it as unsigned so +// pointer comparisons aren't affected by sign. +// This requires special casting to ULONG64 to sign-extend if necessary. +// XXX drewb - Cheating right now by not supporting cross-plat. +typedef UIntTarget TADDR; + +// TSIZE_T used for counts or ranges that need to span the size of a +// target pointer. For cross-plat, this may be different than SIZE_T +// which reflects the host pointer size. 
+typedef UIntTarget TSIZE_T; + +// Information stored in the DAC table of interest to the DAC implementation +// Note that this information is shared between all instantiations of ClrDataAccess, so initialize +// it just once in code:ClrDataAccess.GetDacGlobals (rather than use fields in ClrDataAccess); +struct DacTableInfo +{ + // On Windows, the first DWORD is the 32-bit timestamp read out of the runtime dll's debug directory. + // The remaining 3 DWORDS must all be 0. + // On Mac, this is the 16-byte UUID of the runtime dll. + // It is used to validate that mscorwks is the same version as mscordacwks + UInt32 dwID0; + UInt32 dwID1; + UInt32 dwID2; + UInt32 dwID3; +}; +extern DacTableInfo g_dacTableInfo; + +// +// The following table contains all the global information that data access needs to begin +// operation. All of the values stored here are RVAs. DacGlobalBase() returns the current +// base address to combine with to get a full target address. +// + +typedef struct _DacGlobals +{ +// These will define all of the dac related mscorwks static and global variables +// TODO: update DacTableGen to parse "UInt32" instead of "ULONG32" for the ids +#ifdef DAC_CLR_ENVIRONMENT +#define DEFINE_DACVAR(id_type, size, id) id_type id; +#define DEFINE_DACVAR_NO_DUMP(id_type, size, id) id_type id; +#else +#define DEFINE_DACVAR(id_type, size, id) UInt32 id; +#define DEFINE_DACVAR_NO_DUMP(id_type, size, id) UInt32 id; +#endif +#include "dacvars.h" +#undef DEFINE_DACVAR_NO_DUMP +#undef DEFINE_DACVAR + +/* + // Global functions. + ULONG fn__QueueUserWorkItemCallback; + ULONG fn__ThreadpoolMgr__AsyncCallbackCompletion; + ULONG fn__ThreadpoolMgr__AsyncTimerCallbackCompletion; + ULONG fn__DACNotifyCompilationFinished; +#ifdef HOST_X86 + ULONG fn__NativeDelayFixupAsmStub; + ULONG fn__NativeDelayFixupAsmStubRet; +#endif // HOST_X86 + ULONG fn__PInvokeCalliReturnFromCall; + ULONG fn__NDirectGenericStubReturnFromCall; + ULONG fn__DllImportForDelegateGenericStubReturnFromCall; +*/ + +} DacGlobals; + +extern DacGlobals g_dacGlobals; + +#ifdef __cplusplus +extern "C" { +#endif + +// These two functions are largely just for marking code +// that is not fully converted. DacWarning prints a debug +// message, while DacNotImpl throws a not-implemented exception. +void __cdecl DacWarning(__in __in_z char* format, ...); +void DacNotImpl(void); +void DacError(HRESULT err); +void __declspec(noreturn) DacError_NoRet(HRESULT err); +TADDR DacGlobalBase(void); +HRESULT DacReadAll(TADDR addr, void* buffer, uint32_t size, bool throwEx); +#ifdef DAC_CLR_ENVIRONMENT +HRESULT DacWriteAll(TADDR addr, PVOID buffer, ULONG32 size, bool throwEx); +HRESULT DacAllocVirtual(TADDR addr, ULONG32 size, + ULONG32 typeFlags, ULONG32 protectFlags, + bool throwEx, TADDR* mem); +HRESULT DacFreeVirtual(TADDR mem, ULONG32 size, ULONG32 typeFlags, + bool throwEx); + +#endif // DAC_CLR_ENVIRONMENT + +/* We are simulating a tiny bit of memory existing in the debuggee address space that really isn't there. + The memory appears to exist in the last 1KB of the memory space to make minimal risk that + it collides with any legitimate debuggee memory. When the DAC uses + DacInstantiateTypeByAddressHelper on these high addresses instead of getting back a pointer + in the DAC_INSTANCE cache it will get back a pointer to specifically configured block of + debugger memory. + + Rationale: + This method was invented to solve a problem when doing stack walking in the DAC. 
When + running in-process the register context has always been written to memory somewhere before + the stackwalker begins to operate. The stackwalker doesn't track the registers themselves, + but rather the storage locations where registers were written. + When the DAC runs the registers haven't been saved anywhere - there is no memory address + that refers to them. It would be easy to store the registers in the debugger's memory space + but the Regdisplay is typed as PTR_UIntNative, not UIntNative*. We could change REGDISPLAY + to point at debugger local addresses, but then we would have the opposite problem, being unable + to refer to stack addresses that are in the debuggee memory space. Options we could do: + 1) Add discriminant bits to REGDISPLAY fields to record whether the pointer is local or remote + a) Do it in the runtime definition - adds size and complexity to mrt100 for a debug only scenario + b) Do it only in the DAC definition - breaks marshalling for types that are or contain REGDISPLAY + (ie StackFrameIterator). + 2) Add a new DebuggerREGDISPLAY type that can hold local or remote addresses, and then create + parallel DAC stackwalking code that uses it. This is a bunch of work and + has higher maintenance cost to keep both code paths operational and functionally identical. + 3) Allocate space in debuggee that will be used to stash the registers when doing a debug stackwalk - + increases runtime working set for debug only scenario and won't work for dumps + 4) Same as #3, but don't actually allocate the space at runtime, just simulate that it was allocated + within the debugger - risk of colliding with real runtime allocations, adds complexity to the + DAC. + + #4 seems the best option to me, so we wound up here. +*/ + +// This address is picked to be very unlikely to collide with any real memory usage in the target +#define SIMULATED_DEBUGGEE_MEMORY_BASE_ADDRESS ((TADDR) -1024) +// The byte at ((TADDR)-1) isn't addressable at all, so we only have 1023 bytes of usable space +// At the moment we only need 256 bytes at most. +#define SIMULATED_DEBUGGEE_MEMORY_MAX_SIZE 1023 + +// Sets the simulated debuggee memory region, or clears it if pSimulatedDebuggeeMemory = NULL +// See large comment above for more details. +void SetSimulatedDebuggeeMemory(void* pSimulatedDebuggeeMemory, UInt32 cbSimulatedDebuggeeMemory); + +void* DacInstantiateTypeByAddress(TADDR addr, UInt32 size, bool throwEx); +void* DacInstantiateTypeByAddressNoReport(TADDR addr, UInt32 size, bool throwEx); +void* DacInstantiateClassByVTable(TADDR addr, UInt32 minSize, bool throwEx); + +// This method should not be used casually. Make sure simulatedTargetAddr does not cause collisions. See comment in dacfn.cpp for more details. +void* DacInstantiateTypeAtSimulatedAddress(TADDR simulatedTargetAddr, UInt32 size, void* pLocalBuffer, bool throwEx); + +// Copy a null-terminated ascii or unicode string from the target to the host. +// Note that most of the work here is to find the null terminator. If you know the exact length, +// then you can also just call DacInstantiateTypebyAddress. 
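// For example (illustrative): given the target address of a UTF-16 name,
//     wchar_t * wszName = DacInstantiateStringW(taName, 1024, true);
// marshals up to 1024 characters (stopping at the null terminator) into a host buffer.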
+char* DacInstantiateStringA(TADDR addr, UInt32 maxChars, bool throwEx); +wchar_t* DacInstantiateStringW(TADDR addr, UInt32 maxChars, bool throwEx); + +TADDR DacGetTargetAddrForHostAddr(const void* ptr, bool throwEx); +TADDR DacGetTargetAddrForHostInteriorAddr(const void* ptr, bool throwEx); +TADDR DacGetTargetVtForHostVt(const void* vtHost, bool throwEx); +wchar_t* DacGetVtNameW(TADDR targetVtable); + +// Report a region of memory to the debugger +void DacEnumMemoryRegion(TADDR addr, TSIZE_T size, bool fExpectSuccess = true); + +HRESULT DacWriteHostInstance(void * host, bool throwEx); + +#ifdef DAC_CLR_ENVIRONMENT + +// Occasionally it's necessary to allocate some host memory for +// instance data that's created on the fly and so doesn't directly +// correspond to target memory. These are held and freed on flush +// like other instances but can't be looked up by address. +PVOID DacAllocHostOnlyInstance(ULONG32 size, bool throwEx); + +// Determines whether ASSERTs should be raised when inconsistencies in the target are detected +bool DacTargetConsistencyAssertsEnabled(); + +// Host instances can be marked as they are enumerated in +// order to break cycles. This function returns true if +// the instance is already marked, otherwise it marks the +// instance and returns false. +bool DacHostPtrHasEnumMark(LPCVOID host); + +// Determines if EnumMemoryRegions has been called on a method descriptor. +// This helps perf for minidumps of apps with large managed stacks. +bool DacHasMethodDescBeenEnumerated(LPCVOID pMD); + +// Sets a flag indicating that EnumMemoryRegions on a method desciptor +// has been successfully called. The function returns true if +// this flag had been previously set. +bool DacSetMethodDescEnumerated(LPCVOID pMD); + +// Determines if a method descriptor is valid +BOOL DacValidateMD(LPCVOID pMD); + +// Enumerate the instructions around a call site to help debugger stack walking heuristics +void DacEnumCodeForStackwalk(TADDR taCallEnd); + +// Given the address and the size of a memory range which is stored in the buffer, replace all the patches +// in the buffer with the real opcodes. This is especially important on X64 where the unwinder needs to +// disassemble the native instructions. +class MemoryRange; +HRESULT DacReplacePatchesInHostMemory(MemoryRange range, PVOID pBuffer); + +// +// Convenience macros for EnumMemoryRegions implementations. +// + +// Enumerate the given host instance and return +// true if the instance hasn't already been enumerated. +#define DacEnumHostDPtrMem(host) \ + (!DacHostPtrHasEnumMark(host) ? \ + (DacEnumMemoryRegion(PTR_HOST_TO_TADDR(host), sizeof(*host)), \ + true) : false) +#define DacEnumHostSPtrMem(host, type) \ + (!DacHostPtrHasEnumMark(host) ? \ + (DacEnumMemoryRegion(PTR_HOST_TO_TADDR(host), \ + type::DacSize(PTR_HOST_TO_TADDR(host))), \ + true) : false) +#define DacEnumHostVPtrMem(host) \ + (!DacHostPtrHasEnumMark(host) ? \ + (DacEnumMemoryRegion(PTR_HOST_TO_TADDR(host), (host)->VPtrSize()), \ + true) : false) + +// Check enumeration of 'this' and return if this has already been +// enumerated. Making this the first line of an object's EnumMemoryRegions +// method will prevent cycles. 
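// For example (illustrative sketch; MyNode is a hypothetical marshalled type):
//     void MyNode::EnumMemoryRegions()
//     {
//         DAC_ENUM_DTHIS();                    // report 'this' and bail out if it was already enumerated
//         if (m_pNext != NULL)
//             m_pNext->EnumMemoryRegions();    // then walk reachable instances
//     }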
+#define DAC_CHECK_ENUM_THIS() \ + if (DacHostPtrHasEnumMark(this)) return +#define DAC_ENUM_DTHIS() \ + if (!DacEnumHostDPtrMem(this)) return +#define DAC_ENUM_STHIS(type) \ + if (!DacEnumHostSPtrMem(this, type)) return +#define DAC_ENUM_VTHIS() \ + if (!DacEnumHostVPtrMem(this)) return + +#endif // DAC_CLR_ENVIRONMENT + +#ifdef __cplusplus +} + +// +// Computes (taBase + (dwIndex * dwElementSize()), with overflow checks. +// +// Arguments: +// taBase the base TADDR value +// dwIndex the index of the offset +// dwElementSize the size of each element (to multiply the offset by) +// +// Return value: +// The resulting TADDR, or throws CORDB_E_TARGET_INCONSISTENT on overlow. +// +// Notes: +// The idea here is that overflows during address arithmetic suggest that we're operating on corrupt +// pointers. It helps to improve reliability to detect the cases we can (like overflow) and fail. Note +// that this is just a heuristic, not a security measure. We can't trust target data regardless - +// failing on overflow is just one easy case of corruption to detect. There is no need to use checked +// arithmetic everywhere in the DAC infrastructure, this is intended just for the places most likely to +// help catch bugs (eg. __DPtr::operator[]). +// +inline TADDR DacTAddrOffset( TADDR taBase, TSIZE_T dwIndex, TSIZE_T dwElementSize ) +{ +#ifdef DAC_CLR_ENVIRONMENT + ClrSafeInt t(taBase); + t += ClrSafeInt(dwIndex) * ClrSafeInt(dwElementSize); + if( t.IsOverflow() ) + { + // Pointer arithmetic overflow - probably due to corrupt target data + //DacError(CORDBG_E_TARGET_INCONSISTENT); + DacError(E_FAIL); + } + return t.Value(); +#else // TODO: port safe math + return taBase + (dwIndex*dwElementSize); +#endif +} + +// Base pointer wrapper which provides common behavior. +class __TPtrBase +{ +public: + __TPtrBase() + { + // Make uninitialized pointers obvious. + m_addr = (TADDR)-1; + } + explicit __TPtrBase(TADDR addr) + { + m_addr = addr; + } + + bool operator!() const + { + return m_addr == 0; + } + // We'd like to have an implicit conversion to bool here since the C++ + // standard says all pointer types are implicitly converted to bool. + // Unfortunately, that would cause ambiguous overload errors for uses + // of operator== and operator!=. Instead callers will have to compare + // directly against NULL. 
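// For example (illustrative): with PTR_MyType p, test the wrapped address with
//     if (p != NULL) { ... }    // uses operator!=(TADDR) below
// or
//     if (!p) { ... }           // uses operator! above
// rather than relying on an implicit conversion to bool, which is deliberately not provided.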
+ + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } + bool operator<(TADDR addr) const + { + return m_addr < addr; + } + bool operator>(TADDR addr) const + { + return m_addr > addr; + } + bool operator<=(TADDR addr) const + { + return m_addr <= addr; + } + bool operator>=(TADDR addr) const + { + return m_addr >= addr; + } + + TADDR GetAddr(void) const + { + return m_addr; + } + TADDR SetAddr(TADDR addr) + { + m_addr = addr; + return addr; + } + +protected: + TADDR m_addr; +}; + +// Adds comparison operations +// Its possible we just want to merge these into __TPtrBase, but SPtr isn't comparable with +// other types right now and I would rather stay conservative +class __ComparableTPtrBase : public __TPtrBase +{ +protected: + __ComparableTPtrBase(void) : __TPtrBase() + {} + + explicit __ComparableTPtrBase(TADDR addr) : __TPtrBase(addr) + {} + +public: + bool operator==(const __ComparableTPtrBase& ptr) const + { + return m_addr == ptr.m_addr; + } + bool operator!=(const __ComparableTPtrBase& ptr) const + { + return !operator==(ptr); + } + bool operator<(const __ComparableTPtrBase& ptr) const + { + return m_addr < ptr.m_addr; + } + bool operator>(const __ComparableTPtrBase& ptr) const + { + return m_addr > ptr.m_addr; + } + bool operator<=(const __ComparableTPtrBase& ptr) const + { + return m_addr <= ptr.m_addr; + } + bool operator>=(const __ComparableTPtrBase& ptr) const + { + return m_addr >= ptr.m_addr; + } +}; + +// Pointer wrapper base class for various forms of normal data. +// This has the common functionality between __DPtr and __ArrayDPtr. +// The DPtrType type parameter is the actual derived type in use. This is necessary so that +// inhereted functions preserve exact return types. +template +class __DPtrBase : public __ComparableTPtrBase +{ +public: + typedef type _Type; + typedef type* _Ptr; + +protected: + // Constructors + // All protected - this type should not be used directly - use one of the derived types instead. + __DPtrBase< type, DPtrType >(void) : __ComparableTPtrBase() + {} + + explicit __DPtrBase< type, DPtrType >(TADDR addr) : __ComparableTPtrBase(addr) + {} + + explicit __DPtrBase(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __DPtrBase(type const * host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + +public: + DPtrType& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.GetAddr(); + return DPtrType(m_addr); + } + DPtrType& operator=(TADDR addr) + { + m_addr = addr; + return DPtrType(m_addr); + } + + type& operator*(void) const + { + return *(type*)DacInstantiateTypeByAddress(m_addr, sizeof(type), true); + } + + + using __ComparableTPtrBase::operator==; + using __ComparableTPtrBase::operator!=; + using __ComparableTPtrBase::operator<; + using __ComparableTPtrBase::operator>; + using __ComparableTPtrBase::operator<=; + using __ComparableTPtrBase::operator>=; + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } + + // Array index operator + // we want an operator[] for all possible numeric types (rather than rely on + // implicit numeric conversions on the argument) to prevent ambiguity with + // DPtr's implicit conversion to type* and the built-in operator[]. + // @dbgtodo rbyers: we could also use this technique to simplify other operators below. + template + type& operator[](indexType index) + { + // Compute the address of the element. 
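        // For non-negative indexes this goes through DacTAddrOffset so that overflow in
        // m_addr + index * sizeof(type) fails with DacError rather than silently wrapping.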
+ TADDR elementAddr; + if( index >= 0 ) + { + elementAddr = DacTAddrOffset(m_addr, index, sizeof(type)); + } + else + { + // Don't bother trying to do overflow checking for negative indexes - they are rare compared to + // positive ones. ClrSafeInt doesn't support signed datatypes yet (although we should be able to add it + // pretty easily). + elementAddr = m_addr + index * sizeof(type); + } + + // Marshal over a single instance and return a reference to it. + return *(type*) DacInstantiateTypeByAddress(elementAddr, sizeof(type), true); + } + + template + type const & operator[](indexType index) const + { + return (*const_cast<__DPtrBase*>(this))[index]; + } + + //------------------------------------------------------------------------- + // operator+ + + DPtrType operator+(unsigned short val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } + DPtrType operator+(short val) + { + return DPtrType(m_addr + val * sizeof(type)); + } + // size_t is unsigned int on Win32, so we need + // to ifdef here to make sure the unsigned int + // and size_t overloads don't collide. size_t + // is marked __w64 so a simple unsigned int + // will not work on Win32, it has to be size_t. + DPtrType operator+(size_t val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } +#if (!defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM)) || (defined(HOST_X86) && defined(__APPLE__)) + DPtrType operator+(unsigned int val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } +#endif // (!defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM)) || (defined(HOST_X86) && defined(__APPLE__)) + DPtrType operator+(int val) + { + return DPtrType(m_addr + val * sizeof(type)); + } +#ifndef TARGET_UNIX // for now, everything else is 32 bit + DPtrType operator+(unsigned long val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } + DPtrType operator+(long val) + { + return DPtrType(m_addr + val * sizeof(type)); + } +#endif // !TARGET_UNIX // for now, everything else is 32 bit +#if !defined(HOST_ARM) && !defined(HOST_X86) + DPtrType operator+(IntNative val) + { + return DPtrType(m_addr + val * sizeof(type)); + } +#endif + + //------------------------------------------------------------------------- + // operator- + + DPtrType operator-(unsigned short val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + DPtrType operator-(short val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + // size_t is unsigned int on Win32, so we need + // to ifdef here to make sure the unsigned int + // and size_t overloads don't collide. size_t + // is marked __w64 so a simple unsigned int + // will not work on Win32, it has to be size_t. 
+ DPtrType operator-(size_t val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + DPtrType operator-(signed __int64 val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#if !defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM) + DPtrType operator-(unsigned int val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#endif // !defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM) + DPtrType operator-(int val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#ifdef _MSC_VER // for now, everything else is 32 bit + DPtrType operator-(unsigned long val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + DPtrType operator-(long val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#endif // _MSC_VER // for now, everything else is 32 bit + size_t operator-(const DPtrType& val) + { + return (size_t)((m_addr - val.m_addr) / sizeof(type)); + } + + //------------------------------------------------------------------------- + + DPtrType& operator+=(size_t val) + { + m_addr += val * sizeof(type); + return static_cast(*this); + } + DPtrType& operator-=(size_t val) + { + m_addr -= val * sizeof(type); + return static_cast(*this); + } + + DPtrType& operator++() + { + m_addr += sizeof(type); + return static_cast(*this); + } + DPtrType& operator--() + { + m_addr -= sizeof(type); + return static_cast(*this); + } + DPtrType operator++(int postfix) + { + UNREFERENCED_PARAMETER(postfix); + DPtrType orig = DPtrType(*this); + m_addr += sizeof(type); + return orig; + } + DPtrType operator--(int postfix) + { + UNREFERENCED_PARAMETER(postfix); + DPtrType orig = DPtrType(*this); + m_addr -= sizeof(type); + return orig; + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateTypeByAddress(m_addr, sizeof(type), + false) != NULL; + } + void EnumMem(void) const + { + DacEnumMemoryRegion(m_addr, sizeof(type)); + } +}; + +// Pointer wrapper for objects which are just plain data +// and need no special handling. +template +class __DPtr : public __DPtrBase > +{ +#ifdef __GNUC__ +protected: + //there seems to be a bug in GCC's inference logic. It can't find m_addr. + using __DPtrBase >::m_addr; +#endif // __GNUC__ +public: + // constructors - all chain to __DPtrBase constructors + __DPtr< type >(void) : __DPtrBase >() {} + __DPtr< type >(TADDR addr) : __DPtrBase >(addr) {} + + // construct const from non-const + typedef typename type_traits::remove_const::type mutable_type; + __DPtr< type >(__DPtr const & rhs) : __DPtrBase >(rhs.GetAddr()) {} + + explicit __DPtr< type >(__TPtrBase addr) : __DPtrBase >(addr) {} + explicit __DPtr< type >(type const * host) : __DPtrBase >(host) {} + + operator type*() const + { + return (type*)DacInstantiateTypeByAddress(m_addr, sizeof(type), true); + } + type* operator->() const + { + return (type*)DacInstantiateTypeByAddress(m_addr, sizeof(type), true); + } +}; + +#define DPTR(type) __DPtr< type > + +// A restricted form of DPtr that doesn't have any conversions to pointer types. +// This is useful for pointer types that almost always represent arrays, as opposed +// to pointers to single instances (eg. PTR_BYTE). In these cases, allowing implicit +// conversions to (for eg.) BYTE* would usually result in incorrect usage (eg. pointer +// arithmetic and array indexing), since only a single instance has been marshalled to the host. +// If you really must marshal a single instance (eg. converting T* to PTR_T is too painful for now), +// then use code:DacUnsafeMarshalSingleElement so we can identify such unsafe code. 
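// For example (illustrative): with ArrayDPTR(UInt8) pData, each pData[i] marshals that
// element from the target on demand; an implicit conversion to UInt8* would instead hand
// back a host copy of just one element, making pointer arithmetic on it silently wrong.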
+template +class __ArrayDPtr : public __DPtrBase > +{ +public: + // constructors - all chain to __DPtrBase constructors + __ArrayDPtr< type >(void) : __DPtrBase >() {} + __ArrayDPtr< type >(TADDR addr) : __DPtrBase >(addr) {} + + // construct const from non-const + typedef typename type_traits::remove_const::type mutable_type; + __ArrayDPtr< type >(__ArrayDPtr const & rhs) : __DPtrBase >(rhs.GetAddr()) {} + + explicit __ArrayDPtr< type >(__TPtrBase addr) : __DPtrBase >(addr) {} + + // Note that there is also no explicit constructor from host instances (type*). + // Going this direction is less problematic, but often still represents risky coding. +}; + +#define ArrayDPTR(type) __ArrayDPtr< type > + + +// Pointer wrapper for objects which are just plain data +// but whose size is not the same as the base type size. +// This can be used for prefetching data for arrays or +// for cases where an object has a variable size. +template +class __SPtr : public __TPtrBase +{ +public: + typedef type _Type; + typedef type* _Ptr; + + __SPtr< type >(void) : __TPtrBase() {} + __SPtr< type >(TADDR addr) : __TPtrBase(addr) {} + explicit __SPtr< type >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __SPtr< type >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __SPtr< type >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __SPtr< type >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + if (m_addr) + { + return (type*)DacInstantiateTypeByAddress(m_addr, + type::DacSize(m_addr), + true); + } + else + { + return (type*)NULL; + } + } + type* operator->() const + { + if (m_addr) + { + return (type*)DacInstantiateTypeByAddress(m_addr, + type::DacSize(m_addr), + true); + } + else + { + return (type*)NULL; + } + } + type& operator*(void) const + { + if (!m_addr) + { + DacError(E_INVALIDARG); + } + + return *(type*)DacInstantiateTypeByAddress(m_addr, + type::DacSize(m_addr), + true); + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateTypeByAddress(m_addr, type::DacSize(m_addr), + false) != NULL; + } + void EnumMem(void) const + { + if (m_addr) + { + DacEnumMemoryRegion(m_addr, type::DacSize(m_addr)); + } + } +}; + +#define SPTR(type) __SPtr< type > + +// Pointer wrapper for objects which have a single leading +// vtable, such as objects in a single-inheritance tree. +// The base class of all such trees must have use +// VPTR_BASE_VTABLE_CLASS in their declaration and all +// instantiable members of the tree must be listed in vptr_list.h. +template +class __VPtr : public __TPtrBase +{ +public: + // VPtr::_Type has to be a pointer as + // often the type is an abstract class. + // This type is not expected to be used anyway. 
+ typedef type* _Type; + typedef type* _Ptr; + + __VPtr< type >(void) : __TPtrBase() {} + __VPtr< type >(TADDR addr) : __TPtrBase(addr) {} + explicit __VPtr< type >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __VPtr< type >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __VPtr< type >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __VPtr< type >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + return (type*)DacInstantiateClassByVTable(m_addr, sizeof(type), true); + } + type* operator->() const + { + return (type*)DacInstantiateClassByVTable(m_addr, sizeof(type), true); + } + + bool operator==(const __VPtr< type >& ptr) const + { + return m_addr == ptr.m_addr; + } + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(const __VPtr< type >& ptr) const + { + return !operator==(ptr); + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateClassByVTable(m_addr, sizeof(type), false) != NULL; + } + void EnumMem(void) const + { + if (IsValid()) + { + DacEnumMemoryRegion(m_addr, (operator->())->VPtrSize()); + } + } +}; + +#define VPTR(type) __VPtr< type > + +// Pointer wrapper for 8-bit strings. +#ifdef DAC_CLR_ENVIRONMENT +template +#else +template +#endif +class __Str8Ptr : public __DPtr +{ +public: + typedef type _Type; + typedef type* _Ptr; + + __Str8Ptr< type, maxChars >(void) : __DPtr() {} + __Str8Ptr< type, maxChars >(TADDR addr) : __DPtr(addr) {} + explicit __Str8Ptr< type, maxChars >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __Str8Ptr< type, maxChars >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __Str8Ptr< type, maxChars >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __Str8Ptr< type, maxChars >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + return (type*)DacInstantiateStringA(m_addr, maxChars, true); + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateStringA(m_addr, maxChars, false) != NULL; + } + void EnumMem(void) const + { + char* str = DacInstantiateStringA(m_addr, maxChars, false); + if (str) + { + DacEnumMemoryRegion(m_addr, strlen(str) + 1); + } + } +}; + +#define S8PTR(type) __Str8Ptr< type > +#define S8PTRMAX(type, maxChars) __Str8Ptr< type, maxChars > + +// Pointer wrapper for 16-bit strings. 
+#ifdef DAC_CLR_ENVIRONMENT +template +#else +template +#endif +class __Str16Ptr : public __DPtr +{ +public: + typedef type _Type; + typedef type* _Ptr; + + __Str16Ptr< type, maxChars >(void) : __DPtr() {} + __Str16Ptr< type, maxChars >(TADDR addr) : __DPtr(addr) {} + explicit __Str16Ptr< type, maxChars >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __Str16Ptr< type, maxChars >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __Str16Ptr< type, maxChars >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __Str16Ptr< type, maxChars >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + return (type*)DacInstantiateStringW(m_addr, maxChars, true); + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateStringW(m_addr, maxChars, false) != NULL; + } + void EnumMem(void) const + { + char* str = DacInstantiateStringW(m_addr, maxChars, false); + if (str) + { + DacEnumMemoryRegion(m_addr, strlen(str) + 1); + } + } +}; + +#define S16PTR(type) __Str16Ptr< type > +#define S16PTRMAX(type, maxChars) __Str16Ptr< type, maxChars > + +template +class __GlobalVal +{ +public: +#ifdef DAC_CLR_ENVIRONMENT + __GlobalVal< type >(PULONG rvaPtr) +#else + __GlobalVal< type >(UInt32* rvaPtr) +#endif + { + m_rvaPtr = rvaPtr; + } + + operator type() const + { + return (type)*__DPtr< type >(DacGlobalBase() + *m_rvaPtr); + } + + __DPtr< type > operator&() const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr); + } + + // @dbgtodo rbyers dac support: This updates values in the host. This seems extremely dangerous + // to do silently. I'd prefer that a specific (searchable) write function + // was used. Try disabling this and see what fails... + type & operator=(type & val) + { + type* ptr = __DPtr< type >(DacGlobalBase() + *m_rvaPtr); + // Update the host copy; + *ptr = val; + // Write back to the target. + DacWriteHostInstance(ptr, true); + return val; + } + + bool IsValid(void) const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr).IsValid(); + } + void EnumMem(void) const + { + TADDR p = DacGlobalBase() + *m_rvaPtr; + __DPtr< type >(p).EnumMem(); + } + +private: +#ifdef DAC_CLR_ENVIRONMENT + PULONG m_rvaPtr; +#else + UInt32* m_rvaPtr; +#endif +}; + +template +class __GlobalArray +{ +public: +#ifdef DAC_CLR_ENVIRONMENT + __GlobalArray< type, size >(PULONG rvaPtr) +#else + __GlobalArray< type, size >(UInt32* rvaPtr) +#endif + { + m_rvaPtr = rvaPtr; + } + + __DPtr< type > operator&() const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr); + } + + type& operator[](unsigned int index) const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr)[index]; + } + + bool IsValid(void) const + { + // Only validates the base pointer, not the full array range. 
+ return __DPtr< type >(DacGlobalBase() + *m_rvaPtr).IsValid(); + } + void EnumMem(void) const + { + DacEnumMemoryRegion(DacGlobalBase() + *m_rvaPtr, sizeof(type) * size); + } + +private: +#ifdef DAC_CLR_ENVIRONMENT + PULONG m_rvaPtr; +#else + UInt32* m_rvaPtr; +#endif +}; + +template +class __GlobalPtr +{ +public: +#ifdef DAC_CLR_ENVIRONMENT + __GlobalPtr< acc_type, store_type >(PULONG rvaPtr) +#else + __GlobalPtr< acc_type, store_type >(UInt32* rvaPtr) +#endif + { + m_rvaPtr = rvaPtr; + } + + __DPtr< store_type > operator&() const + { + return __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + + store_type & operator=(store_type & val) + { + store_type* ptr = __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + // Update the host copy; + *ptr = val; + // Write back to the target. + DacWriteHostInstance(ptr, true); + return val; + } + + acc_type operator->() const + { + return (acc_type)*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + operator acc_type() const + { + return (acc_type)*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + operator store_type() const + { + return *__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + bool operator!() const + { + return !*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + + typename store_type operator[](int index) const + { + return (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr))[index]; + } + + typename store_type operator[](unsigned int index) const + { + return (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr))[index]; + } + + TADDR GetAddr() const + { + return (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr)).GetAddr(); + } + + TADDR GetAddrRaw () const + { + return DacGlobalBase() + *m_rvaPtr; + } + + // This is only testing the the pointer memory is available but does not verify + // the memory that it points to. + // + bool IsValidPtr(void) const + { + return __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr).IsValid(); + } + + bool IsValid(void) const + { + return __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr).IsValid() && + (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr)).IsValid(); + } + void EnumMem(void) const + { + __DPtr< store_type > ptr(DacGlobalBase() + *m_rvaPtr); + ptr.EnumMem(); + if (ptr.IsValid()) + { + (*ptr).EnumMem(); + } + } + +#ifdef DAC_CLR_ENVIRONMENT + PULONG m_rvaPtr; +#else + UInt32* m_rvaPtr; +#endif +}; + +template +inline bool operator==(const __GlobalPtr& gptr, + acc_type host) +{ + return DacGetTargetAddrForHostAddr(host, true) == + *__DPtr< TADDR >(DacGlobalBase() + *gptr.m_rvaPtr); +} +template +inline bool operator!=(const __GlobalPtr& gptr, + acc_type host) +{ + return !operator==(gptr, host); +} + +template +inline bool operator==(acc_type host, + const __GlobalPtr& gptr) +{ + return DacGetTargetAddrForHostAddr(host, true) == + *__DPtr< TADDR >(DacGlobalBase() + *gptr.m_rvaPtr); +} +template +inline bool operator!=(acc_type host, + const __GlobalPtr& gptr) +{ + return !operator==(host, gptr); +} + + +// +// __VoidPtr is a type that behaves like void* but for target pointers. +// Behavior of PTR_VOID: +// * has void* semantics. Will compile to void* in non-DAC builds (just like +// other PTR types. Unlike TADDR, we want pointer semantics. +// * NOT assignable from host pointer types or convertible to host pointer +// types - ensures we can't confuse host and target pointers (we'll get +// compiler errors if we try and cast between them). 
+// * like void*, no pointer arithmetic or dereferencing is allowed +// * like TADDR, can be used to construct any __DPtr / __VPtr instance +// * representation is the same as a void* (for marshalling / casting) +// +// One way in which __VoidPtr is unlike void* is that it can't be cast to +// pointer or integer types. On the one hand, this is a good thing as it forces +// us to keep target pointers separate from other data types. On the other hand +// in practice this means we have to use dac_cast in places where we used +// to use a (TADDR) cast. Unfortunately C++ provides us no way to allow the +// explicit cast to primitive types without also allowing implicit conversions. +// +// This is very similar in spirit to TADDR. The primary difference is that +// PTR_VOID has pointer semantics, where TADDR has integer semantics. When +// dacizing uses of void* to TADDR, casts must be inserted everywhere back to +// pointer types. If we switch a use of TADDR to PTR_VOID, those casts in +// DACCESS_COMPILE regions no longer compile (see above). Also, TADDR supports +// pointer arithmetic, but that might not be necessary (could use PTR_BYTE +// instead etc.). Ideally we'd probably have just one type for this purpose +// (named TADDR but with the semantics of PTR_VOID), but outright conversion +// would require too much work. +// + +template <> +class __DPtr : public __ComparableTPtrBase +{ +public: + __DPtr(void) : __ComparableTPtrBase() {} + __DPtr(TADDR addr) : __ComparableTPtrBase(addr) {} + + // Note, unlike __DPtr, this ctor form is not explicit. We allow implicit + // conversions from any pointer type (just like for void*). + __DPtr(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + + // Like TPtrBase, VoidPtrs can also be created impicitly from all GlobalPtrs + template + __DPtr(__GlobalPtr globalPtr) + { + m_addr = globalPtr.GetAddr(); + } + + // Note, unlike __DPtr, there is no explicit conversion from host pointer + // types. Since void* cannot be marshalled, there is no such thing as + // a void* DAC instance in the host. + + // Also, we don't want an implicit conversion to TADDR because then the + // compiler will allow pointer arithmetic (which it wouldn't allow for + // void*). Instead, callers can use dac_cast if they want. + + // Note, unlike __DPtr, any pointer type can be assigned to a __DPtr + // This is to mirror the assignability of any pointer type to a void* + __DPtr& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.GetAddr(); + return *this; + } + __DPtr& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + // note, no marshalling operators (type* conversion, operator ->, operator*) + // A void* can't be marshalled because we don't know how much to copy + + // PTR_Void can be compared to any other pointer type (because conceptually, + // any other pointer type should be implicitly convertible to void*) + using __ComparableTPtrBase::operator==; + using __ComparableTPtrBase::operator!=; + using __ComparableTPtrBase::operator<; + using __ComparableTPtrBase::operator>; + using __ComparableTPtrBase::operator<=; + using __ComparableTPtrBase::operator>=; + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } +}; + +typedef __DPtr __VoidPtr; +typedef __VoidPtr PTR_VOID; +typedef DPTR(PTR_VOID) PTR_PTR_VOID; + +// For now we treat pointers to const and non-const void the same in DAC +// builds. 
In general, DAC is read-only anyway and so there isn't a danger of +// writing to these pointers. Also, the non-dac builds will ensure +// const-correctness. However, if we wanted to support true void* / const void* +// behavior, we could probably build the follow functionality by templating +// __VoidPtr: +// * A PTR_VOID would be implicitly convertable to PTR_CVOID +// * An explicit coercion (ideally const_cast) would be required to convert a +// PTR_CVOID to a PTR_VOID +// * Similarily, an explicit coercion would be required to convert a cost PTR +// type (eg. PTR_CBYTE) to a PTR_VOID. +typedef __VoidPtr PTR_CVOID; + + +// The special empty ctor declared here allows the whole +// class hierarchy to be instantiated easily by the +// external access code. The actual class body will be +// read externally so no members should be initialized. + +// Safe access for retrieving the target address of a PTR. +#define PTR_TO_TADDR(ptr) ((ptr).GetAddr()) + +#define GFN_TADDR(name) (DacGlobalBase() + g_dacGlobals.fn__ ## name) + +// ROTORTODO - g++ 3 doesn't like the use of the operator& in __GlobalVal +// here. Putting GVAL_ADDR in to get things to compile while I discuss +// this matter with the g++ authors. + +#define GVAL_ADDR(g) \ + ((g).operator&()) + +// +// References to class static and global data. +// These all need to be redirected through the global +// data table. +// + +#define _SPTR_DECL(acc_type, store_type, var) \ + static __GlobalPtr< acc_type, store_type > var +#define _SPTR_IMPL(acc_type, store_type, cls, var) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.cls##__##var) +#define _SPTR_IMPL_INIT(acc_type, store_type, cls, var, init) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.cls##__##var) +#define _SPTR_IMPL_NS(acc_type, store_type, ns, cls, var) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.ns##__##cls##__##var) +#define _SPTR_IMPL_NS_INIT(acc_type, store_type, ns, cls, var, init) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.ns##__##cls##__##var) + +#define _GPTR_DECL(acc_type, store_type, var) \ + extern __GlobalPtr< acc_type, store_type > var +#define _GPTR_IMPL(acc_type, store_type, var) \ + __GlobalPtr< acc_type, store_type > var(&g_dacGlobals.dac__##var) +#define _GPTR_IMPL_INIT(acc_type, store_type, var, init) \ + __GlobalPtr< acc_type, store_type > var(&g_dacGlobals.dac__##var) + +#define SVAL_DECL(type, var) \ + static __GlobalVal< type > var +#define SVAL_IMPL(type, cls, var) \ + __GlobalVal< type > cls::var(&g_dacGlobals.cls##__##var) +#define SVAL_IMPL_INIT(type, cls, var, init) \ + __GlobalVal< type > cls::var(&g_dacGlobals.cls##__##var) +#define SVAL_IMPL_NS(type, ns, cls, var) \ + __GlobalVal< type > cls::var(&g_dacGlobals.ns##__##cls##__##var) +#define SVAL_IMPL_NS_INIT(type, ns, cls, var, init) \ + __GlobalVal< type > cls::var(&g_dacGlobals.ns##__##cls##__##var) + +#define GVAL_DECL(type, var) \ + extern __GlobalVal< type > var +#define GVAL_IMPL(type, var) \ + __GlobalVal< type > var(&g_dacGlobals.dac__##var) +#define GVAL_IMPL_INIT(type, var, init) \ + __GlobalVal< type > var(&g_dacGlobals.dac__##var) + +#define GARY_DECL(type, var, size) \ + extern __GlobalArray< type, size > var +#define GARY_IMPL(type, var, size) \ + __GlobalArray< type, size > var(&g_dacGlobals.dac__##var) + +// Translation from a host pointer back to the target address +// that was used to retrieve the data for the host pointer. 
+#define PTR_HOST_TO_TADDR(host) DacGetTargetAddrForHostAddr(host, true) + +// Translation from a host interior pointer back to the corresponding +// target address. The host address must reside within a previously +// retrieved instance. +#define PTR_HOST_INT_TO_TADDR(host) DacGetTargetAddrForHostInteriorAddr(host, true) + +// Construct a pointer to a member of the given type. +#define PTR_HOST_MEMBER_TADDR(type, host, memb) \ + (PTR_HOST_TO_TADDR(host) + (TADDR)offsetof(type, memb)) + +// in the DAC build this is still typed TADDR, but in the runtime +// build it preserves the member type. +#define PTR_HOST_MEMBER(type, host, memb) \ + (PTR_HOST_TO_TADDR(host) + (TADDR)offsetof(type, memb)) + +// Construct a pointer to a member of the given type given an interior +// host address. +#define PTR_HOST_INT_MEMBER_TADDR(type, host, memb) \ + (PTR_HOST_INT_TO_TADDR(host) + (TADDR)offsetof(type, memb)) + +#define PTR_TO_MEMBER_TADDR(type, ptr, memb) \ + (PTR_TO_TADDR(ptr) + (TADDR)offsetof(type, memb)) + +// in the DAC build this is still typed TADDR, but in the runtime +// build it preserves the member type. +#define PTR_TO_MEMBER(type, ptr, memb) \ + (PTR_TO_TADDR(ptr) + (TADDR)offsetof(type, memb)) + +// Constructs an arbitrary data instance for a piece of +// memory in the target. +#define PTR_READ(addr, size) \ + DacInstantiateTypeByAddress(addr, size, true) + +// This value is used to initialize target pointers to NULL. We want this to be TADDR type +// (as opposed to, say, __TPtrBase) so that it can be used in the non-explicit ctor overloads, +// eg. as an argument default value. +// We can't always just use NULL because that's 0 which (in C++) can be any integer or pointer +// type (causing an ambiguous overload compiler error when used in explicit ctor forms). +#define PTR_NULL ((TADDR)0) + +// Provides an empty method implementation when compiled +// for DACCESS_COMPILE. For example, use to stub out methods needed +// for vtable entries but otherwise unused. +// Note that these functions are explicitly NOT marked SUPPORTS_DAC so that we'll get a +// DacCop warning if any calls to them are detected. +// @dbgtodo rbyers: It's probably almost always wrong to call any such function, so +// we should probably throw a better error (DacNotImpl), and ideally mark the function +// DECLSPEC_NORETURN so we don't have to deal with fabricating return values and we can +// get compiler warnings (unreachable code) anytime functions marked this way are called. +#define DAC_EMPTY() { LEAF_CONTRACT; } +#define DAC_EMPTY_ERR() { LEAF_CONTRACT; DacError(E_UNEXPECTED); } +#define DAC_EMPTY_RET(retVal) { LEAF_CONTRACT; DacError(E_UNEXPECTED); return retVal; } +#define DAC_UNEXPECTED() { LEAF_CONTRACT; DacError_NoRet(E_UNEXPECTED); } + +#endif // __cplusplus + +HRESULT DacGetTargetAddrForHostAddr(const void* ptr, TADDR * pTADDR); + +// Implementation details for dac_cast, should never be accessed directly. +// See code:dac_cast for details and discussion. +namespace dac_imp +{ + //--------------------------------------------- + // Conversion to TADDR + + // Forward declarations. + template + struct conversionHelper; + + template + TADDR getTaddr(T&& val); + + // Helper structs to get the target address of specific types + + // This non-specialized struct handles all instances of asTADDR that don't + // take partially-specialized arguments. 
+ template + struct conversionHelper + { + inline static TADDR asTADDR(__TPtrBase const & tptr) + { return PTR_TO_TADDR(tptr); } + + inline static TADDR asTADDR(TADDR addr) + { return addr; } + }; + + // Handles + template + struct conversionHelper + { + inline static TADDR asTADDR(TypeT * src) + { + TADDR addr = 0; + if (DacGetTargetAddrForHostAddr(src, &addr) != S_OK) + addr = DacGetTargetAddrForHostInteriorAddr(src, true); + return addr; + } + }; + + template + struct conversionHelper<__GlobalPtr const & > + { + inline static TADDR asTADDR(__GlobalPtr const & gptr) + { return PTR_TO_TADDR(gptr); } + }; + + // It is an error to try dac_cast on a __GlobalVal or a __GlobalArray. + template + struct conversionHelper< __GlobalVal const & > + { + inline static TADDR asTADDR(__GlobalVal const & gval) + { static_assert(false, "Cannot use dac_cast on a __GlobalVal; first you must get its address using the '&' operator."); } + }; + + template + struct conversionHelper< __GlobalArray const & > + { + inline static TADDR asTADDR(__GlobalArray const & garr) + { static_assert(false, "Cannot use dac_cast on a __GlobalArray; first you must get its address using the '&' operator."); } + }; + + // This is the main helper function, and it delegates to the above helper functions. + // NOTE: this works because of C++0x reference collapsing rules for rvalue reference + // arguments in template functions. + template + TADDR getTaddr(T&& val) + { return conversionHelper::asTADDR(val); } + + //--------------------------------------------- + // Conversion to DAC instance + + // Helper class to instantiate DAC instances from a TADDR + // The default implementation assumes we want to create an instance of a PTR type + template + struct makeDacInst + { + // First constructing a __TPtrBase and then constructing the target type + // ensures that the target type can construct itself from a __TPtrBase. + // This also prevents unknown user conversions from producing incorrect + // results (since __TPtrBase can only be constructed from TADDR values). + static inline T fromTaddr(TADDR addr) + { return T(__TPtrBase(addr)); } + }; + + // Specialization for creating TADDRs from TADDRs. + template<> struct makeDacInst + { + static inline TADDR fromTaddr(TADDR addr) { return addr; } + }; + + // Partial specialization for creating host instances. + template + struct makeDacInst + { + static inline T * fromTaddr(TADDR addr) + { return makeDacInst::fromTaddr(addr); } + }; + + /* + struct Yes { char c[2]; }; + struct No { char c; }; + Yes& HasTPtrBase(__TPtrBase const *, ); + No& HasTPtrBase(...); + + template + typename rh::std::enable_if< + sizeof(HasTPtrBase(typename rh::std::remove_reference::type *)) == sizeof(Yes), + T>::type + makeDacInst(TADDR addr) + */ + +} // namespace dac_imp + +// DacCop in-line exclusion mechanism + +// Warnings - official home is DacCop\Shared\Warnings.cs, but we want a way for users to indicate +// warning codes in a way that is descriptive to readers (not just code numbers). 
The names here +// don't matter - DacCop just looks at the value +enum DacCopWarningCode +{ + // General Rules + FieldAccess = 1, + PointerArith = 2, + PointerComparison = 3, + InconsistentMarshalling = 4, + CastBetweenAddressSpaces = 5, + CastOfMarshalledType = 6, + VirtualCallToNonVPtr = 7, + UndacizedGlobalVariable = 8, + + // Function graph related + CallUnknown = 701, + CallNonDac = 702, + CallVirtualUnknown = 704, + CallVirtualNonDac = 705, +}; + +// DACCOP_IGNORE is a mechanism to suppress DacCop violations from within the source-code. +// See the DacCop wiki for guidance on how best to use this: http://mswikis/clr/dev/Pages/DacCop.aspx +// +// DACCOP_IGNORE will suppress a DacCop violation for the following (non-compound) statement. +// For example: +// // The "dual-mode DAC problem" occurs in a few places where a class is used both +// // in the host, and marshalled from the target ... +// DACCOP_IGNORE(CastBetweenAddressSpaces,"SBuffer has the dual-mode DAC problem"); +// TADDR bufAddr = (TADDR)m_buffer; +// +// A call to DACCOP_IGNORE must occur as it's own statement, and can apply only to following +// single-statements (not to compound statement blocks). Occasionally it is necessary to hoist +// violation-inducing code out to its own statement (e.g., if it occurs in the conditional of an +// if). +// +// Arguments: +// code: a literal value from DacCopWarningCode indicating which violation should be suppressed. +// szReasonString: a short description of why this exclusion is necessary. This is intended just +// to help readers of the code understand the source of the problem, and what would be required +// to fix it. More details can be provided in comments if desired. +// +inline void DACCOP_IGNORE(DacCopWarningCode code, const char * szReasonString) +{ + UNREFERENCED_PARAMETER(code); + UNREFERENCED_PARAMETER(szReasonString); + // DacCop detects calls to this function. No implementation is necessary. +} + +#else // !DACCESS_COMPILE + +// +// This version of the macros turns into normal pointers +// for unmodified in-proc compilation. + +// ******************************************************* +// !!!!!!!!!!!!!!!!!!!!!!!!!NOTE!!!!!!!!!!!!!!!!!!!!!!!!!! +// +// Please search this file for the type name to find the +// DAC versions of these definitions +// +// !!!!!!!!!!!!!!!!!!!!!!!!!NOTE!!!!!!!!!!!!!!!!!!!!!!!!!! +// ******************************************************* + +// Declare TADDR as a non-pointer type so that arithmetic +// can be done on it directly, as with the DACCESS_COMPILE definition. +// This also helps expose pointer usage that may need to be changed. 
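// For example (illustrative): in this branch a statement such as
//     TADDR ta = PTR_HOST_TO_TADDR(pObject);
// is just a reinterpret_cast of the in-process pointer, and DPTR(MyType) collapses to a
// plain MyType*, so the same source compiles unchanged when DACCESS_COMPILE is not defined.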
+typedef UIntNative TADDR; + +typedef void* PTR_VOID; +typedef void** PTR_PTR_VOID; + +#define DPTR(type) type* +#define ArrayDPTR(type) type* +#define SPTR(type) type* +#define VPTR(type) type* +#define S8PTR(type) type* +#define S8PTRMAX(type, maxChars) type* +#define S16PTR(type) type* +#define S16PTRMAX(type, maxChars) type* + +#ifndef __GCENV_BASE_INCLUDED__ +#define PTR_TO_TADDR(ptr) (reinterpret_cast(ptr)) +#endif // __GCENV_BASE_INCLUDED__ +#define GFN_TADDR(name) (reinterpret_cast(&(name))) + +#define GVAL_ADDR(g) (&(g)) +#define _SPTR_DECL(acc_type, store_type, var) \ + static store_type var +#define _SPTR_IMPL(acc_type, store_type, cls, var) \ + store_type cls::var +#define _SPTR_IMPL_INIT(acc_type, store_type, cls, var, init) \ + store_type cls::var = init +#define _SPTR_IMPL_NS(acc_type, store_type, ns, cls, var) \ + store_type cls::var +#define _SPTR_IMPL_NS_INIT(acc_type, store_type, ns, cls, var, init) \ + store_type cls::var = init +#define _GPTR_DECL(acc_type, store_type, var) \ + extern store_type var +#define _GPTR_IMPL(acc_type, store_type, var) \ + store_type var +#define _GPTR_IMPL_INIT(acc_type, store_type, var, init) \ + store_type var = init +#define SVAL_DECL(type, var) \ + static type var +#define SVAL_IMPL(type, cls, var) \ + type cls::var +#define SVAL_IMPL_INIT(type, cls, var, init) \ + type cls::var = init +#define SVAL_IMPL_NS(type, ns, cls, var) \ + type cls::var +#define SVAL_IMPL_NS_INIT(type, ns, cls, var, init) \ + type cls::var = init +#define GVAL_DECL(type, var) \ + extern type var +#define GVAL_IMPL(type, var) \ + type var +#define GVAL_IMPL_INIT(type, var, init) \ + type var = init +#define GARY_DECL(type, var, size) \ + extern type var[size] +#define GARY_IMPL(type, var, size) \ + type var[size] +#define PTR_HOST_TO_TADDR(host) (reinterpret_cast(host)) +#define PTR_HOST_INT_TO_TADDR(host) ((TADDR)(host)) +#define VPTR_HOST_VTABLE_TO_TADDR(host) (reinterpret_cast(host)) +#define PTR_HOST_MEMBER_TADDR(type, host, memb) (reinterpret_cast(&(host)->memb)) +#define PTR_HOST_MEMBER(type, host, memb) (&((host)->memb)) +#define PTR_HOST_INT_MEMBER_TADDR(type, host, memb) ((TADDR)&(host)->memb) +#define PTR_TO_MEMBER_TADDR(type, ptr, memb) (reinterpret_cast(&((ptr)->memb))) +#define PTR_TO_MEMBER(type, ptr, memb) (&((ptr)->memb)) +#define PTR_READ(addr, size) (reinterpret_cast(addr)) + +#define PTR_NULL NULL + +#define DAC_EMPTY() +#define DAC_EMPTY_ERR() +#define DAC_EMPTY_RET(retVal) +#define DAC_UNEXPECTED() + +#define DACCOP_IGNORE(warningCode, reasonString) + +#endif // !DACCESS_COMPILE + +//---------------------------------------------------------------------------- +// dac_cast +// Casting utility, to be used for casting one class pointer type to another. +// Use as you would use static_cast +// +// dac_cast is designed to act just as static_cast does when +// dealing with pointers and their DAC abstractions. Specifically, +// it handles these coversions: +// +// dac_cast(SourceTypeVal) +// +// where TargetType <- SourceTypeVal are +// +// ?PTR(Tgt) <- TADDR - Create PTR type (DPtr etc.) from TADDR +// ?PTR(Tgt) <- ?PTR(Src) - Convert one PTR type to another +// ?PTR(Tgt) <- Src * - Create PTR type from dac host object instance +// TADDR <- ?PTR(Src) - Get TADDR of PTR object (DPtr etc.) +// TADDR <- Src * - Get TADDR of dac host object instance +// +// Note that there is no direct convertion to other host-pointer types (because we don't +// know if you want a DPTR or VPTR etc.). 
However, due to the implicit DAC conversions, +// you can just use dac_cast and assign that to a Foo*. +// +// The beauty of this syntax is that it is consistent regardless +// of source and target casting types. You just use dac_cast +// and the partial template specialization will do the right thing. +// +// One important thing to realise is that all "Foo *" types are +// assumed to be pointers to host instances that were marshalled by DAC. This should +// fail at runtime if it's not the case. +// +// Some examples would be: +// +// - Host pointer of one type to a related host pointer of another +// type, i.e., MethodDesc * <-> InstantiatedMethodDesc * +// Syntax: with MethodDesc *pMD, InstantiatedMethodDesc *pInstMD +// pInstMd = dac_cast(pMD) +// pMD = dac_cast(pInstMD) +// +// - (D|V)PTR of one encapsulated pointer type to a (D|V)PTR of +// another type, i.e., PTR_AppDomain <-> PTR_BaseDomain +// Syntax: with PTR_AppDomain pAD, PTR_BaseDomain pBD +// dac_cast(pBD) +// dac_cast(pAD) +// +// Example comparsions of some old and new syntax, where +// h is a host pointer, such as "Foo *h;" +// p is a DPTR, such as "PTR_Foo p;" +// +// PTR_HOST_TO_TADDR(h) ==> dac_cast(h) +// PTR_TO_TADDR(p) ==> dac_cast(p) +// PTR_Foo(PTR_HOST_TO_TADDR(h)) ==> dac_cast(h) +// +//---------------------------------------------------------------------------- +template +inline Tgt dac_cast(Src src) +{ +#ifdef DACCESS_COMPILE + // In DAC builds, first get a TADDR for the source, then create the + // appropriate destination instance. + TADDR addr = dac_imp::getTaddr(src); + return dac_imp::makeDacInst::fromTaddr(addr); +#else // !DACCESS_COMPILE + // In non-DAC builds, dac_cast is the same as a C-style cast because we need to support: + // - casting away const + // - conversions between pointers and TADDR + // Perhaps we should more precisely restrict it's usage, but we get the precise + // restrictions in DAC builds, so it wouldn't buy us much. + return (Tgt)(src); +#endif // !DACCESS_COMPILE +} + +//---------------------------------------------------------------------------- +// +// Convenience macros which work for either mode. +// +//---------------------------------------------------------------------------- + +#define SPTR_DECL(type, var) _SPTR_DECL(type*, PTR_##type, var) +#define SPTR_IMPL(type, cls, var) _SPTR_IMPL(type*, PTR_##type, cls, var) +#define SPTR_IMPL_INIT(type, cls, var, init) _SPTR_IMPL_INIT(type*, PTR_##type, cls, var, init) +#define SPTR_IMPL_NS(type, ns, cls, var) _SPTR_IMPL_NS(type*, PTR_##type, ns, cls, var) +#define SPTR_IMPL_NS_INIT(type, ns, cls, var, init) _SPTR_IMPL_NS_INIT(type*, PTR_##type, ns, cls, var, init) +#define GPTR_DECL(type, var) _GPTR_DECL(type*, PTR_##type, var) +#define GPTR_IMPL(type, var) _GPTR_IMPL(type*, PTR_##type, var) +#define GPTR_IMPL_INIT(type, var, init) _GPTR_IMPL_INIT(type*, PTR_##type, var, init) + +// If you want to marshal a single instance of an ArrayDPtr over to the host and +// return a pointer to it, you can use this function. However, this is unsafe because +// users of value may assume they can do pointer arithmetic on it. This is exactly +// the bugs ArrayDPtr is designed to prevent. See code:__ArrayDPtr for details. 
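// For example (illustrative): with ArrayDPTR(UInt8) pCode, a caller that truly needs only
// one marshalled element can write
//     UInt8 * pFirst = DacUnsafeMarshalSingleElement(pCode);
// and must not index or otherwise do pointer arithmetic on pFirst afterwards.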
+template +inline type* DacUnsafeMarshalSingleElement( ArrayDPTR(type) arrayPtr ) +{ + return (DPTR(type))(arrayPtr); +} + +typedef DPTR(Int8) PTR_Int8; +typedef DPTR(Int16) PTR_Int16; +typedef DPTR(Int32) PTR_Int32; +typedef DPTR(Int64) PTR_Int64; +typedef ArrayDPTR(UInt8) PTR_UInt8; +typedef DPTR(PTR_UInt8) PTR_PTR_UInt8; +typedef DPTR(PTR_PTR_UInt8) PTR_PTR_PTR_UInt8; +typedef DPTR(UInt16) PTR_UInt16; +typedef DPTR(UInt32) PTR_UInt32; +typedef DPTR(UInt64) PTR_UInt64; +typedef DPTR(UIntNative) PTR_UIntNative; + +typedef DPTR(size_t) PTR_size_t; + +typedef UInt8 Code; +typedef DPTR(Code) PTR_Code; +typedef DPTR(PTR_Code) PTR_PTR_Code; + +#if defined(DACCESS_COMPILE) && defined(DAC_CLR_ENVIRONMENT) +#include +#include +//#include +#endif // defined(DACCESS_COMPILE) && defined(DAC_CLR_ENVIRONMENT) + +//---------------------------------------------------------------------------- +// PCODE is pointer to any executable code. +typedef TADDR PCODE; +typedef DPTR(TADDR) PTR_PCODE; + +//---------------------------------------------------------------------------- +// +// The access code compile must compile data structures that exactly +// match the real structures for access to work. The access code +// doesn't want all of the debugging validation code, though, so +// distinguish between _DEBUG, for declaring general debugging data +// and always-on debug code, and _DEBUG_IMPL, for debugging code +// which will be disabled when compiling for external access. +// +//---------------------------------------------------------------------------- + +#if !defined(_DEBUG_IMPL) && defined(_DEBUG) && !defined(DACCESS_COMPILE) +#define _DEBUG_IMPL 1 +#endif + +// Helper macro for tracking EnumMemoryRegions progress. +#if 0 +#define EMEM_OUT(args) DacWarning args +#else // !0 +#define EMEM_OUT(args) +#endif // !0 + +// TARGET_CONSISTENCY_CHECK represents a condition that should not fail unless the DAC target is corrupt. +// This is in contrast to ASSERTs in DAC infrastructure code which shouldn't fail regardless of the memory +// read from the target. At the moment we treat these the same, but in the future we will want a mechanism +// for disabling just the target consistency checks (eg. for tests that intentionally use corrupted targets). +// @dbgtodo rbyers: Separating asserts and target consistency checks is tracked by DevDiv Bugs 31674 +#define TARGET_CONSISTENCY_CHECK(expr,msg) _ASSERTE_MSG(expr,msg) + +#ifdef DACCESS_COMPILE +#define NO_DAC() static_assert(false, "Cannot use this method in builds DAC: " __FILE__ ":" __LINE__) +#else +#define NO_DAC() do {} while (0) +#endif + +#endif // !__daccess_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/eetype.h b/src/coreclr/src/nativeaot/Runtime/inc/eetype.h new file mode 100644 index 0000000000000..35e4a64294bc8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/eetype.h @@ -0,0 +1,421 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// Fundamental runtime type representation + +#pragma warning(push) +#pragma warning(disable:4200) // nonstandard extension used : zero-sized array in struct/union +//------------------------------------------------------------------------------------------------- +// Forward declarations + +class EEType; +class OptionalFields; +class TypeManager; +struct TypeManagerHandle; +class DynamicModule; +struct EETypeRef; + +#if !defined(USE_PORTABLE_HELPERS) +#define SUPPORTS_WRITABLE_DATA 1 +#endif + +//------------------------------------------------------------------------------------------------- +// Array of these represents the interfaces implemented by a type + +class EEInterfaceInfo +{ + public: + EEType * GetInterfaceEEType() + { + return ((UIntTarget)m_pInterfaceEEType & ((UIntTarget)1)) ? + *(EEType**)((UIntTarget)m_ppInterfaceEETypeViaIAT & ~((UIntTarget)1)) : + m_pInterfaceEEType; + } + + private: + union + { + EEType * m_pInterfaceEEType; // m_uFlags == InterfaceFlagNormal + EEType ** m_ppInterfaceEETypeViaIAT; // m_uFlags == InterfaceViaIATFlag + }; +}; + +//------------------------------------------------------------------------------------------------- +// The subset of TypeFlags that Redhawk knows about at runtime +// This should match the TypeFlags enum in the managed type system. +enum EETypeElementType : UInt8 +{ + // Primitive + ElementType_Unknown = 0x00, + ElementType_Void = 0x01, + ElementType_Boolean = 0x02, + ElementType_Char = 0x03, + ElementType_SByte = 0x04, + ElementType_Byte = 0x05, + ElementType_Int16 = 0x06, + ElementType_UInt16 = 0x07, + ElementType_Int32 = 0x08, + ElementType_UInt32 = 0x09, + ElementType_Int64 = 0x0A, + ElementType_UInt64 = 0x0B, + ElementType_IntPtr = 0x0C, + ElementType_UIntPtr = 0x0D, + ElementType_Single = 0x0E, + ElementType_Double = 0x0F, + + ElementType_ValueType = 0x10, + // Enum = 0x11, // EETypes store enums as their underlying type + ElementType_Nullable = 0x12, + // Unused 0x13, + + ElementType_Class = 0x14, + ElementType_Interface = 0x15, + + ElementType_SystemArray = 0x16, // System.Array type + + ElementType_Array = 0x17, + ElementType_SzArray = 0x18, + ElementType_ByRef = 0x19, + ElementType_Pointer = 0x1A, +}; + +//------------------------------------------------------------------------------------------------- +// Support for encapsulating the location of fields in the EEType that have variable offsets or may be +// optional. +// +// The following enumaration gives symbolic names for these fields and is used with the GetFieldPointer() and +// GetFieldOffset() APIs. 
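The EEInterfaceInfo accessor above relies on a low-bit tag: when bit 0 of the stored pointer-sized value is set, the value is really the tagged address of an IAT slot and one more dereference is needed. A standalone sketch of that trick, with stand-in types rather than the real EEType:

#include <cstdint>

struct FakeEEType { int id; };   // stand-in for EEType

static FakeEEType* ResolveMaybeIndirect(uintptr_t slot)
{
    if (slot & 1)                                        // low bit set: via-IAT case
        return *(FakeEEType**)(slot & ~(uintptr_t)1);    // strip the tag, dereference once more
    return (FakeEEType*)slot;                            // untagged: direct pointer
}

int main()
{
    FakeEEType t = { 7 };
    FakeEEType* pSlot = &t;                          // simulated IAT slot
    uintptr_t direct   = (uintptr_t)&t;
    uintptr_t indirect = (uintptr_t)&pSlot | 1;      // tag the indirect form
    return (ResolveMaybeIndirect(direct)->id == 7 &&
            ResolveMaybeIndirect(indirect)->id == 7) ? 0 : 1;
}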
+enum EETypeField +{ + ETF_InterfaceMap, + ETF_TypeManagerIndirection, + ETF_WritableData, + ETF_Finalizer, + ETF_OptionalFieldsPtr, + ETF_SealedVirtualSlots, + ETF_DynamicTemplateType, + ETF_DynamicDispatchMap, + ETF_DynamicModule, + ETF_GenericDefinition, + ETF_GenericComposition, + ETF_DynamicGcStatics, + ETF_DynamicNonGcStatics, + ETF_DynamicThreadStaticOffset, +}; + +//------------------------------------------------------------------------------------------------- +// Fundamental runtime type representation +typedef DPTR(class EEType) PTR_EEType; +typedef DPTR(PTR_EEType) PTR_PTR_EEType; +typedef DPTR(class OptionalFields) PTR_OptionalFields; +typedef DPTR(PTR_OptionalFields) PTR_PTR_OptionalFields; + +class EEType +{ + friend class AsmOffsets; + +private: + struct RelatedTypeUnion + { + union + { + // Kinds.CanonicalEEType + EEType* m_pBaseType; + EEType** m_ppBaseTypeViaIAT; + + // Kinds.ClonedEEType + EEType** m_pCanonicalType; + EEType** m_ppCanonicalTypeViaIAT; + + // Kinds.ParameterizedEEType + EEType* m_pRelatedParameterType; + EEType** m_ppRelatedParameterTypeViaIAT; + }; + }; + + UInt16 m_usComponentSize; + UInt16 m_usFlags; + UInt32 m_uBaseSize; + RelatedTypeUnion m_RelatedType; + UInt16 m_usNumVtableSlots; + UInt16 m_usNumInterfaces; + UInt32 m_uHashCode; + + TgtPTR_Void m_VTable[]; // make this explicit so the binder gets the right alignment + + // after the m_usNumVtableSlots vtable slots, we have m_usNumInterfaces slots of + // EEInterfaceInfo, and after that a couple of additional pointers based on whether the type is + // finalizable (the address of the finalizer code) or has optional fields (pointer to the compacted + // fields). + + enum Flags + { + // There are four kinds of EETypes, the three of them regular types that use the full EEType encoding + // plus a fourth kind used as a grab bag of unusual edge cases which are encoded in a smaller, + // simplified version of EEType. See LimitedEEType definition below. + EETypeKindMask = 0x0003, + + // This flag is set when m_pRelatedType is in a different module. In that case, m_pRelatedType + // actually points to a 'fake' EEType whose m_pRelatedType field lines up with an IAT slot in this + // module, which then points to the desired EEType. In other words, there is an extra indirection + // through m_pRelatedType to get to the related type in the other module. + RelatedTypeViaIATFlag = 0x0004, + + IsDynamicTypeFlag = 0x0008, + + // This EEType represents a type which requires finalization + HasFinalizerFlag = 0x0010, + + // This type contain gc pointers + HasPointersFlag = 0x0020, + + // This type is generic and one or more of it's type parameters is co- or contra-variant. This only + // applies to interface and delegate types. + GenericVarianceFlag = 0x0080, + + // This type has optional fields present. + OptionalFieldsFlag = 0x0100, + + // Unused = 0x0200, + + // This type is generic. + IsGenericFlag = 0x0400, + + // We are storing a EETypeElementType in the upper bits for unboxing enums + ElementTypeMask = 0xf800, + ElementTypeShift = 11, + }; + +public: + + // These are flag values that are rarely set for types. If any of them are set then an optional field will + // be associated with the EEType to represent them. + enum RareFlags + { + // This type requires 8-byte alignment for its fields on certain platforms (only ARM currently). 
+ RequiresAlign8Flag = 0x00000001, + + // Old unused flag + UNUSED1 = 0x00000002, + + // unused = 0x00000004, + + // unused = 0x00000008, + + // unused = 0x00000010, + + // This EEType has a Class Constructor + HasCctorFlag = 0x0000020, + + // Old unused flag + UNUSED2 = 0x00000040, + + // This EEType was constructed from a universal canonical template, and has + // its own dynamically created DispatchMap (does not use the DispatchMap of its template type) + HasDynamicallyAllocatedDispatchMapFlag = 0x00000080, + + // This EEType represents a structure that is an HFA (only ARM currently) + IsHFAFlag = 0x00000100, + + // This EEType has sealed vtable entries + HasSealedVTableEntriesFlag = 0x00000200, + + // This dynamically created type has gc statics + IsDynamicTypeWithGcStaticsFlag = 0x00000400, + + // This dynamically created type has non gc statics + IsDynamicTypeWithNonGcStaticsFlag = 0x00000800, + + // This dynamically created type has thread statics + IsDynamicTypeWithThreadStaticsFlag = 0x00001000, + + // This EEType was constructed from a module where the open type is defined in + // a dynamically loaded type + HasDynamicModuleFlag = 0x00002000, + + // This EEType is for an abstract (but non-interface) type + IsAbstractClassFlag = 0x00004000, + + // This EEType is for a Byref-like class (TypedReference, Span<T>,...) + IsByRefLikeFlag = 0x00008000, + }; + +public: + + enum Kinds + { + CanonicalEEType = 0x0000, + ClonedEEType = 0x0001, + ParameterizedEEType = 0x0002, + GenericTypeDefEEType = 0x0003, + }; + + UInt32 get_BaseSize() + { return m_uBaseSize; } + + UInt16 get_ComponentSize() + { return m_usComponentSize; } + + PTR_Code get_Slot(UInt16 slotNumber); + + PTR_PTR_Code get_SlotPtr(UInt16 slotNumber); + + Kinds get_Kind(); + + bool IsCloned() + { return get_Kind() == ClonedEEType; } + + bool IsRelatedTypeViaIAT() + { return ((m_usFlags & (UInt16)RelatedTypeViaIATFlag) != 0); } + + bool IsArray() + { + EETypeElementType elementType = GetElementType(); + return elementType == ElementType_Array || elementType == ElementType_SzArray; + } + + bool IsParameterizedType() + { return (get_Kind() == ParameterizedEEType); } + + bool IsGenericTypeDefinition() + { return (get_Kind() == GenericTypeDefEEType); } + + bool IsCanonical() + { return get_Kind() == CanonicalEEType; } + + bool IsInterface() + { return GetElementType() == ElementType_Interface; } + + EEType * get_CanonicalEEType(); + + EEType * get_RelatedParameterType(); + + // A parameterized type shape less than SZARRAY_BASE_SIZE indicates that this is not + // an array but some other parameterized type (see: ParameterizedTypeShapeConstants) + // For arrays, this number uniquely captures both Sz/Md array flavor and rank. 
+ UInt32 get_ParameterizedTypeShape() { return m_uBaseSize; } + + bool get_IsValueType() + { return GetElementType() < ElementType_Class; } + + bool HasFinalizer() + { + return (m_usFlags & HasFinalizerFlag) != 0; + } + + bool HasReferenceFields() + { + return (m_usFlags & HasPointersFlag) != 0; + } + + bool HasOptionalFields() + { + return (m_usFlags & OptionalFieldsFlag) != 0; + } + + bool IsEquivalentTo(EEType * pOtherEEType) + { + if (this == pOtherEEType) + return true; + + EEType * pThisEEType = this; + + if (pThisEEType->IsCloned()) + pThisEEType = pThisEEType->get_CanonicalEEType(); + + if (pOtherEEType->IsCloned()) + pOtherEEType = pOtherEEType->get_CanonicalEEType(); + + if (pThisEEType == pOtherEEType) + return true; + + if (pThisEEType->IsParameterizedType() && pOtherEEType->IsParameterizedType()) + { + return pThisEEType->get_RelatedParameterType()->IsEquivalentTo(pOtherEEType->get_RelatedParameterType()) && + pThisEEType->get_ParameterizedTypeShape() == pOtherEEType->get_ParameterizedTypeShape(); + } + + return false; + } + + // How many vtable slots are there? + UInt16 GetNumVtableSlots() + { return m_usNumVtableSlots; } + + // How many entries are in the interface map after the vtable slots? + UInt16 GetNumInterfaces() + { return m_usNumInterfaces; } + + // Does this class (or its base classes) implement any interfaces? + bool HasInterfaces() + { return GetNumInterfaces() != 0; } + + bool IsGeneric() + { return (m_usFlags & IsGenericFlag) != 0; } + + DynamicModule* get_DynamicModule(); + + TypeManagerHandle* GetTypeManagerPtr(); + + // Used only by GC initialization, this initializes the EEType used to mark free entries in the GC heap. + // It should be an array type with a component size of one (so the GC can easily size it as appropriate) + // and should be marked as not containing any references. The rest of the fields don't matter: the GC does + // not query them and the rest of the runtime will never hold a reference to free object. + inline void InitializeAsGcFreeType(); + +#ifdef DACCESS_COMPILE + bool DacVerify(); + static bool DacVerifyWorker(EEType* pThis); +#endif // DACCESS_COMPILE + + // Mark or determine that a type is generic and one or more of it's type parameters is co- or + // contra-variant. This only applies to interface and delegate types. + bool HasGenericVariance() + { return (m_usFlags & GenericVarianceFlag) != 0; } + + EETypeElementType GetElementType() + { return (EETypeElementType)((m_usFlags & ElementTypeMask) >> ElementTypeShift); } + + // Determine whether a type requires 8-byte alignment for its fields (required only on certain platforms, + // only ARM so far). + bool RequiresAlign8() + { return (get_RareFlags() & RequiresAlign8Flag) != 0; } + + // Determine whether a type is an instantiation of Nullable. + bool IsNullable() + { return GetElementType() == ElementType_Nullable; } + + // Determine whether a type was created by dynamic type loader + bool IsDynamicType() + { return (m_usFlags & IsDynamicTypeFlag) != 0; } + + UInt32 GetHashCode(); + + // Retrieve optional fields associated with this EEType. May be NULL if no such fields exist. + inline PTR_OptionalFields get_OptionalFields(); + + // Get flags that are less commonly set on EETypes. + inline UInt32 get_RareFlags(); + + // Helper methods that deal with EEType topology (size and field layout). These are useful since as we + // optimize for pay-for-play we increasingly want to customize exactly what goes into an EEType on a + // per-type basis. 
The rules that govern this can be both complex and volatile and we risk sprinkling + // various layout rules through the binder and runtime that obscure the basic meaning of the code and are + // brittle: easy to overlook when one of the rules changes. + // + // The following methods can in some cases have fairly complex argument lists of their own and in that way + // they expose more of the implementation details than we'd ideally like. But regardless they still serve + // an arguably more useful purpose: they identify all the places that rely on the EEType layout. As we + // change layout rules we might have to change the arguments to the methods below but in doing so we will + // instantly identify all the other parts of the binder and runtime that need to be updated. + + // Calculate the offset of a field of the EEType that has a variable offset. + inline UInt32 GetFieldOffset(EETypeField eField); + + // Validate an EEType extracted from an object. + bool Validate(bool assertOnFail = true); +}; + +#pragma warning(pop) + +#include "OptionalFields.h" diff --git a/src/coreclr/src/nativeaot/Runtime/inc/eetype.inl b/src/coreclr/src/nativeaot/Runtime/inc/eetype.inl new file mode 100644 index 0000000000000..c3549c0d8caef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/eetype.inl @@ -0,0 +1,272 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __eetype_inl__ +#define __eetype_inl__ +//----------------------------------------------------------------------------------------------------------- +inline UInt32 EEType::GetHashCode() +{ + return m_uHashCode; +} + +//----------------------------------------------------------------------------------------------------------- +inline PTR_Code EEType::get_Slot(UInt16 slotNumber) +{ + ASSERT(slotNumber < m_usNumVtableSlots); + return *get_SlotPtr(slotNumber); +} + +//----------------------------------------------------------------------------------------------------------- +inline PTR_PTR_Code EEType::get_SlotPtr(UInt16 slotNumber) +{ + ASSERT(slotNumber < m_usNumVtableSlots); + return dac_cast(dac_cast(this) + offsetof(EEType, m_VTable)) + slotNumber; +} + +#ifdef DACCESS_COMPILE +inline bool EEType::DacVerify() +{ + // Use a separate static worker because the worker validates + // the whole chain of EETypes and we don't want to accidentally + // answer questions from 'this' that should have come from the + // 'current' EEType. + return DacVerifyWorker(this); +} +// static +inline bool EEType::DacVerifyWorker(EEType* pThis) +{ + //********************************************************************* + //**** ASSUMES MAX TYPE HIERARCHY DEPTH OF 1024 TYPES **** + //********************************************************************* + const int MAX_SANE_RELATED_TYPES = 1024; + //********************************************************************* + //**** ASSUMES MAX OF 200 INTERFACES IMPLEMENTED ON ANY GIVEN TYPE **** + //********************************************************************* + const int MAX_SANE_NUM_INSTANCES = 200; + + + PTR_EEType pCurrentType = dac_cast(pThis); + for (int i = 0; i < MAX_SANE_RELATED_TYPES; i++) + { + // Verify interface map + if (pCurrentType->GetNumInterfaces() > MAX_SANE_NUM_INSTANCES) + return false; + + // Validate the current type + if (!pCurrentType->Validate(false)) + return false; + + // + // Now on to the next type in the hierarchy. 
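The MAX_SANE_RELATED_TYPES cap in the loop above is what keeps a corrupt or cyclic base-type chain from hanging the debugger. The same defensive pattern, reduced to a generic parent-linked structure (illustrative Node type, not the real EEType chain):

struct Node { Node* parent; };

static bool ChainLooksSane(const Node* n, int maxDepth = 1024)
{
    for (int i = 0; i < maxDepth && n != nullptr; i++)
        n = n->parent;
    return n == nullptr;   // did not reach the root within the cap: assume a loop or corruption
}

int main()
{
    Node a = { nullptr }, b = { &a }, loop = { nullptr };
    loop.parent = &loop;                               // simulated corruption: self-cycle
    return (ChainLooksSane(&b) && !ChainLooksSane(&loop)) ? 0 : 1;
}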
+ // + + if (pCurrentType->IsRelatedTypeViaIAT()) + pCurrentType = *dac_cast(reinterpret_cast(pCurrentType->m_RelatedType.m_ppBaseTypeViaIAT)); + else + pCurrentType = dac_cast(reinterpret_cast(pCurrentType->m_RelatedType.m_pBaseType)); + + if (pCurrentType == NULL) + break; + } + + if (pCurrentType != NULL) + return false; // assume we found an infinite loop + + return true; +} +#endif + +#if !defined(DACCESS_COMPILE) +inline PTR_UInt8 FollowRelativePointer(const Int32* pDist) +{ + Int32 dist = *pDist; + + PTR_UInt8 result = (PTR_UInt8)pDist + dist; + + return result; +} + +// Retrieve optional fields associated with this EEType. May be NULL if no such fields exist. +inline PTR_OptionalFields EEType::get_OptionalFields() +{ + if ((m_usFlags & OptionalFieldsFlag) == 0) + return NULL; + + UInt32 cbOptionalFieldsOffset = GetFieldOffset(ETF_OptionalFieldsPtr); + +#if !defined(USE_PORTABLE_HELPERS) + if (!IsDynamicType()) + { + return (OptionalFields*)FollowRelativePointer((Int32*)((UInt8*)this + cbOptionalFieldsOffset)); + } + else +#endif + { + return *(OptionalFields**)((UInt8*)this + cbOptionalFieldsOffset); + } +} + +// Get flags that are less commonly set on EETypes. +inline UInt32 EEType::get_RareFlags() +{ + OptionalFields * pOptFields = get_OptionalFields(); + + // If there are no optional fields then none of the rare flags have been set. + if (!pOptFields) + return 0; + + // Get the flags from the optional fields. The default is zero if that particular field was not included. + return pOptFields->GetRareFlags(0); +} + +inline TypeManagerHandle* EEType::GetTypeManagerPtr() +{ + UInt32 cbOffset = GetFieldOffset(ETF_TypeManagerIndirection); + +#if !defined(USE_PORTABLE_HELPERS) + if (!IsDynamicType()) + { + return (TypeManagerHandle*)FollowRelativePointer((Int32*)((UInt8*)this + cbOffset)); + } + else +#endif + { + return *(TypeManagerHandle**)((UInt8*)this + cbOffset); + } +} +#endif // !defined(DACCESS_COMPILE) + +// Calculate the offset of a field of the EEType that has a variable offset. +__forceinline UInt32 EEType::GetFieldOffset(EETypeField eField) +{ + // First part of EEType consists of the fixed portion followed by the vtable. + UInt32 cbOffset = offsetof(EEType, m_VTable) + (sizeof(UIntTarget) * m_usNumVtableSlots); + + // Then we have the interface map. + if (eField == ETF_InterfaceMap) + { + ASSERT(GetNumInterfaces() > 0); + return cbOffset; + } + cbOffset += sizeof(EEInterfaceInfo) * GetNumInterfaces(); + + const UInt32 relativeOrFullPointerOffset = +#if USE_PORTABLE_HELPERS + sizeof(UIntTarget); +#else + IsDynamicType() ? sizeof(UIntTarget) : sizeof(UInt32); +#endif + + // Followed by the type manager indirection cell. + if (eField == ETF_TypeManagerIndirection) + { + return cbOffset; + } + cbOffset += relativeOrFullPointerOffset; + +#if SUPPORTS_WRITABLE_DATA + // Followed by writable data. + if (eField == ETF_WritableData) + { + return cbOffset; + } + cbOffset += relativeOrFullPointerOffset; +#endif + + // Followed by the pointer to the finalizer method. + if (eField == ETF_Finalizer) + { + ASSERT(HasFinalizer()); + return cbOffset; + } + if (HasFinalizer()) + cbOffset += relativeOrFullPointerOffset; + + // Followed by the pointer to the optional fields. 
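FollowRelativePointer() above resolves an image-relative value: the stored 32-bit signed delta is added to the address of the delta itself. A minimal standalone model of that resolution (here the delta is computed at runtime purely for illustration; in a real image it is emitted at compile/link time, and the two statics are assumed to be close enough for a 32-bit delta):

#include <cstdint>
#include <cstdio>

static uint8_t* FollowRelative(const int32_t* pDelta)
{
    return (uint8_t*)pDelta + *pDelta;                 // target = address of delta + delta
}

int main()
{
    static uint8_t target = 0xAB;
    static int32_t delta;
    delta = (int32_t)((intptr_t)&target - (intptr_t)&delta);   // self-relative offset to 'target'
    printf("%02x\n", (unsigned)*FollowRelative(&delta));       // prints "ab"
    return 0;
}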
+ if (eField == ETF_OptionalFieldsPtr) + { + ASSERT(HasOptionalFields()); + return cbOffset; + } + if (HasOptionalFields()) + cbOffset += relativeOrFullPointerOffset; + + // Followed by the pointer to the sealed virtual slots + if (eField == ETF_SealedVirtualSlots) + return cbOffset; + + UInt32 rareFlags = get_RareFlags(); + + // in the case of sealed vtable entries on static types, we have a UInt sized relative pointer + if (rareFlags & HasSealedVTableEntriesFlag) + cbOffset += relativeOrFullPointerOffset; + + if (eField == ETF_DynamicDispatchMap) + { + ASSERT(IsDynamicType()); + return cbOffset; + } + if ((rareFlags & HasDynamicallyAllocatedDispatchMapFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_GenericDefinition) + { + ASSERT(IsGeneric()); + return cbOffset; + } + if (IsGeneric()) + cbOffset += relativeOrFullPointerOffset; + + if (eField == ETF_GenericComposition) + { + ASSERT(IsGeneric()); + return cbOffset; + } + if (IsGeneric()) + cbOffset += relativeOrFullPointerOffset; + + if (eField == ETF_DynamicModule) + { + ASSERT((rareFlags & HasDynamicModuleFlag) != 0); + return cbOffset; + } + + if ((rareFlags & HasDynamicModuleFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicTemplateType) + { + ASSERT(IsDynamicType()); + return cbOffset; + } + if (IsDynamicType()) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicGcStatics) + { + ASSERT((rareFlags & IsDynamicTypeWithGcStaticsFlag) != 0); + return cbOffset; + } + if ((rareFlags & IsDynamicTypeWithGcStaticsFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicNonGcStatics) + { + ASSERT((rareFlags & IsDynamicTypeWithNonGcStaticsFlag) != 0); + return cbOffset; + } + if ((rareFlags & IsDynamicTypeWithNonGcStaticsFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicThreadStaticOffset) + { + ASSERT((rareFlags & IsDynamicTypeWithThreadStaticsFlag) != 0); + return cbOffset; + } + if ((rareFlags & IsDynamicTypeWithThreadStaticsFlag) != 0) + cbOffset += sizeof(UInt32); + + ASSERT(!"Unknown EEType field type"); + return 0; +} +#endif // __eetype_inl__ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/gcinfo.h b/src/coreclr/src/nativeaot/Runtime/inc/gcinfo.h new file mode 100644 index 0000000000000..25f4d1c4458f5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/gcinfo.h @@ -0,0 +1,1588 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*****************************************************************************/ +#ifndef _GCINFO_H_ +#define _GCINFO_H_ +/*****************************************************************************/ + +// Keep definitions in this file in sync with Nutc\UTC\gcinfo.h + +#ifdef TARGET_ARM + +#define NUM_PRESERVED_REGS 9 + +enum RegMask +{ + RBM_R0 = 0x0001, + RBM_R1 = 0x0002, + RBM_R2 = 0x0004, + RBM_R3 = 0x0008, + RBM_R4 = 0x0010, // callee saved + RBM_R5 = 0x0020, // callee saved + RBM_R6 = 0x0040, // callee saved + RBM_R7 = 0x0080, // callee saved + RBM_R8 = 0x0100, // callee saved + RBM_R9 = 0x0200, // callee saved + RBM_R10 = 0x0400, // callee saved + RBM_R11 = 0x0800, // callee saved + RBM_R12 = 0x1000, + RBM_SP = 0x2000, + RBM_LR = 0x4000, // callee saved, but not valid to be alive across a call! 
+ RBM_PC = 0x8000, + RBM_RETVAL = RBM_R0, + RBM_CALLEE_SAVED_REGS = (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_LR), + RBM_CALLEE_SAVED_REG_COUNT = 9, + // Special case: LR is callee saved, but may not appear as a live GC ref except + // in the leaf frame because calls will trash it. Therefore, we ALSO consider + // it a scratch register. + RBM_SCRATCH_REGS = (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R12|RBM_LR), + RBM_SCRATCH_REG_COUNT = 6, +}; + +enum RegNumber +{ + RN_R0 = 0, + RN_R1 = 1, + RN_R2 = 2, + RN_R3 = 3, + RN_R4 = 4, + RN_R5 = 5, + RN_R6 = 6, + RN_R7 = 7, + RN_R8 = 8, + RN_R9 = 9, + RN_R10 = 10, + RN_R11 = 11, + RN_R12 = 12, + RN_SP = 13, + RN_LR = 14, + RN_PC = 15, + + RN_NONE = 16, +}; + +enum CalleeSavedRegNum +{ + CSR_NUM_R4 = 0x00, + CSR_NUM_R5 = 0x01, + CSR_NUM_R6 = 0x02, + CSR_NUM_R7 = 0x03, + CSR_NUM_R8 = 0x04, + CSR_NUM_R9 = 0x05, + CSR_NUM_R10 = 0x06, + CSR_NUM_R11 = 0x07, + // NOTE: LR is omitted because it may not be live except as a 'scratch' reg +}; + +enum CalleeSavedRegMask +{ + CSR_MASK_NONE = 0x00, + CSR_MASK_R4 = 0x001, + CSR_MASK_R5 = 0x002, + CSR_MASK_R6 = 0x004, + CSR_MASK_R7 = 0x008, + CSR_MASK_R8 = 0x010, + CSR_MASK_R9 = 0x020, + CSR_MASK_R10 = 0x040, + CSR_MASK_R11 = 0x080, + CSR_MASK_LR = 0x100, + + CSR_MASK_ALL = 0x1ff, + CSR_MASK_HIGHEST = 0x100, +}; + +enum ScratchRegNum +{ + SR_NUM_R0 = 0x00, + SR_NUM_R1 = 0x01, + SR_NUM_R2 = 0x02, + SR_NUM_R3 = 0x03, + SR_NUM_R12 = 0x04, + SR_NUM_LR = 0x05, +}; + +enum ScratchRegMask +{ + SR_MASK_NONE = 0x00, + SR_MASK_R0 = 0x01, + SR_MASK_R1 = 0x02, + SR_MASK_R2 = 0x04, + SR_MASK_R3 = 0x08, + SR_MASK_R12 = 0x10, + SR_MASK_LR = 0x20, +}; + +#elif defined(TARGET_ARM64) + +enum RegMask +{ + RBM_NONE = 0, + + RBM_X0 = 0x00000001, + RBM_X1 = 0x00000002, + RBM_X2 = 0x00000004, + RBM_X3 = 0x00000008, + RBM_X4 = 0x00000010, + RBM_X5 = 0x00000020, + RBM_X6 = 0x00000040, + RBM_X7 = 0x00000080, + RBM_X8 = 0x00000100, // ARM64 ABI: indirect result register + RBM_X9 = 0x00000200, + RBM_X10 = 0x00000400, + RBM_X11 = 0x00000800, + RBM_X12 = 0x00001000, + RBM_X13 = 0x00002000, + RBM_X14 = 0x00004000, + RBM_X15 = 0x00008000, + + RBM_XIP0 = 0x00010000, // This one is occasionally used as a scratch register (but can be destroyed by branching or a call) + RBM_XIP1 = 0x00020000, // This one may be also used as a scratch register (but can be destroyed by branching or a call) + RBM_XPR = 0x00040000, + + RBM_X19 = 0x00080000, // RA_CALLEESAVE + RBM_X20 = 0x00100000, // RA_CALLEESAVE + RBM_X21 = 0x00200000, // RA_CALLEESAVE + RBM_X22 = 0x00400000, // RA_CALLEESAVE + RBM_X23 = 0x00800000, // RA_CALLEESAVE + RBM_X24 = 0x01000000, // RA_CALLEESAVE + RBM_X25 = 0x02000000, // RA_CALLEESAVE + RBM_X26 = 0x04000000, // RA_CALLEESAVE + RBM_X27 = 0x08000000, // RA_CALLEESAVE + RBM_X28 = 0x10000000, // RA_CALLEESAVE + + RBM_FP = 0x20000000, + RBM_LR = 0x40000000, + RBM_SP = 0x80000000, + + RBM_RETVAL = RBM_X8, + // Note: Callee saved regs: X19-X28; FP and LR are treated as callee-saved in unwinding code + RBM_CALLEE_SAVED_REG_COUNT = 12, + + // Scratch regs: X0-X15, XIP0, XIP1, LR + RBM_SCRATCH_REG_COUNT = 19, +}; + +#define NUM_PRESERVED_REGS RBM_CALLEE_SAVED_REG_COUNT + +// Number of the callee-saved registers stored in the fixed header +#define NUM_PRESERVED_REGS_LOW 9 +#define MASK_PRESERVED_REGS_LOW ((1 << NUM_PRESERVED_REGS_LOW) - 1) + +enum RegNumber +{ + RN_X0 = 0, + RN_X1 = 1, + RN_X2 = 2, + RN_X3 = 3, + RN_X4 = 4, + RN_X5 = 5, + RN_X6 = 6, + RN_X7 = 7, + RN_X8 = 8, // indirect result register + RN_X9 = 9, + RN_X10 = 10, + 
RN_X11 = 11, + RN_X12 = 12, + RN_X13 = 13, + RN_X14 = 14, + RN_X15 = 15, + + RN_XIP0 = 16, + RN_XIP1 = 17, + RN_XPR = 18, + + RN_X19 = 19, // RA_CALLEESAVE + RN_X20 = 20, // RA_CALLEESAVE + RN_X21 = 21, // RA_CALLEESAVE + RN_X22 = 22, // RA_CALLEESAVE + RN_X23 = 23, // RA_CALLEESAVE + RN_X24 = 24, // RA_CALLEESAVE + RN_X25 = 25, // RA_CALLEESAVE + RN_X26 = 26, // RA_CALLEESAVE + RN_X27 = 27, // RA_CALLEESAVE + RN_X28 = 28, // RA_CALLEESAVE + + RN_FP = 29, + RN_LR = 30, + RN_SP = 31, + + RN_NONE = 32, +}; + +enum CalleeSavedRegNum +{ + // NOTE: LR is omitted because it may not be live except as a 'scratch' reg + CSR_NUM_X19 = 1, + CSR_NUM_X20 = 2, + CSR_NUM_X21 = 3, + CSR_NUM_X22 = 4, + CSR_NUM_X23 = 5, + CSR_NUM_X24 = 6, + CSR_NUM_X25 = 7, + CSR_NUM_X26 = 8, + CSR_NUM_X27 = 9, + CSR_NUM_X28 = 10, + CSR_NUM_FP = 11, + CSR_NUM_NONE = 12, +}; + +enum CalleeSavedRegMask +{ + CSR_MASK_NONE = 0x00, + // LR is placed here to reduce the frequency of the long encoding + CSR_MASK_LR = 0x001, + CSR_MASK_X19 = 0x002, + CSR_MASK_X20 = 0x004, + CSR_MASK_X21 = 0x008, + CSR_MASK_X22 = 0x010, + CSR_MASK_X23 = 0x020, + CSR_MASK_X24 = 0x040, + CSR_MASK_X25 = 0x080, + CSR_MASK_X26 = 0x100, + CSR_MASK_X27 = 0x200, + CSR_MASK_X28 = 0x400, + CSR_MASK_FP = 0x800, + + CSR_MASK_ALL = 0xfff, + CSR_MASK_HIGHEST = 0x800, +}; + +enum ScratchRegNum +{ + SR_NUM_X0 = 0, + SR_NUM_X1 = 1, + SR_NUM_X2 = 2, + SR_NUM_X3 = 3, + SR_NUM_X4 = 4, + SR_NUM_X5 = 5, + SR_NUM_X6 = 6, + SR_NUM_X7 = 7, + SR_NUM_X8 = 8, + SR_NUM_X9 = 9, + SR_NUM_X10 = 10, + SR_NUM_X11 = 11, + SR_NUM_X12 = 12, + SR_NUM_X13 = 13, + SR_NUM_X14 = 14, + SR_NUM_X15 = 15, + + SR_NUM_XIP0 = 16, + SR_NUM_XIP1 = 17, + SR_NUM_LR = 18, + + SR_NUM_NONE = 19, +}; + +enum ScratchRegMask +{ + SR_MASK_NONE = 0x00, + SR_MASK_X0 = 0x01, + SR_MASK_X1 = 0x02, + SR_MASK_X2 = 0x04, + SR_MASK_X3 = 0x08, + SR_MASK_X4 = 0x10, + SR_MASK_X5 = 0x20, + SR_MASK_X6 = 0x40, + SR_MASK_X7 = 0x80, + SR_MASK_X8 = 0x100, + SR_MASK_X9 = 0x200, + SR_MASK_X10 = 0x400, + SR_MASK_X11 = 0x800, + SR_MASK_X12 = 0x1000, + SR_MASK_X13 = 0x2000, + SR_MASK_X14 = 0x4000, + SR_MASK_X15 = 0x8000, + + SR_MASK_XIP0 = 0x10000, + SR_MASK_XIP1 = 0x20000, + SR_MASK_LR = 0x40000, +}; + +#else // TARGET_ARM + +#ifdef TARGET_AMD64 +#define NUM_PRESERVED_REGS 8 +#else +#define NUM_PRESERVED_REGS 4 +#endif + +enum RegMask +{ + RBM_EAX = 0x0001, + RBM_ECX = 0x0002, + RBM_EDX = 0x0004, + RBM_EBX = 0x0008, // callee saved + RBM_ESP = 0x0010, + RBM_EBP = 0x0020, // callee saved + RBM_ESI = 0x0040, // callee saved + RBM_EDI = 0x0080, // callee saved + + RBM_R8 = 0x0100, + RBM_R9 = 0x0200, + RBM_R10 = 0x0400, + RBM_R11 = 0x0800, + RBM_R12 = 0x1000, // callee saved + RBM_R13 = 0x2000, // callee saved + RBM_R14 = 0x4000, // callee saved + RBM_R15 = 0x8000, // callee saved + + RBM_RETVAL = RBM_EAX, + +#ifdef TARGET_AMD64 + RBM_CALLEE_SAVED_REGS = (RBM_EDI|RBM_ESI|RBM_EBX|RBM_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15), + RBM_CALLEE_SAVED_REG_COUNT = 8, + RBM_SCRATCH_REGS = (RBM_EAX|RBM_ECX|RBM_EDX|RBM_R8|RBM_R9|RBM_R10|RBM_R11), + RBM_SCRATCH_REG_COUNT = 7, +#else + RBM_CALLEE_SAVED_REGS = (RBM_EDI|RBM_ESI|RBM_EBX|RBM_EBP), + RBM_CALLEE_SAVED_REG_COUNT = 4, + RBM_SCRATCH_REGS = (RBM_EAX|RBM_ECX|RBM_EDX), + RBM_SCRATCH_REG_COUNT = 3, +#endif // TARGET_AMD64 +}; + +enum RegNumber +{ + RN_EAX = 0, + RN_ECX = 1, + RN_EDX = 2, + RN_EBX = 3, + RN_ESP = 4, + RN_EBP = 5, + RN_ESI = 6, + RN_EDI = 7, + RN_R8 = 8, + RN_R9 = 9, + RN_R10 = 10, + RN_R11 = 11, + RN_R12 = 12, + RN_R13 = 13, + RN_R14 = 14, + RN_R15 = 15, + + RN_NONE = 16, +}; + +enum 
CalleeSavedRegNum +{ + CSR_NUM_RBX = 0x00, + CSR_NUM_RSI = 0x01, + CSR_NUM_RDI = 0x02, + CSR_NUM_RBP = 0x03, +#ifdef TARGET_AMD64 + CSR_NUM_R12 = 0x04, + CSR_NUM_R13 = 0x05, + CSR_NUM_R14 = 0x06, + CSR_NUM_R15 = 0x07, +#endif // TARGET_AMD64 +}; + +enum CalleeSavedRegMask +{ + CSR_MASK_NONE = 0x00, + CSR_MASK_RBX = 0x01, + CSR_MASK_RSI = 0x02, + CSR_MASK_RDI = 0x04, + CSR_MASK_RBP = 0x08, + CSR_MASK_R12 = 0x10, + CSR_MASK_R13 = 0x20, + CSR_MASK_R14 = 0x40, + CSR_MASK_R15 = 0x80, + +#ifdef TARGET_AMD64 + CSR_MASK_ALL = 0xFF, + CSR_MASK_HIGHEST = 0x80, +#else + CSR_MASK_ALL = 0x0F, + CSR_MASK_HIGHEST = 0x08, +#endif +}; + +enum ScratchRegNum +{ + SR_NUM_RAX = 0x00, + SR_NUM_RCX = 0x01, + SR_NUM_RDX = 0x02, +#ifdef TARGET_AMD64 + SR_NUM_R8 = 0x03, + SR_NUM_R9 = 0x04, + SR_NUM_R10 = 0x05, + SR_NUM_R11 = 0x06, +#endif // TARGET_AMD64 +}; + +enum ScratchRegMask +{ + SR_MASK_NONE = 0x00, + SR_MASK_RAX = 0x01, + SR_MASK_RCX = 0x02, + SR_MASK_RDX = 0x04, + SR_MASK_R8 = 0x08, + SR_MASK_R9 = 0x10, + SR_MASK_R10 = 0x20, + SR_MASK_R11 = 0x40, +}; + +#endif // TARGET_ARM + +struct GCInfoHeader +{ +private: + UInt16 prologSize : 6; // 0 [0:5] // @TODO: define an 'overflow' encoding for big prologs? + UInt16 hasFunclets : 1; // 0 [6] + UInt16 fixedEpilogSize : 6; // 0 [7] + 1 [0:4] '0' encoding implies that epilog size varies and is encoded for each epilog + UInt16 epilogCountSmall : 2; // 1 [5:6] '3' encoding implies the number of epilogs is encoded separately + UInt16 hasExtraData : 1; // 1 [7] 1: more data follows (dynamic alignment, GS cookie, common vars, etc.) + +#ifdef TARGET_ARM + UInt16 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum + UInt16 ebpFrame : 1; // 2 [2] on x64, this means "has frame pointer and it is RBP", on ARM R7 + UInt16 epilogAtEnd : 1; // 2 [3] + UInt16 hasFrameSize : 1; // 2 [4] 1: frame size is encoded below, 0: frame size is 0 + UInt16 calleeSavedRegMask : NUM_PRESERVED_REGS; // 2 [5:7] 3 [0:5] + UInt16 arm_areParmOrVfpRegsPushed:1; // 3 [6] 1: pushed param reg set (R0-R3) and pushed fp reg start and count are encoded below, 0: no pushed param or fp registers +#elif defined (TARGET_ARM64) + UInt16 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum + UInt16 ebpFrame : 1; // 2 [2] 1: has frame pointer and it is FP + UInt16 epilogAtEnd : 1; // 2 [3] + UInt16 hasFrameSize : 1; // 2 [4] 1: frame size is encoded below, 0: frame size is 0 + UInt16 arm64_longCsrMask : 1; // 2 [5] 1: high bits of calleeSavedRegMask are encoded below + UInt16 arm64_areParmOrVfpRegsPushed : 1; // 2 [6] 1: pushed param reg count (X0-X7) and pushed fp reg set (D8-D15) are encoded below, 0: no pushed param or fp registers + UInt16 arm64_calleeSavedRegMaskLow : NUM_PRESERVED_REGS_LOW; // 2 [7] 3 [0:7] +#else + UInt8 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum + UInt8 ebpFrame : 1; // 2 [2] on x64, this means "has frame pointer and it is RBP", on ARM R7 + UInt8 epilogAtEnd : 1; // 2 [3] +#ifdef TARGET_AMD64 + UInt8 hasFrameSize : 1; // 2 [4] 1: frame size is encoded below, 0: frame size is 0 + UInt8 x64_framePtrOffsetSmall : 2; // 2 [5:6] 00: framePtrOffset = 0x20 + // 01: framePtrOffset = 0x30 + // 10: framePtrOffset = 0x40 + // 11: a variable-length integer 'x64_frameOffset' follows. + UInt8 x64_hasSavedXmmRegs : 1; // 2 [7] any saved xmm registers? +#endif + // X86 X64 + UInt8 calleeSavedRegMask : NUM_PRESERVED_REGS; // 2 [4:7] 3 [0:7] + +#ifdef TARGET_X86 + UInt8 x86_argCountLow : 5; // 3 [0-4] expressed in pointer-sized units // @TODO: steal more bits here? 
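    // (The "N [a:b]" annotations on these bit-fields give the byte index and bit positions each
    //  field occupies within the 4-byte fixed header that EncodeHeader()/DecodeHeader() copy
    //  verbatim; the optional fields below the fixed header are encoded separately.)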
+ UInt8 x86_argCountIsLarge : 1; // 3 [5] if this bit is set, then the high 8 bits are encoded in x86_argCountHigh + UInt8 x86_hasStackChanges : 1; // 3 [6] x86-only, !ebpFrame-only, this method has pushes + // and pops in it, and a string follows this header + // which describes them + UInt8 hasFrameSize : 1; // 3 [7] 1: frame size is encoded below, 0: frame size is 0 +#endif +#endif + + // + // OPTIONAL FIELDS FOLLOW + // + // The following values are encoded with variable-length integers on disk, but are decoded into these + // fields in memory. + // + + // For ARM and ARM64 this field stores the offset of the callee-saved area relative to FP/SP + UInt32 frameSize; // expressed in pointer-sized units, only encoded if hasFrameSize==1 + // OPTIONAL: only encoded if returnKind = MRK_ReturnsToNative + UInt32 reversePinvokeFrameOffset; // expressed in pointer-sized units away from the frame pointer + +#ifdef TARGET_AMD64 + // OPTIONAL: only encoded if x64_framePtrOffsetSmall = 11 + // + // ENCODING NOTE: In the encoding, the variable-sized unsigned will be 7 less than the total number + // of 16-byte units that make up the frame pointer offset. + // + // In memory, this value will always be set and will always be the total number of 16-byte units that make + // up the frame pointer offset. + UInt8 x64_framePtrOffset; // expressed in 16-byte unit + + // OPTIONAL: only encoded using a variable-sized unsigned if x64_hasSavedXmmRegs is set. + // + // An additional optimization is possible because registers xmm0 .. xmm5 should never be saved, + // so they are not encoded in the variable-sized unsigned - instead the mask is shifted right 6 bits + // for encoding. Thus, any subset of registers xmm6 .. xmm12 can be represented using one byte + // - this covers the most frequent cases. + // + // The shift applies to decoding/encoding only though - the actual header field below uses the + // straightforward mapping where bit 0 corresponds to xmm0, bit 1 corresponds to xmm1 and so on. + // + UInt16 x64_savedXmmRegMask; // which xmm regs were saved +#elif defined(TARGET_X86) + // OPTIONAL: only encoded if x86_argCountIsLarge = 1 + // NOTE: because we are using pointer-sized units, only 14 bits are required to represent the entire range + // that can be expressed by a 'ret NNNN' instruction. Therefore, with 6 in the 'low' field and 8 in the + // 'high' field, we are not losing any range here. (Although the need for that full range is debatable.) 
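The note above describes splitting the x86 return-pop count across 5 bits in the fixed header plus an optional extra byte. A standalone round-trip of that split (stand-in struct, 4-byte pointer size assumed):

#include <cstdint>
#include <cassert>

struct ArgCountEncoding
{
    uint8_t low5;      // stored inline in the fixed header (x86_argCountLow)
    bool    isLarge;   // set when the count does not fit in 5 bits (x86_argCountIsLarge)
    uint8_t high8;     // optional extra byte (x86_argCountHigh)
};

static ArgCountEncoding EncodeArgCount(uint32_t popSizeInBytes, uint32_t pointerSize = 4)
{
    uint32_t argCount = popSizeInBytes / pointerSize;
    ArgCountEncoding e = {};
    e.low5 = argCount & 0x1F;
    if (argCount != e.low5)
    {
        e.isLarge = true;
        e.high8 = (uint8_t)(argCount >> 5);
    }
    return e;
}

static uint32_t DecodeArgCount(const ArgCountEncoding& e, uint32_t pointerSize = 4)
{
    uint32_t argCount = e.isLarge ? (((uint32_t)e.high8 << 5) | e.low5) : e.low5;
    return argCount * pointerSize;
}

int main()
{
    assert(DecodeArgCount(EncodeArgCount(0x90)) == 0x90);   // 36 slots: needs the extra byte
    assert(DecodeArgCount(EncodeArgCount(0x40)) == 0x40);   // 16 slots: fits in 5 bits
    return 0;
}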
+ UInt8 x86_argCountHigh; +#elif defined(TARGET_ARM) + // OPTIONAL: only encoded if arm_areParmOrVfpRegsPushed = 1 + UInt8 arm_parmRegsPushedSet; + UInt8 arm_vfpRegFirstPushed; + UInt8 arm_vfpRegPushedCount; +#elif defined(TARGET_ARM64) + // OPTIONAL: high bits of calleeSavedRegMask are encoded only if arm64_longCsrMask = 1; low bits equal to arm64_calleeSavedRegMaskLow + UInt16 calleeSavedRegMask; + + // OPTIONAL: only encoded if arm64_areParmOrVfpRegsPushed = 1 + UInt8 arm64_parmRegsPushedCount; // how many of X0-X7 registers are saved + UInt8 arm64_vfpRegsPushedMask; // which of D8-D15 registers are saved +#endif + + // + // OPTIONAL: only encoded if hasExtraData = 1 + union + { + struct + { +#if defined(TARGET_ARM64) + UInt8 FPLRAreOnTop : 1; // [0] 1: FP and LR are saved on top of locals, not at the bottom (see MdmSaveFPAndLRAtTopOfLocalsArea) + UInt8 reg1ReturnKind : 2; // [1:2] One of MRK_Returns{Scalar|Object|Byref} constants describing value returned in x1 if any + UInt8 hasGSCookie : 1; // [3] 1: frame uses GS cookie + UInt8 hasCommonVars : 1; // [4] 1: method has a list of "common vars" + // as an optimization for methods with many call sites and variables + UInt8 : 3; // [5:7] unused bits +#else + UInt8 logStackAlignment : 4; // [0:3] binary logarithm of frame alignment (3..15) or 0 + UInt8 hasGSCookie : 1; // [4] 1: frame uses GS cookie + UInt8 hasCommonVars : 1; // [5] 1: method has a list of "common vars" + // as an optimization for methods with many call sites and variables + UInt8 : 2; // [6:7] unused bits +#endif +#pragma warning(suppress:4201) // nameless struct + }; + UInt8 extraDataHeader; + }; + + // OPTIONAL: only encoded if logStackAlignment != 0 + UInt8 paramPointerReg; + + // OPTIONAL: only encoded if epilogCountSmall = 3 + UInt16 epilogCount; + + // OPTIONAL: only encoded if gsCookie = 1 + UInt32 gsCookieOffset; // expressed in pointer-sized units away from the frame pointer + + // + // OPTIONAL: only encoded if hasFunclets = 1 + // {numFunclets} // encoded as variable-length unsigned + // {start-funclet0} // offset from start of previous funclet, encoded as variable-length unsigned + // {start-funclet1} // + // {start-funclet2} + // ... + // {sizeof-funclet(N-1)} // numFunclets == N (i.e. there are N+1 sizes here) + // ----------------- + // {GCInfoHeader-funclet0} // encoded as normal, must not have 'hasFunclets' set. + // {GCInfoHeader-funclet1} + // ... + // {GCInfoHeader-funclet(N-1)} + + // WARNING: + // WARNING: Do not add fields to the file-format after the funclet header encodings -- these are decoded + // WARNING: recursively and 'in-place' when looking for the info associated with a funclet. Therefore, + // WARNING: in that case, we cannot easily continue to decode things associated with the main body + // WARNING: GCInfoHeader once we start this recursive decode. + // WARNING: + + // ------------------------------------------------------------------------------------------------------- + // END of file-encoding-related-fields + // ------------------------------------------------------------------------------------------------------- + + // The following fields are not encoded in the file format, they are just used as convenience placeholders + // for decode state. 
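To make the funclet table layout described above concrete, here is a standalone sketch of the lookup DecodeHeader() performs further down: funclet start offsets are delta-encoded against the previous funclet, and a code offset maps either to the main body (index -1) or to the funclet whose range contains it. Plain integers stand in for the variable-length-encoded stream.

#include <cstdint>
#include <cassert>
#include <vector>

// Returns -1 for the main body, otherwise the index of the containing funclet,
// and reports that funclet's absolute start offset through pFuncletStart.
static int FindFunclet(const std::vector<uint32_t>& startDeltas, uint32_t codeOffset,
                       uint32_t* pFuncletStart)
{
    int idx = -1;
    uint32_t prevStart = 0;
    for (size_t i = 0; i < startDeltas.size(); i++)
    {
        uint32_t thisStart = prevStart + startDeltas[i];
        if (codeOffset < thisStart)
            break;
        idx = (int)i;
        prevStart = thisStart;
    }
    *pFuncletStart = (idx >= 0) ? prevStart : 0;
    return idx;
}

int main()
{
    // Funclets start at 0x100, 0x140 and 0x200 (deltas 0x100, 0x40, 0xC0).
    std::vector<uint32_t> deltas = { 0x100, 0x40, 0xC0 };
    uint32_t start;
    assert(FindFunclet(deltas, 0x050, &start) == -1);                    // main body
    assert(FindFunclet(deltas, 0x150, &start) ==  1 && start == 0x140);  // second funclet
    assert(FindFunclet(deltas, 0x250, &start) ==  2 && start == 0x200);  // last funclet
    return 0;
}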
+ UInt32 funcletOffset; // non-zero indicates that this GCInfoHeader is for a funclet + +public: + // + // CONSTANTS / STATIC STUFF + // + + enum MethodReturnKind + { + MRK_ReturnsScalar = 0, + MRK_ReturnsObject = 1, + MRK_ReturnsByref = 2, + MRK_ReturnsToNative = 3, + +#if defined(TARGET_ARM64) + // Cases for structs returned in two registers. + // Naming scheme: MRK_reg0Kind_reg1Kind. + // Encoding scheme: . + // We do not distinguish returning a scalar in reg1 and no return value in reg1, + // which means we can use MRK_ReturnsObject for MRK_Obj_Scalar, etc. + MRK_Scalar_Obj = (MRK_ReturnsObject << 2) | MRK_ReturnsScalar, + MRK_Obj_Obj = (MRK_ReturnsObject << 2) | MRK_ReturnsObject, + MRK_Byref_Obj = (MRK_ReturnsObject << 2) | MRK_ReturnsByref, + MRK_Scalar_Byref = (MRK_ReturnsByref << 2) | MRK_ReturnsScalar, + MRK_Obj_Byref = (MRK_ReturnsByref << 2) | MRK_ReturnsObject, + MRK_Byref_Byref = (MRK_ReturnsByref << 2) | MRK_ReturnsByref, + + MRK_LastValid = MRK_Byref_Byref, + // Illegal or uninitialized value. Never written to the image. + MRK_Unknown = 0xff, +#else + MRK_LastValid = MRK_ReturnsToNative, + // Illegal or uninitialized value. Never written to the image. + MRK_Unknown = 4, +#endif + }; + + enum EncodingConstants + { + EC_SizeOfFixedHeader = 4, + EC_MaxFrameByteSize = 10*1024*1024, + EC_MaxReversePInvokeFrameByteOffset = 10*1024*1024, + EC_MaxX64FramePtrByteOffset = UInt16_MAX * 0x10, + EC_MaxEpilogCountSmall = 3, + EC_MaxEpilogCount = 64*1024 - 1, + }; + + // + // MEMBER FUNCTIONS + // + + void Init() + { + memset(this, 0, sizeof(GCInfoHeader)); + } + + // + // SETTERS + // + + void SetPrologSize(UInt32 sizeInBytes) + { +#if defined (TARGET_ARM64) + // For arm64 we encode multiples of 4, rather than raw bytes, since instructions are all same size. + ASSERT((sizeInBytes & 3) == 0); + prologSize = sizeInBytes >> 2; + ASSERT(prologSize == sizeInBytes >> 2); +#else + prologSize = sizeInBytes; + ASSERT(prologSize == sizeInBytes); +#endif + } + + void SetHasFunclets(bool fHasFunclets) + { + hasFunclets = fHasFunclets ? 1 : 0; + } + + void PokeFixedEpilogSize(UInt32 sizeInBytes) + { +#if defined (TARGET_ARM64) + // For arm64 we encode multiples of 4, rather than raw bytes, since instructions are all same size. + ASSERT((sizeInBytes & 3) == 0); + fixedEpilogSize = sizeInBytes >> 2; + ASSERT(fixedEpilogSize == sizeInBytes >> 2); +#else + fixedEpilogSize = sizeInBytes; + ASSERT(fixedEpilogSize == sizeInBytes); +#endif + } + + void SetFixedEpilogSize(UInt32 sizeInBytes, bool varyingSizes) + { + if (varyingSizes) + fixedEpilogSize = 0; + else + { + ASSERT(sizeInBytes != 0); +#if defined (TARGET_ARM64) + // For arm64 we encode multiples of 4, rather than raw bytes, since instructions are all same size. + ASSERT((sizeInBytes & 3) == 0); + fixedEpilogSize = sizeInBytes >> 2; + ASSERT(fixedEpilogSize == sizeInBytes >> 2); +#else + fixedEpilogSize = sizeInBytes; + ASSERT(fixedEpilogSize == sizeInBytes); +#endif + } + } + + void SetEpilogCount(UInt32 count, bool isAtEnd) + { + epilogCount = ToUInt16(count); + epilogAtEnd = isAtEnd ? 1 : 0; + + ASSERT(epilogCount == count); + ASSERT((count == 1) || !isAtEnd); + epilogCountSmall = count < EC_MaxEpilogCountSmall ? 
count : EC_MaxEpilogCountSmall; + } + +#if !defined(TARGET_ARM64) + void SetReturnKind(MethodReturnKind kind) + { + ASSERT(kind <= MRK_ReturnsToNative); // not enough bits to encode 'unknown' + returnKind = kind; + } + + void SetDynamicAlignment(UInt8 logByteAlignment) + { +#ifdef TARGET_X86 + ASSERT(logByteAlignment >= 3); // 4 byte aligned frames +#else + ASSERT(logByteAlignment >= 4); // 8 byte aligned frames +#endif + + hasExtraData = 1; + logStackAlignment = logByteAlignment; + ASSERT(logStackAlignment == logByteAlignment); + paramPointerReg = RN_NONE; + } +#endif // !defined(TARGET_ARM64) + +#if defined(TARGET_ARM64) + void SetFPLROnTop(void) + { + hasExtraData = 1; + FPLRAreOnTop = 1; + } +#endif + + void SetGSCookieOffset(UInt32 offsetInBytes) + { + ASSERT(offsetInBytes != 0); + ASSERT(0 == (offsetInBytes % POINTER_SIZE)); + hasExtraData = 1; + hasGSCookie = 1; + gsCookieOffset = offsetInBytes / POINTER_SIZE; + } + + void SetHasCommonVars() + { + hasExtraData = 1; + hasCommonVars = 1; + } + + void SetParamPointer(RegNumber regNum, UInt32 offsetInBytes, bool isOffsetFromSP = false) + { + UNREFERENCED_PARAMETER(offsetInBytes); + UNREFERENCED_PARAMETER(isOffsetFromSP); + ASSERT(HasDynamicAlignment()); // only expected for dynamic aligned frames + ASSERT(offsetInBytes==0); // not yet supported + + paramPointerReg = (UInt8)regNum; + } + + void SetFramePointer(RegNumber regNum, UInt32 offsetInBytes, bool isOffsetFromSP = false) + { + UNREFERENCED_PARAMETER(offsetInBytes); + UNREFERENCED_PARAMETER(isOffsetFromSP); + + if (regNum == RN_NONE) + { + ebpFrame = 0; + } + else + { +#ifdef TARGET_ARM + ASSERT(regNum == RN_R7); +#elif defined(TARGET_AMD64) || defined(TARGET_X86) + ASSERT(regNum == RN_EBP); +#elif defined(TARGET_ARM64) + ASSERT(regNum == RN_FP); +#else + ASSERT(!"NYI"); +#endif + ebpFrame = 1; + } + ASSERT(offsetInBytes == 0 || isOffsetFromSP); + +#ifdef TARGET_AMD64 + if (isOffsetFromSP) + offsetInBytes += SKEW_FOR_OFFSET_FROM_SP; + + ASSERT((offsetInBytes % 0x10) == 0); + UInt32 offsetInSlots = offsetInBytes / 0x10; + if (offsetInSlots >= 3 && offsetInSlots <= 3 + 2) + { + x64_framePtrOffsetSmall = offsetInSlots - 3; + } + else + { + x64_framePtrOffsetSmall = 3; + } + x64_framePtrOffset = (UInt8)offsetInSlots; + ASSERT(x64_framePtrOffset == offsetInSlots); +#else + ASSERT(offsetInBytes == 0 && !isOffsetFromSP); +#endif // TARGET_AMD64 + } + + void SetFrameSize(UInt32 frameSizeInBytes) + { + ASSERT(0 == (frameSizeInBytes % POINTER_SIZE)); + frameSize = (frameSizeInBytes / POINTER_SIZE); + ASSERT(frameSize == (frameSizeInBytes / POINTER_SIZE)); + if (frameSize != 0) + { + hasFrameSize = 1; + } + } + + void SetSavedRegs(CalleeSavedRegMask regMask) + { + calleeSavedRegMask = (UInt16)regMask; + } + + void SetRegSaved(CalleeSavedRegMask regMask) + { + calleeSavedRegMask |= regMask; + } + + void SetReversePinvokeFrameOffset(int offsetInBytes) + { + ASSERT(HasFramePointer()); + ASSERT((offsetInBytes % POINTER_SIZE) == 0); + ASSERT(GetReturnKind() == MRK_ReturnsToNative); + +#if defined(TARGET_ARM) || defined(TARGET_AMD64) || defined(TARGET_ARM64) + // The offset can be either positive or negative on ARM and x64. + bool isNeg = (offsetInBytes < 0); + UInt32 uOffsetInBytes = isNeg ? -offsetInBytes : offsetInBytes; + UInt32 uEncodedVal = ((uOffsetInBytes / POINTER_SIZE) << 1) | (isNeg ? 
1 : 0); + reversePinvokeFrameOffset = uEncodedVal; + ASSERT(reversePinvokeFrameOffset == uEncodedVal); +#elif defined (TARGET_X86) + // Use a positive number because it encodes better and + // the offset is always negative on x86. + ASSERT(offsetInBytes < 0); + reversePinvokeFrameOffset = (-offsetInBytes / POINTER_SIZE); + ASSERT(reversePinvokeFrameOffset == (UInt32)(-offsetInBytes / POINTER_SIZE)); +#else + ASSERT(!"NYI"); +#endif + } + +#ifdef TARGET_X86 + void SetReturnPopSize(UInt32 popSizeInBytes) + { + ASSERT(0 == (popSizeInBytes % POINTER_SIZE)); + ASSERT(GetReturnPopSize() == 0 || GetReturnPopSize() == (int)popSizeInBytes); + + UInt32 argCount = popSizeInBytes / POINTER_SIZE; + x86_argCountLow = argCount & 0x1F; + if (argCount != x86_argCountLow) + { + x86_argCountIsLarge = 1; + x86_argCountHigh = (UInt8)(argCount >> 5); + } + } + + void SetHasStackChanges() + { + x86_hasStackChanges = 1; + } +#endif // TARGET_X86 + +#ifdef TARGET_ARM + void SetParmRegsPushed(ScratchRegMask pushedParmRegs) + { + // should be a subset of {RO-R3} + ASSERT((pushedParmRegs & ~(SR_MASK_R0|SR_MASK_R1|SR_MASK_R2|SR_MASK_R3)) == 0); + arm_areParmOrVfpRegsPushed = pushedParmRegs != 0 || arm_vfpRegPushedCount != 0; + arm_parmRegsPushedSet = (UInt8)pushedParmRegs; + } + + void SetVfpRegsPushed(UInt8 vfpRegFirstPushed, UInt8 vfpRegPushedCount) + { + // mrt100.dll really only supports pushing a subinterval of d8-d15 + // these are the preserved floating point registers according to the ABI spec + ASSERT(8 <= vfpRegFirstPushed && vfpRegFirstPushed + vfpRegPushedCount <= 16 || vfpRegPushedCount == 0); + arm_vfpRegFirstPushed = vfpRegFirstPushed; + arm_vfpRegPushedCount = vfpRegPushedCount; + arm_areParmOrVfpRegsPushed = arm_parmRegsPushedSet != 0 || vfpRegPushedCount != 0; + } +#elif defined(TARGET_ARM64) + void SetParmRegsPushedCount(UInt8 parmRegsPushedCount) + { + // pushed parameter registers are a subset of {R0-R7} + ASSERT(parmRegsPushedCount <= 8); + arm64_parmRegsPushedCount = parmRegsPushedCount; + arm64_areParmOrVfpRegsPushed = (arm64_parmRegsPushedCount != 0) || (arm64_vfpRegsPushedMask != 0); + } + + void SetVfpRegsPushed(UInt8 vfpRegsPushedMask) + { + arm64_vfpRegsPushedMask = vfpRegsPushedMask; + arm64_areParmOrVfpRegsPushed = (arm64_parmRegsPushedCount != 0) || (arm64_vfpRegsPushedMask != 0); + } +#elif defined(TARGET_AMD64) + void SetSavedXmmRegs(UInt32 savedXmmRegMask) + { + // any subset of xmm6-xmm15 may be saved, but no registers in xmm0-xmm5 should be present + ASSERT((savedXmmRegMask & 0xffff003f) == 0); + x64_hasSavedXmmRegs = savedXmmRegMask != 0; + x64_savedXmmRegMask = (UInt16)savedXmmRegMask; + } +#endif + + void SetFuncletOffset(UInt32 offset) + { + funcletOffset = offset; + } + + // + // GETTERS + // + UInt32 GetPrologSize() + { +#if defined (TARGET_ARM64) + return prologSize << 2; +#else + return prologSize; +#endif + } + + bool HasFunclets() + { + return (hasFunclets != 0); + } + + bool HasVaryingEpilogSizes() + { + return fixedEpilogSize == 0; + } + + UInt32 PeekFixedEpilogSize() + { +#if defined (TARGET_ARM64) + return fixedEpilogSize << 2; +#else + return fixedEpilogSize; +#endif + } + + UInt32 GetFixedEpilogSize() + { + ASSERT(!HasVaryingEpilogSizes()); +#if defined (TARGET_ARM64) + return fixedEpilogSize << 2; +#else + return fixedEpilogSize; +#endif + } + + UInt32 GetEpilogCount() + { + return epilogCount; + } + + bool IsEpilogAtEnd() + { + return (epilogAtEnd != 0); + } + + MethodReturnKind GetReturnKind() + { +#if defined(TARGET_ARM64) + return 
(MethodReturnKind)((reg1ReturnKind << 2) | returnKind); +#else + return (MethodReturnKind)returnKind; +#endif + } + + bool ReturnsToNative() + { + return (GetReturnKind() == MRK_ReturnsToNative); + } + + bool HasFramePointer() const + { + return !!ebpFrame; + } + + bool IsFunclet() + { + return funcletOffset != 0; + } + + UInt32 GetFuncletOffset() + { + return funcletOffset; + } + + int GetPreservedRegsSaveSize() const // returned in bytes + { + UInt32 count = 0; + UInt32 mask = calleeSavedRegMask; + while (mask != 0) + { + count += mask & 1; + mask >>= 1; + } + + return count * POINTER_SIZE; + } + + int GetParamPointerReg() + { + return paramPointerReg; + } + + bool HasDynamicAlignment() + { +#if defined(TARGET_ARM64) + return false; +#else + return !!logStackAlignment; +#endif + } + + UInt32 GetDynamicAlignment() + { +#if defined(TARGET_ARM64) + ASSERT(!"Not supported"); + return 1; +#else + return 1 << logStackAlignment; +#endif + } + + bool HasGSCookie() + { + return hasGSCookie; + } + +#if defined(TARGET_ARM64) + bool AreFPLROnTop() const + { + return FPLRAreOnTop; + } +#endif + + UInt32 GetGSCookieOffset() + { + ASSERT(hasGSCookie); + return gsCookieOffset * POINTER_SIZE; + } + + bool HasCommonVars() const + { + return hasCommonVars; + } + +#ifdef TARGET_AMD64 + static const UInt32 SKEW_FOR_OFFSET_FROM_SP = 0x10; + + int GetFramePointerOffset() const // returned in bytes + { + // traditional frames where FP points to the pushed FP have fp offset == 0 + if (x64_framePtrOffset == 0) + return 0; + + // otherwise it's an x64 style frame where the fp offset is measured from the sp + // at the end of the prolog + int offsetFromSP = GetFramePointerOffsetFromSP(); + + int preservedRegsSaveSize = GetPreservedRegsSaveSize(); + + // we when called from the binder, rbp isn't set to be a preserved reg, + // when called from the runtime, it is - compensate for this inconsistency + if (IsRegSaved(CSR_MASK_RBP)) + preservedRegsSaveSize -= POINTER_SIZE; + + return offsetFromSP - preservedRegsSaveSize - GetFrameSize(); + } + + bool IsFramePointerOffsetFromSP() const + { + return x64_framePtrOffset != 0; + } + + int GetFramePointerOffsetFromSP() const + { + ASSERT(IsFramePointerOffsetFromSP()); + int offsetFromSP; + offsetFromSP = x64_framePtrOffset * 0x10; + ASSERT(offsetFromSP >= SKEW_FOR_OFFSET_FROM_SP); + offsetFromSP -= SKEW_FOR_OFFSET_FROM_SP; + + return offsetFromSP; + } + + int GetFramePointerReg() + { + return RN_EBP; + } + + bool HasSavedXmmRegs() + { + return x64_hasSavedXmmRegs != 0; + } + + UInt16 GetSavedXmmRegMask() + { + ASSERT(x64_hasSavedXmmRegs); + return x64_savedXmmRegMask; + } +#elif defined(TARGET_X86) + int GetReturnPopSize() // returned in bytes + { + if (!x86_argCountIsLarge) + { + return x86_argCountLow * POINTER_SIZE; + } + return ((x86_argCountHigh << 5) | x86_argCountLow) * POINTER_SIZE; + } + + bool HasStackChanges() + { + return !!x86_hasStackChanges; + } +#endif + + int GetFrameSize() const + { + return frameSize * POINTER_SIZE; + } + + + int GetReversePinvokeFrameOffset() + { +#if defined(TARGET_ARM) || defined(TARGET_AMD64) || defined(TARGET_ARM64) + // The offset can be either positive or negative on ARM. + Int32 offsetInBytes; + UInt32 uEncodedVal = reversePinvokeFrameOffset; + bool isNeg = ((uEncodedVal & 1) == 1); + offsetInBytes = (uEncodedVal >> 1) * POINTER_SIZE; + offsetInBytes = isNeg ? 
-offsetInBytes : offsetInBytes; + return offsetInBytes; +#elif defined(TARGET_X86) + // it's always at "EBP - something", so we encode it as a positive + // number and then apply the negative here. + int unsignedOffset = reversePinvokeFrameOffset * POINTER_SIZE; + return -unsignedOffset; +#else + ASSERT(!"NYI"); +#endif + } + + CalleeSavedRegMask GetSavedRegs() + { + return (CalleeSavedRegMask) calleeSavedRegMask; + } + + bool IsRegSaved(CalleeSavedRegMask reg) const + { + return (0 != (calleeSavedRegMask & reg)); + } + +#ifdef TARGET_ARM + bool AreParmRegsPushed() + { + return arm_parmRegsPushedSet != 0; + } + + UInt16 ParmRegsPushedCount() + { + UInt8 set = arm_parmRegsPushedSet; + UInt8 count = 0; + while (set != 0) + { + count += set & 1; + set >>= 1; + } + return count; + } + + UInt8 GetVfpRegFirstPushed() + { + return arm_vfpRegFirstPushed; + } + + UInt8 GetVfpRegPushedCount() + { + return arm_vfpRegPushedCount; + } +#elif defined(TARGET_ARM64) + UInt8 ParmRegsPushedCount() + { + return arm64_parmRegsPushedCount; + } + + UInt8 GetVfpRegsPushedMask() + { + return arm64_vfpRegsPushedMask; + } +#endif + + // + // ENCODING HELPERS + // +#ifndef DACCESS_COMPILE + size_t EncodeHeader(UInt8 * & pDest) + { +#ifdef _DEBUG + UInt8 * pStart = pDest; +#endif // _DEBUG + +#if defined(TARGET_ARM64) + UInt8 calleeSavedRegMaskHigh = calleeSavedRegMask >> NUM_PRESERVED_REGS_LOW; + arm64_calleeSavedRegMaskLow = calleeSavedRegMask & MASK_PRESERVED_REGS_LOW; + if (calleeSavedRegMaskHigh) + { + arm64_longCsrMask = 1; + } +#endif + + size_t size = EC_SizeOfFixedHeader; + if (pDest) + { + memcpy(pDest, this, EC_SizeOfFixedHeader); + pDest += EC_SizeOfFixedHeader; + } + + if (hasFrameSize) + size += WriteUnsigned(pDest, frameSize); + + if (returnKind == MRK_ReturnsToNative) + size += WriteUnsigned(pDest, reversePinvokeFrameOffset); + +#ifdef TARGET_AMD64 + if (x64_framePtrOffsetSmall == 0x3) + size += WriteUnsigned(pDest, x64_framePtrOffset); + + if (x64_hasSavedXmmRegs) + { + ASSERT((x64_savedXmmRegMask & 0x3f) == 0); + UInt32 encodedValue = x64_savedXmmRegMask >> 6; + size += WriteUnsigned(pDest, encodedValue); + } +#elif defined(TARGET_X86) + if (x86_argCountIsLarge) + { + size += 1; + if (pDest) + *pDest++ = x86_argCountHigh; + } + ASSERT(!x86_hasStackChanges || !"NYI -- stack changes for ESP frames"); +#elif defined(TARGET_ARM) + if (arm_areParmOrVfpRegsPushed) + { + // we encode a bit field where the low 4 bits represent the pushed parameter register + // set, the next 8 bits are the number of pushed floating point registers, and the highest + // bits are the first pushed floating point register plus 1. + // The 0 encoding means the first floating point register is 8 as this is the most frequent. 
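A standalone round-trip of the ARM bit layout the comment above describes: bits [3:0] carry the pushed parameter-register set, bits [11:4] the pushed VFP register count, and the bits above that the first pushed VFP register plus one (0 meaning d8, the common case):

#include <cstdint>
#include <cassert>

static uint32_t EncodeArmPushInfo(uint8_t parmRegsPushedSet, uint8_t vfpFirst, uint8_t vfpCount)
{
    uint32_t v = parmRegsPushedSet | ((uint32_t)vfpCount << 4);
    if (vfpFirst != 8)                       // d8 is the common case and gets the 0 encoding
        v |= (uint32_t)(vfpFirst + 1) << 12;
    return v;
}

static void DecodeArmPushInfo(uint32_t v, uint8_t* parmSet, uint8_t* vfpFirst, uint8_t* vfpCount)
{
    *parmSet  = v & 0x0f;
    *vfpCount = (uint8_t)(v >> 4);
    uint32_t first = v >> 12;
    *vfpFirst = (first == 0) ? 8 : (uint8_t)(first - 1);
}

int main()
{
    uint8_t set, first, count;
    DecodeArmPushInfo(EncodeArmPushInfo(0x5 /* r0, r2 */, 10, 4), &set, &first, &count);
    assert(set == 0x5 && first == 10 && count == 4);
    DecodeArmPushInfo(EncodeArmPushInfo(0x3 /* r0, r1 */, 8, 2), &set, &first, &count);
    assert(set == 0x3 && first == 8 && count == 2);
    return 0;
}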
+ UInt32 encodedValue = arm_parmRegsPushedSet | (arm_vfpRegPushedCount << 4); + // usually, the first pushed floating point register is d8 + if (arm_vfpRegFirstPushed != 8) + encodedValue |= (arm_vfpRegFirstPushed+1) << (8+4); + + size += WriteUnsigned(pDest, encodedValue); + } +#elif defined(TARGET_ARM64) + if (calleeSavedRegMaskHigh) + { + size += 1; + if (pDest) + *pDest++ = calleeSavedRegMaskHigh; + } + + if (arm64_areParmOrVfpRegsPushed) + { + // At present arm64_parmRegsPushedCount is non-zero only for variadic functions, so place this field higher + UInt32 encodedValue = arm64_vfpRegsPushedMask | (arm64_parmRegsPushedCount << 8); + size += WriteUnsigned(pDest, encodedValue); + } +#endif + + // encode dynamic alignment and GS cookie information + if (hasExtraData) + { + size += WriteUnsigned(pDest, extraDataHeader); + } + if (HasDynamicAlignment()) + { + size += WriteUnsigned(pDest, paramPointerReg); + } + if (hasGSCookie) + { + size += WriteUnsigned(pDest, gsCookieOffset); + } + + if (epilogCountSmall == EC_MaxEpilogCountSmall) + { + size += WriteUnsigned(pDest, epilogCount); + } + + // WARNING: + // WARNING: Do not add fields to the file-format after the funclet header encodings -- these are + // WARNING: decoded recursively and 'in-place' when looking for the info associated with a funclet. + // WARNING: Therefore, in that case, we cannot easily continue to decode things associated with the + // WARNING: main body GCInfoHeader once we start this recursive decode. + // WARNING: + size += EncodeFuncletInfo(pDest); + +#ifdef _DEBUG + ASSERT(!pDest || (size == (size_t)(pDest - pStart))); +#endif // _DEBUG + + return size; + } + + size_t WriteUnsigned(UInt8 * & pDest, UInt32 value) + { + size_t size = (size_t)VarInt::WriteUnsigned(pDest, value); + pDest = pDest ? (pDest + size) : pDest; + return size; + } +#endif // DACCESS_COMPILE + + UInt16 ToUInt16(UInt32 val) + { + UInt16 result = (UInt16)val; + ASSERT(val == result); + return result; + } + + UInt8 ToUInt8(UInt32 val) + { + UInt8 result = (UInt8)val; + ASSERT(val == result); + return result; + } + + // + // DECODING HELPERS + // + // Returns a pointer to the 'stack change string' on x86. + PTR_UInt8 DecodeHeader(UInt32 methodOffset, PTR_UInt8 pbHeaderEncoding, size_t* pcbHeader) + { + PTR_UInt8 pbStackChangeString = NULL; + + TADDR pbTemp = PTR_TO_TADDR(pbHeaderEncoding); + memcpy(this, PTR_READ(pbTemp, EC_SizeOfFixedHeader), EC_SizeOfFixedHeader); + + PTR_UInt8 pbDecode = pbHeaderEncoding + EC_SizeOfFixedHeader; + frameSize = hasFrameSize + ? VarInt::ReadUnsigned(pbDecode) + : 0; + + reversePinvokeFrameOffset = (returnKind == MRK_ReturnsToNative) + ? VarInt::ReadUnsigned(pbDecode) + : 0; + +#ifdef TARGET_AMD64 + x64_framePtrOffset = (x64_framePtrOffsetSmall == 0x3) + ? 
ToUInt8(VarInt::ReadUnsigned(pbDecode)) + : x64_framePtrOffsetSmall + 3; + + + x64_savedXmmRegMask = 0; + if (x64_hasSavedXmmRegs) + { + UInt32 encodedValue = VarInt::ReadUnsigned(pbDecode); + ASSERT((encodedValue & ~0x3ff) == 0); + x64_savedXmmRegMask = ToUInt16(encodedValue << 6); + } + +#elif defined(TARGET_X86) + if (x86_argCountIsLarge) + x86_argCountHigh = *pbDecode++; + else + x86_argCountHigh = 0; + + if (x86_hasStackChanges) + { + pbStackChangeString = pbDecode; + + bool last = false; + while (!last) + { + UInt8 b = *pbDecode++; + // 00111111 {delta} forwarder + // 00dddddd push 1, dddddd = delta + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + if (b == 0x3F) + { + // 00111111 {delta} forwarder + VarInt::ReadUnsigned(pbDecode); + } + else if (0 != (b & 0xC0)) + { + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + last = ((b & 0x10) == 0x10); + } + } + } +#elif defined(TARGET_ARM) + arm_parmRegsPushedSet = 0; + arm_vfpRegPushedCount = 0; + arm_vfpRegFirstPushed = 0; + if (arm_areParmOrVfpRegsPushed) + { + UInt32 encodedValue = VarInt::ReadUnsigned(pbDecode); + arm_parmRegsPushedSet = encodedValue & 0x0f; + arm_vfpRegPushedCount = (UInt8)(encodedValue >> 4); + UInt32 vfpRegFirstPushed = encodedValue >> (8 + 4); + if (vfpRegFirstPushed == 0) + arm_vfpRegFirstPushed = 8; + else + arm_vfpRegFirstPushed = (UInt8)(vfpRegFirstPushed - 1); + } +#elif defined(TARGET_ARM64) + calleeSavedRegMask = arm64_calleeSavedRegMaskLow; + if (arm64_longCsrMask) + { + calleeSavedRegMask |= (*pbDecode++ << NUM_PRESERVED_REGS_LOW); + } + + arm64_parmRegsPushedCount = 0; + arm64_vfpRegsPushedMask = 0; + if (arm64_areParmOrVfpRegsPushed) + { + UInt32 encodedValue = VarInt::ReadUnsigned(pbDecode); + arm64_vfpRegsPushedMask = (UInt8)encodedValue; + arm64_parmRegsPushedCount = (UInt8)(encodedValue >> 8); + ASSERT(arm64_parmRegsPushedCount <= 8); + } +#endif + + extraDataHeader = hasExtraData ? ToUInt8(VarInt::ReadUnsigned(pbDecode)) : 0; + paramPointerReg = HasDynamicAlignment() ? ToUInt8(VarInt::ReadUnsigned(pbDecode)) : (UInt8)RN_NONE; + gsCookieOffset = hasGSCookie ? VarInt::ReadUnsigned(pbDecode) : 0; + + epilogCount = epilogCountSmall < EC_MaxEpilogCountSmall ? epilogCountSmall : ToUInt16(VarInt::ReadUnsigned(pbDecode)); + + this->funcletOffset = 0; + if (hasFunclets) + { + // WORKAROUND: Epilog tables are still per-method instead of per-funclet, but we don't deal with + // them here. So we will simply overwrite the funclet's epilogAtEnd and epilogCount + // with the values from the main code body -- these were the values used to generate + // the per-method epilog table, so at least we're consistent with what is encoded. + UInt8 mainEpilogAtEnd = epilogAtEnd; + UInt16 mainEpilogCount = epilogCount; + UInt16 mainFixedEpilogSize = fixedEpilogSize; // Either in bytes or in instructions + UInt8 mainHasCommonVars = hasCommonVars; + // ------- + + int nFunclets = (int)VarInt::ReadUnsigned(pbDecode); + int idxFunclet = -2; + UInt32 offsetFunclet = 0; + // Decode the funclet start offsets, remembering which one is of interest. 
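The loop just below walks the VarInt-encoded deltas; the following is a simplified standalone model of the same search that uses a plain array in place of the VarInt stream (illustrative names only):

#include <cstdint>
#include <vector>

// Funclet start offsets are stored as deltas from the previous start. Given a code
// offset, return the index of the funclet that contains it (-1 for the main body) and
// the absolute start offset of that funclet.
static int FindFuncletIndex(const std::vector<uint32_t>& startDeltas, uint32_t methodOffset, uint32_t& funcletStart)
{
    int idx = -1;                // stays -1 if methodOffset precedes the first funclet
    uint32_t prevStart = 0;
    funcletStart = 0;
    for (size_t i = 0; i < startDeltas.size(); i++)
    {
        uint32_t thisStart = prevStart + startDeltas[i];
        if (methodOffset < thisStart)
            return idx;          // offset lies before this funclet, so it belongs to the previous region
        idx = (int)i;
        funcletStart = thisStart;
        prevStart = thisStart;
    }
    return idx;                  // offset is inside (or past) the last funclet
}

With deltas {0x40, 0x60} (funclets starting at 0x40 and 0xA0), an offset of 0x50 resolves to funclet 0 with a start of 0x40, which matches what the idxFunclet/offsetFunclet bookkeeping below computes.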
+ UInt32 prevFuncletStart = 0; + for (int i = 0; i < nFunclets; i++) + { + UInt32 offsetThisFunclet = prevFuncletStart + VarInt::ReadUnsigned(pbDecode); + if ((idxFunclet == -2) && (methodOffset < offsetThisFunclet)) + { + idxFunclet = (i - 1); + offsetFunclet = prevFuncletStart; + } + prevFuncletStart = offsetThisFunclet; + } + if ((idxFunclet == -2) && (methodOffset >= prevFuncletStart)) + { + idxFunclet = (nFunclets - 1); + offsetFunclet = prevFuncletStart; + } + + // Now decode headers until we find the one we want. Keep decoding if we need to report a size. + if (pcbHeader || (idxFunclet >= 0)) + { + for (int i = 0; i < nFunclets; i++) + { + size_t hdrSize; + if (i == idxFunclet) + { + this->DecodeHeader(methodOffset, pbDecode, &hdrSize); + pbDecode += hdrSize; + this->funcletOffset = offsetFunclet; + if (!pcbHeader) // if nobody is going to look at the header size, we don't need to keep going + break; + } + else + { + // keep decoding into a temp just to get the right header size + GCInfoHeader tmp; + tmp.DecodeHeader(methodOffset, pbDecode, &hdrSize); + pbDecode += hdrSize; + } + } + } + + // WORKAROUND: see above + this->epilogAtEnd = mainEpilogAtEnd; + this->epilogCount = mainEpilogCount; + this->PokeFixedEpilogSize(mainFixedEpilogSize); + this->hasCommonVars = mainHasCommonVars; + + // ------- + } + + // WARNING: + // WARNING: Do not add fields to the file-format after the funclet header encodings -- these are + // WARNING: decoded recursively and 'in-place' when looking for the info associated with a funclet. + // WARNING: Therefore, in that case, we cannot easily continue to decode things associated with the + // WARNING: main body GCInfoHeader once we start this recursive decode. + // WARNING: + + if (pcbHeader) + *pcbHeader = pbDecode - pbHeaderEncoding; + + return pbStackChangeString; + } + + void GetFuncletInfo(PTR_UInt8 pbHeaderEncoding, UInt32* pnFuncletsOut, PTR_UInt8* pEncodedFuncletStartOffsets) + { + ASSERT(hasFunclets); + + PTR_UInt8 pbDecode = pbHeaderEncoding + EC_SizeOfFixedHeader; + if (hasFrameSize) { VarInt::SkipUnsigned(pbDecode); } + if (returnKind == MRK_ReturnsToNative) { VarInt::SkipUnsigned(pbDecode); } + if (hasExtraData) { VarInt::SkipUnsigned(pbDecode); } + if (HasDynamicAlignment()) { VarInt::SkipUnsigned(pbDecode); } + if (hasGSCookie) { VarInt::SkipUnsigned(pbDecode); } + +#ifdef TARGET_AMD64 + if (x64_framePtrOffsetSmall == 0x3) { VarInt::SkipUnsigned(pbDecode); } +#elif defined(TARGET_X86) + if (x86_argCountIsLarge) + pbDecode++; + + if (x86_hasStackChanges) + { + bool last = false; + while (!last) + { + UInt8 b = *pbDecode++; + // 00111111 {delta} forwarder + // 00dddddd push 1, dddddd = delta + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + if (b == 0x3F) + { + // 00111111 {delta} forwarder + VarInt::SkipUnsigned(pbDecode); + } + else if (0 != (b & 0xC0)) + { + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + last = ((b & 0x10) == 0x10); + } + } + } +#elif defined(TARGET_ARM) + if (arm_areParmOrVfpRegsPushed) { VarInt::SkipUnsigned(pbDecode); } +#elif defined(TARGET_ARM64) + if (arm64_longCsrMask) { pbDecode++; } + if (arm64_areParmOrVfpRegsPushed) { VarInt::SkipUnsigned(pbDecode); } +#endif + + *pnFuncletsOut = VarInt::ReadUnsigned(pbDecode); + *pEncodedFuncletStartOffsets = pbDecode; + } + + bool IsValidEpilogOffset(UInt32 epilogOffset, UInt32 epilogSize) + { + if (!this->HasVaryingEpilogSizes()) + return (epilogOffset < this->GetFixedEpilogSize()); + else + return (epilogOffset 
< epilogSize); + } +}; + +/*****************************************************************************/ +#endif //_GCINFO_H_ +/*****************************************************************************/ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/src/nativeaot/Runtime/inc/rhbinder.h new file mode 100644 index 0000000000000..480aa8e8ce7a6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/rhbinder.h @@ -0,0 +1,664 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This header contains binder-generated data structures that the runtime consumes. +// +#include "TargetPtrs.h" + +class GcPollInfo +{ +public: + static const UInt32 indirCellsPerBitmapBit = 64 / POINTER_SIZE; // one cache line per bit + + static const UInt32 cbChunkCommonCode_X64 = 17; + static const UInt32 cbChunkCommonCode_X86 = 16; + static const UInt32 cbChunkCommonCode_ARM = 32; +#ifdef TARGET_ARM + // on ARM, the index of the indirection cell can be computed + // from the pointer to the indirection cell left in R12, + // thus we need only one entry point on ARM, + // thus entries take no space, and you can have as many as you want + static const UInt32 cbEntry = 0; + static const UInt32 cbBundleCommonCode = 0; + static const UInt32 entriesPerBundle = 0x7fffffff; + static const UInt32 bundlesPerChunk = 0x7fffffff; + static const UInt32 entriesPerChunk = 0x7fffffff; +#else + static const UInt32 cbEntry = 4; // push imm8 / jmp rel8 + static const UInt32 cbBundleCommonCode = 5; // jmp rel32 + + static const UInt32 entriesPerSubBundlePos = 32; // for the half with forward jumps + static const UInt32 entriesPerSubBundleNeg = 30; // for the half with negative jumps + static const UInt32 entriesPerBundle = entriesPerSubBundlePos + entriesPerSubBundleNeg; + static const UInt32 bundlesPerChunk = 4; + static const UInt32 entriesPerChunk = bundlesPerChunk * entriesPerBundle; +#endif + + static const UInt32 cbFullBundle = cbBundleCommonCode + + (entriesPerBundle * cbEntry); + + static UInt32 EntryIndexToStubOffset(UInt32 entryIndex) + { +# if defined(TARGET_ARM) + return EntryIndexToStubOffset(entryIndex, cbChunkCommonCode_ARM); +# elif defined(TARGET_AMD64) + return EntryIndexToStubOffset(entryIndex, cbChunkCommonCode_X64); +# else + return EntryIndexToStubOffset(entryIndex, cbChunkCommonCode_X86); +# endif + } + + static UInt32 EntryIndexToStubOffset(UInt32 entryIndex, UInt32 cbChunkCommonCode) + { +# if defined(TARGET_ARM) + UNREFERENCED_PARAMETER(entryIndex); + UNREFERENCED_PARAMETER(cbChunkCommonCode); + + return 0; +# else + UInt32 cbFullChunk = cbChunkCommonCode + + (bundlesPerChunk * cbBundleCommonCode) + + (entriesPerChunk * cbEntry); + + UInt32 numFullChunks = entryIndex / entriesPerChunk; + UInt32 numEntriesInLastChunk = entryIndex - (numFullChunks * entriesPerChunk); + + UInt32 numFullBundles = numEntriesInLastChunk / entriesPerBundle; + UInt32 numEntriesInLastBundle = numEntriesInLastChunk - (numFullBundles * entriesPerBundle); + + UInt32 offset = (numFullChunks * cbFullChunk) + + cbChunkCommonCode + + (numFullBundles * cbFullBundle) + + (numEntriesInLastBundle * cbEntry); + + if (numEntriesInLastBundle >= entriesPerSubBundlePos) + offset += cbBundleCommonCode; + + return offset; +# endif + } +}; + +struct StaticGcDesc +{ + struct GCSeries + { + UInt32 m_size; + UInt32 m_startOffset; + }; + + UInt32 m_numSeries; + GCSeries m_series[1]; + + UInt32 GetSize() + { + return 
(UInt32)(offsetof(StaticGcDesc, m_series) + (m_numSeries * sizeof(GCSeries))); + } + +#ifdef DACCESS_COMPILE + static UInt32 DacSize(TADDR addr); +#endif +}; + +typedef SPTR(StaticGcDesc) PTR_StaticGcDesc; +typedef DPTR(StaticGcDesc::GCSeries) PTR_StaticGcDescGCSeries; + +class EEType; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +enum class DispatchCellType +{ + InterfaceAndSlot = 0x0, + MetadataToken = 0x1, + VTableOffset = 0x2, +}; + +struct DispatchCellInfo +{ + DispatchCellType CellType; + EEType *InterfaceType = nullptr; + UInt16 InterfaceSlot = 0; + UInt8 HasCache = 0; + UInt32 MetadataToken = 0; + UInt32 VTableOffset = 0; +}; + +struct InterfaceDispatchCacheHeader +{ +private: + enum Flags + { + CH_TypeAndSlotIndex = 0x0, + CH_MetadataToken = 0x1, + CH_Mask = 0x3, + CH_Shift = 0x2, + }; + +public: + void Initialize(EEType *pInterfaceType, UInt16 interfaceSlot, UInt32 metadataToken) + { + if (pInterfaceType != nullptr) + { + ASSERT(metadataToken == 0); + m_pInterfaceType = pInterfaceType; + m_slotIndexOrMetadataTokenEncoded = CH_TypeAndSlotIndex | (((UInt32)interfaceSlot) << CH_Shift); + } + else + { + ASSERT(pInterfaceType == nullptr); + ASSERT(interfaceSlot == 0); + m_pInterfaceType = nullptr; + m_slotIndexOrMetadataTokenEncoded = CH_MetadataToken | (metadataToken << CH_Shift); + } + } + + void Initialize(const DispatchCellInfo *pCellInfo) + { + ASSERT((pCellInfo->CellType == DispatchCellType::InterfaceAndSlot) || + (pCellInfo->CellType == DispatchCellType::MetadataToken)); + if (pCellInfo->CellType == DispatchCellType::InterfaceAndSlot) + { + ASSERT(pCellInfo->MetadataToken == 0); + Initialize(pCellInfo->InterfaceType, pCellInfo->InterfaceSlot, 0); + } + else + { + ASSERT(pCellInfo->CellType == DispatchCellType::MetadataToken); + ASSERT(pCellInfo->InterfaceType == nullptr); + Initialize(nullptr, 0, pCellInfo->MetadataToken); + } + } + + DispatchCellInfo GetDispatchCellInfo() + { + DispatchCellInfo cellInfo; + + if ((m_slotIndexOrMetadataTokenEncoded & CH_Mask) == CH_TypeAndSlotIndex) + { + cellInfo.InterfaceType = m_pInterfaceType; + cellInfo.InterfaceSlot = (UInt16)(m_slotIndexOrMetadataTokenEncoded >> CH_Shift); + cellInfo.CellType = DispatchCellType::InterfaceAndSlot; + } + else + { + cellInfo.MetadataToken = m_slotIndexOrMetadataTokenEncoded >> CH_Shift; + cellInfo.CellType = DispatchCellType::MetadataToken; + } + cellInfo.HasCache = 1; + return cellInfo; + } + +private: + EEType * m_pInterfaceType; // EEType of interface to dispatch on + UInt32 m_slotIndexOrMetadataTokenEncoded; +}; + +// One of these is allocated per interface call site. It holds the stub to call, data to pass to that stub +// (cache information) and the interface contract, i.e. the interface type and slot being called. +struct InterfaceDispatchCell +{ + // The first two fields must remain together and at the beginning of the structure. This is due to the + // synchronization requirements of the code that updates these at runtime and the instructions generated + // by the binder for interface call sites. + UIntTarget m_pStub; // Call this code to execute the interface dispatch + volatile UIntTarget m_pCache; // Context used by the stub above (one or both of the low two bits are set + // for initial dispatch, and if not set, using this as a cache pointer or + // as a vtable offset.) + // + // In addition, there is a Slot/Flag use of this field. 
DispatchCells are + // emitted as a group, and the final one in the group (identified by m_pStub + // having the null value) will have a Slot field is the low 16 bits of the + // m_pCache field, and in the second lowest 16 bits, a Flags field. For the interface + // case Flags shall be 0, and for the metadata token case, Flags shall be 1. + + // + // Keep these in sync with the managed copy in src\Common\src\Internal\Runtime\InterfaceCachePointerType.cs + // + enum Flags + { + // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for + // extra fields on this type. + // OR if the m_pCache value is less than 0x1000 then this it is a vtable offset and should be used as such + IDC_CachePointerIsInterfaceRelativePointer = 0x3, + IDC_CachePointerIsIndirectedInterfaceRelativePointer = 0x2, + IDC_CachePointerIsInterfacePointerOrMetadataToken = 0x1, // Metadata token is a 30 bit number in this case. + // Tokens are required to have at least one of their upper 20 bits set + // But they are not required by this part of the system to follow any specific + // token format + IDC_CachePointerPointsAtCache = 0x0, + IDC_CachePointerMask = 0x3, + IDC_CachePointerMaskShift = 0x2, + IDC_MaxVTableOffsetPlusOne = 0x1000, + }; + + DispatchCellInfo GetDispatchCellInfo() + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + UIntTarget cachePointerValue = m_pCache; + DispatchCellInfo cellInfo; + + if ((cachePointerValue < IDC_MaxVTableOffsetPlusOne) && ((cachePointerValue & IDC_CachePointerMask) == IDC_CachePointerPointsAtCache)) + { + cellInfo.VTableOffset = (UInt32)cachePointerValue; + cellInfo.CellType = DispatchCellType::VTableOffset; + cellInfo.HasCache = 1; + return cellInfo; + } + + // If there is a real cache pointer, grab the data from there. + if ((cachePointerValue & IDC_CachePointerMask) == IDC_CachePointerPointsAtCache) + { + return ((InterfaceDispatchCacheHeader*)cachePointerValue)->GetDispatchCellInfo(); + } + + // Otherwise, walk to cell with Flags and Slot field + + // The slot number/flags for a dispatch cell is encoded once per run of DispatchCells + // The run is terminated by having an dispatch cell with a null stub pointer. 
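GetDispatchCellInfo below applies these rules; as a rough illustration, the following sketch classifies a raw m_pCache value under the same flag scheme (it deliberately omits the relative-pointer resolution and the slot/flags read from the terminating cell):

#include <cstdint>

// Hypothetical classifier for an m_pCache value, mirroring the IDC_* flag values defined below.
enum class CachePointerKind
{
    VTableOffset,                 // low bits 0x0 and value < 0x1000
    CachePointer,                 // low bits 0x0 and value >= 0x1000
    InterfaceOrMetadataToken,     // low bits 0x1
    IndirectedRelativePointer,    // low bits 0x2
    RelativePointer               // low bits 0x3
};

static CachePointerKind ClassifyCachePointer(uintptr_t value)
{
    const uintptr_t cachePointerMask = 0x3;        // IDC_CachePointerMask
    const uintptr_t maxVTableOffsetPlusOne = 0x1000;
    switch (value & cachePointerMask)
    {
    case 0x0:
        return (value < maxVTableOffsetPlusOne) ? CachePointerKind::VTableOffset
                                                : CachePointerKind::CachePointer;
    case 0x1: return CachePointerKind::InterfaceOrMetadataToken;
    case 0x2: return CachePointerKind::IndirectedRelativePointer;
    default:  return CachePointerKind::RelativePointer;
    }
}

So a value such as 0x28 reports a vtable offset, while a pointer-aligned heap address with both low bits clear is treated as a pointer to an InterfaceDispatchCacheHeader.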
+ const InterfaceDispatchCell *currentCell = this; + while (currentCell->m_pStub != 0) + { + currentCell = currentCell + 1; + } + UIntTarget cachePointerValueFlags = currentCell->m_pCache; + + DispatchCellType cellType = (DispatchCellType)(cachePointerValueFlags >> 16); + cellInfo.CellType = cellType; + + if (cellType == DispatchCellType::InterfaceAndSlot) + { + cellInfo.InterfaceSlot = (UInt16)cachePointerValueFlags; + + switch (cachePointerValue & IDC_CachePointerMask) + { + case IDC_CachePointerIsInterfacePointerOrMetadataToken: + cellInfo.InterfaceType = (EEType*)(cachePointerValue & ~IDC_CachePointerMask); + break; + + case IDC_CachePointerIsInterfaceRelativePointer: + case IDC_CachePointerIsIndirectedInterfaceRelativePointer: + { + UIntTarget interfacePointerValue = (UIntTarget)&m_pCache + (Int32)cachePointerValue; + interfacePointerValue &= ~IDC_CachePointerMask; + if ((cachePointerValue & IDC_CachePointerMask) == IDC_CachePointerIsInterfaceRelativePointer) + { + cellInfo.InterfaceType = (EEType*)interfacePointerValue; + } + else + { + cellInfo.InterfaceType = *(EEType**)interfacePointerValue; + } + } + break; + } + } + else + { + cellInfo.MetadataToken = (UInt32)(cachePointerValue >> IDC_CachePointerMaskShift); + } + + return cellInfo; + } + + static bool IsCache(UIntTarget value) + { + if (((value & IDC_CachePointerMask) != 0) || (value < IDC_MaxVTableOffsetPlusOne)) + { + return false; + } + else + { + return true; + } + } + + InterfaceDispatchCacheHeader* GetCache() const + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + UIntTarget cachePointerValue = m_pCache; + if (IsCache(cachePointerValue)) + { + return (InterfaceDispatchCacheHeader*)cachePointerValue; + } + else + { + return 0; + } + } +}; + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#ifdef TARGET_ARM +// Note for ARM: try and keep the flags in the low 16-bits, since they're not easy to load into a register in +// a single instruction within our stubs. +enum PInvokeTransitionFrameFlags +{ + // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\arm\AsmMacros.h + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_R4 = 0x00000001, + PTFF_SAVE_R5 = 0x00000002, + PTFF_SAVE_R6 = 0x00000004, + PTFF_SAVE_R7 = 0x00000008, // should never be used, we require FP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + PTFF_SAVE_R8 = 0x00000010, + PTFF_SAVE_R9 = 0x00000020, + PTFF_SAVE_R10 = 0x00000040, + PTFF_SAVE_SP = 0x00000100, // Used for 'coop pinvokes' in runtime helper routines. Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. Therefore, methods that call runtime + // helpers may need SP to seed the stackwalk. 
+ + // scratch registers + PTFF_SAVE_R0 = 0x00000200, + PTFF_SAVE_R1 = 0x00000400, + PTFF_SAVE_R2 = 0x00000800, + PTFF_SAVE_R3 = 0x00001000, + PTFF_SAVE_LR = 0x00002000, // this is useful for the case of loop hijacking where we need both + // a return address pointing into the hijacked method and that method's + // lr register, which may hold a gc pointer + + PTFF_R0_IS_GCREF = 0x00004000, // used by hijack handler to report return value of hijacked method + PTFF_R0_IS_BYREF = 0x00008000, // used by hijack handler to report return value of hijacked method + + PTFF_THREAD_ABORT = 0x00010000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; +#elif defined(TARGET_ARM64) +enum PInvokeTransitionFrameFlags : UInt64 +{ + // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\arm64\AsmMacros.h + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_X19 = 0x0000000000000001, + PTFF_SAVE_X20 = 0x0000000000000002, + PTFF_SAVE_X21 = 0x0000000000000004, + PTFF_SAVE_X22 = 0x0000000000000008, + PTFF_SAVE_X23 = 0x0000000000000010, + PTFF_SAVE_X24 = 0x0000000000000020, + PTFF_SAVE_X25 = 0x0000000000000040, + PTFF_SAVE_X26 = 0x0000000000000080, + PTFF_SAVE_X27 = 0x0000000000000100, + PTFF_SAVE_X28 = 0x0000000000000200, + + PTFF_SAVE_SP = 0x0000000000000400, // Used for 'coop pinvokes' in runtime helper routines. Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. Therefore, methods that call runtime + // helpers may need SP to seed the stackwalk. 
+ + // Scratch registers + PTFF_SAVE_X0 = 0x0000000000000800, + PTFF_SAVE_X1 = 0x0000000000001000, + PTFF_SAVE_X2 = 0x0000000000002000, + PTFF_SAVE_X3 = 0x0000000000004000, + PTFF_SAVE_X4 = 0x0000000000008000, + PTFF_SAVE_X5 = 0x0000000000010000, + PTFF_SAVE_X6 = 0x0000000000020000, + PTFF_SAVE_X7 = 0x0000000000040000, + PTFF_SAVE_X8 = 0x0000000000080000, + PTFF_SAVE_X9 = 0x0000000000100000, + PTFF_SAVE_X10 = 0x0000000000200000, + PTFF_SAVE_X11 = 0x0000000000400000, + PTFF_SAVE_X12 = 0x0000000000800000, + PTFF_SAVE_X13 = 0x0000000001000000, + PTFF_SAVE_X14 = 0x0000000002000000, + PTFF_SAVE_X15 = 0x0000000004000000, + PTFF_SAVE_X16 = 0x0000000008000000, + PTFF_SAVE_X17 = 0x0000000010000000, + PTFF_SAVE_X18 = 0x0000000020000000, + + PTFF_SAVE_FP = 0x0000000040000000, // should never be used, we require FP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + + PTFF_SAVE_LR = 0x0000000080000000, // this is useful for the case of loop hijacking where we need both + // a return address pointing into the hijacked method and that method's + // lr register, which may hold a gc pointer + + // used by hijack handler to report return value of hijacked method + PTFF_X0_IS_GCREF = 0x0000000100000000, + PTFF_X0_IS_BYREF = 0x0000000200000000, + PTFF_X1_IS_GCREF = 0x0000000400000000, + PTFF_X1_IS_BYREF = 0x0000000800000000, + + PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; + +// TODO: Consider moving the PInvokeTransitionFrameFlags definition to a separate file to simplify header dependencies +#ifdef ICODEMANAGER_INCLUDED +// Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back +C_ASSERT(PTFF_X0_IS_GCREF == ((UInt64)GCRK_Object << 32)); +C_ASSERT(PTFF_X0_IS_BYREF == ((UInt64)GCRK_Byref << 32)); +C_ASSERT(PTFF_X1_IS_GCREF == ((UInt64)GCRK_Scalar_Obj << 32)); +C_ASSERT(PTFF_X1_IS_BYREF == ((UInt64)GCRK_Scalar_Byref << 32)); + +inline UInt64 ReturnKindToTransitionFrameFlags(GCRefKind returnKind) +{ + if (returnKind == GCRK_Scalar) + return 0; + + return PTFF_SAVE_X0 | PTFF_SAVE_X1 | ((UInt64)returnKind << 32); +} + +inline GCRefKind TransitionFrameFlagsToReturnKind(UInt64 transFrameFlags) +{ + GCRefKind returnKind = (GCRefKind)((transFrameFlags & (PTFF_X0_IS_GCREF | PTFF_X0_IS_BYREF | PTFF_X1_IS_GCREF | PTFF_X1_IS_BYREF)) >> 32); + ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_X0) && (transFrameFlags & PTFF_SAVE_X1))); + return returnKind; +} +#endif // ICODEMANAGER_INCLUDED +#else // TARGET_ARM +enum PInvokeTransitionFrameFlags +{ + // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\[amd64|i386]\AsmMacros.inc + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_RBX = 0x00000001, + PTFF_SAVE_RSI = 0x00000002, + PTFF_SAVE_RDI = 0x00000004, + PTFF_SAVE_RBP = 0x00000008, // should never be used, we require RBP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + PTFF_SAVE_R12 = 0x00000010, + PTFF_SAVE_R13 = 0x00000020, + PTFF_SAVE_R14 = 0x00000040, + PTFF_SAVE_R15 = 0x00000080, + + PTFF_SAVE_RSP = 0x00008000, // Used for 'coop pinvokes' in runtime helper routines. 
Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. Therefore, methods that call runtime + // helpers may need RSP to seed the stackwalk. + // + // NOTE: despite the fact that this flag's bit is out of order, it is + // still expected to be saved here after the preserved registers and + // before the scratch registers + PTFF_SAVE_RAX = 0x00000100, + PTFF_SAVE_RCX = 0x00000200, + PTFF_SAVE_RDX = 0x00000400, + PTFF_SAVE_R8 = 0x00000800, + PTFF_SAVE_R9 = 0x00001000, + PTFF_SAVE_R10 = 0x00002000, + PTFF_SAVE_R11 = 0x00004000, + + PTFF_RAX_IS_GCREF = 0x00010000, // used by hijack handler to report return value of hijacked method + PTFF_RAX_IS_BYREF = 0x00020000, // used by hijack handler to report return value of hijacked method + + PTFF_THREAD_ABORT = 0x00040000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; +#endif // TARGET_ARM + +#pragma warning(push) +#pragma warning(disable:4200) // nonstandard extension used: zero-sized array in struct/union +class Thread; +#if defined(USE_PORTABLE_HELPERS) +//the members of this structure are currently unused except m_pThread and exist only to allow compilation +//of StackFrameIterator their values are not currently being filled in and will require significant rework +//in order to satisfy the runtime requirements of StackFrameIterator +struct PInvokeTransitionFrame +{ + void* m_RIP; + Thread* m_pThread; // unused by stack crawler, this is so GetThread is only called once per method + // can be an invalid pointer in universal transition cases (which never need to call GetThread) + uint32_t m_Flags; // PInvokeTransitionFrameFlags +}; +#else // USE_PORTABLE_HELPERS +struct PInvokeTransitionFrame +{ +#ifdef TARGET_ARM + TgtPTR_Void m_ChainPointer; // R11, used by OS to walk stack quickly +#endif +#ifdef TARGET_ARM64 + // On arm64, the FP and LR registers are pushed in that order when setting up frames + TgtPTR_Void m_FramePointer; + TgtPTR_Void m_RIP; +#else + TgtPTR_Void m_RIP; + TgtPTR_Void m_FramePointer; +#endif + TgtPTR_Thread m_pThread; // unused by stack crawler, this is so GetThread is only called once per method + // can be an invalid pointer in universal transition cases (which never need to call GetThread) +#ifdef TARGET_ARM64 + UInt64 m_Flags; // PInvokeTransitionFrameFlags +#else + UInt32 m_Flags; // PInvokeTransitionFrameFlags +#endif + UIntTarget m_PreservedRegs[]; +}; +#endif // USE_PORTABLE_HELPERS +#pragma warning(pop) + +#ifdef TARGET_AMD64 +// RBX, RSI, RDI, R12, R13, R14, R15, RAX, RSP +#define PInvokeTransitionFrame_SaveRegs_count 9 +#elif defined(TARGET_X86) +// RBX, RSI, RDI, RAX, RSP +#define PInvokeTransitionFrame_SaveRegs_count 5 +#elif defined(TARGET_ARM) +// R4-R10, R0, SP +#define PInvokeTransitionFrame_SaveRegs_count 9 +#endif +#define PInvokeTransitionFrame_MAX_SIZE (sizeof(PInvokeTransitionFrame) + (POINTER_SIZE * PInvokeTransitionFrame_SaveRegs_count)) + +#ifdef TARGET_AMD64 +#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#elif defined(TARGET_ARM64) +#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#elif defined(TARGET_X86) +#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#elif defined(TARGET_ARM) +#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#endif + +typedef DPTR(EEType) PTR_EEType; +typedef DPTR(PTR_EEType) PTR_PTR_EEType; + +struct EETypeRef +{ + union + { + EEType * pEEType; + EEType ** ppEEType; + UInt8 * rawPtr; + UIntTarget rawTargetPtr; // x86_amd64: keeps union big enough for 
target-platform pointer + }; + + static const size_t DOUBLE_INDIR_FLAG = 1; + + PTR_EEType GetValue() + { + if (dac_cast(rawTargetPtr) & DOUBLE_INDIR_FLAG) + return *dac_cast(rawTargetPtr - DOUBLE_INDIR_FLAG); + else + return dac_cast(rawTargetPtr); + } +}; + +// Blobs are opaque data passed from the compiler, through the binder and into the native image. At runtime we +// provide a simple API to retrieve these blobs (they're keyed by a simple integer ID). Blobs are passed to +// the binder from the compiler and stored in native images by the binder in a sequential stream, each blob +// having the following header. +struct BlobHeader +{ + UInt32 m_flags; // Flags describing the blob (used by the binder only at the moment) + UInt32 m_id; // Unique identifier of the blob (used to access the blob at runtime) + // also used by BlobTypeFieldPreInit to identify (at bind time) which field to pre-init. + UInt32 m_size; // Size of the individual blob excluding this header (DWORD aligned) +}; + +// Structure used in the runtime initialization of deferred static class constructors. Deferred here means +// executed during normal code execution just prior to a static field on the type being accessed (as opposed +// to eager cctors, which are run at module load time). This is the fixed portion of the context structure, +// class libraries can add their own fields to the end. +struct StaticClassConstructionContext +{ + // Pointer to the code for the static class constructor method. This is initialized by the + // binder/runtime. + TgtPTR_Void m_cctorMethodAddress; + + // Initialization state of the class. This is initialized to 0. Every time managed code checks the + // cctor state the runtime will call the classlibrary's CheckStaticClassConstruction with this context + // structure unless initialized == 1. This check is specific to allow the classlibrary to store more + // than a binary state for each cctor if it so desires. + Int32 m_initialized; +}; + +#ifdef FEATURE_CUSTOM_IMPORTS +struct CustomImportDescriptor +{ + UInt32 RvaEATAddr; // RVA of the indirection cell of the address of the EAT for that module + UInt32 RvaIAT; // RVA of IAT array for that module + UInt32 CountIAT; // Count of entries in the above array +}; +#endif // FEATURE_CUSTOM_IMPORTS + +enum RhEHClauseKind +{ + RH_EH_CLAUSE_TYPED = 0, + RH_EH_CLAUSE_FAULT = 1, + RH_EH_CLAUSE_FILTER = 2, + RH_EH_CLAUSE_UNUSED = 3 +}; + +#define RH_EH_CLAUSE_TYPED_INDIRECT RH_EH_CLAUSE_UNUSED + +// mapping of cold code blocks to the corresponding hot entry point RVA +// format is a as follows: +// ------------------- +// | subSectionCount | # of subsections, where each subsection has a run of hot bodies +// ------------------- followed by a run of cold bodies +// | hotMethodCount | # of hot bodies in subsection +// | coldMethodCount | # of cold bodies in subsection +// ------------------- +// ... possibly repeated on ARM +// ------------------- +// | hotRVA #1 | RVA of the hot entry point corresponding to the 1st cold body +// | hotRVA #2 | RVA of the hot entry point corresponding to the 2nd cold body +// ... 
one entry for each cold body containing the corresponding hot entry point + +// number of hot and cold bodies in a subsection of code +// in x86 and x64 there's only one subsection, on ARM there may be several +// for large modules with > 16 MB of code +struct SubSectionDesc +{ + UInt32 hotMethodCount; + UInt32 coldMethodCount; +}; + +// this is the structure describing the cold to hot mapping info +struct ColdToHotMapping +{ + UInt32 subSectionCount; + SubSectionDesc subSection[/*subSectionCount*/1]; + // UINT32 hotRVAofColdMethod[/*coldMethodCount*/]; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/inc/stressLog.h b/src/coreclr/src/nativeaot/Runtime/inc/stressLog.h new file mode 100644 index 0000000000000..f657a8de3893a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/stressLog.h @@ -0,0 +1,832 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// --------------------------------------------------------------------------- +// StressLog.h +// +// StressLog infrastructure +// +// The StressLog is a binary, memory based circular queue of logging messages. +// It is intended to be used in retail builds during stress runs (activated +// by registry key), to help find bugs that only turn up during stress runs. +// +// Differently from the desktop implementation the RH implementation of the +// stress log will log all facilities, and only filter on logging level. +// +// The log has a very simple structure, and is meant to be dumped from an NTSD +// extention (eg. strike). +// +// debug\rhsos\stresslogdump.cpp contains the dumper utility that parses this +// log. +// --------------------------------------------------------------------------- + +#ifndef StressLog_h +#define StressLog_h 1 + +#define SUPPRESS_WARNING_4127 \ + __pragma(warning(push)) \ + __pragma(warning(disable:4127)) /* conditional expression is constant*/ + +#define POP_WARNING_STATE \ + __pragma(warning(pop)) + +#define WHILE_0 \ + SUPPRESS_WARNING_4127 \ + while(0) \ + POP_WARNING_STATE \ + + +// let's keep STRESS_LOG defined always... +#if !defined(STRESS_LOG) && !defined(NO_STRESS_LOG) +#define STRESS_LOG +#endif + +#if defined(STRESS_LOG) + +// +// Logging levels and facilities +// +#define DEFINE_LOG_FACILITY(logname, value) logname = value, + +enum LogFacilitiesEnum: unsigned int { +#include "loglf.h" + LF_ALWAYS = 0x80000000u, // Log message irrepespective of LogFacility (if the level matches) + LF_ALL = 0xFFFFFFFFu, // Used only to mask bits. 
Never use as LOG((LF_ALL, ...)) +}; + + +#define LL_EVERYTHING 10 +#define LL_INFO1000000 9 // can be expected to generate 1,000,000 logs per small but not trival run +#define LL_INFO100000 8 // can be expected to generate 100,000 logs per small but not trival run +#define LL_INFO10000 7 // can be expected to generate 10,000 logs per small but not trival run +#define LL_INFO1000 6 // can be expected to generate 1,000 logs per small but not trival run +#define LL_INFO100 5 // can be expected to generate 100 logs per small but not trival run +#define LL_INFO10 4 // can be expected to generate 10 logs per small but not trival run +#define LL_WARNING 3 +#define LL_ERROR 2 +#define LL_FATALERROR 1 +#define LL_ALWAYS 0 // impossible to turn off (log level never negative) + +// +// +// + +#ifndef _ASSERTE +#define _ASSERTE(expr) +#endif + + +#ifndef DACCESS_COMPILE + + +//========================================================================================== +// The STRESS_LOG* macros +// +// The STRESS_LOG* macros work like printf. In fact the use printf in their implementation +// so all printf format specifications work. In addition the Stress log dumper knows +// about certain suffixes for the %p format specification (normally used to print a pointer) +// +// %pM // The pointer is a MethodInfo -- not supported yet (use %pK instead) +// %pT // The pointer is a type (EEType) +// %pV // The pointer is a C++ Vtable pointer +// %pK // The pointer is a code address (used for call stacks or method names) +// + +// STRESS_LOG_VA was added to allow sending GC trace output to the stress log. msg must be enclosed +// in ()'s and contain a format string followed by 0 - 4 arguments. The arguments must be numbers or +// string literals. LogMsgOL is overloaded so that all of the possible sets of parameters are covered. 
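As a quick illustration of the shape of these macros, here is a hypothetical call site; the variable names are placeholders and the facility/level constants come from this header and loglf.h:

// Hypothetical call sites only; pObject, pEEType and bytesReclaimed are placeholders.
static void ExampleStressLogging(void* pObject, void* pEEType, int bytesReclaimed)
{
    // The fixed-arity form: the macro first checks StressLogOn(LF_GC, LL_INFO100), then
    // forwards each argument as a void*; "%pT" tells the dumper the pointer is an EEType.
    STRESS_LOG2(LF_GC, LL_INFO100, "allocated object %p of type %pT\n", pObject, pEEType);

    // STRESS_LOG_VA takes the whole (format, args...) list as one parenthesized argument
    // and routes it through the LogMsgOL overloads.
    STRESS_LOG_VA(("sweep reclaimed %d bytes\n", bytesReclaimed));
}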
+// This was done becasue GC Trace uses dprintf which dosen't contain info on how many arguments are +// getting passed in and using va_args would require parsing the format string during the GC +// + +#define STRESS_LOG_VA(msg) do { \ + if (StressLog::StressLogOn(LF_GC, LL_ALWAYS)) \ + StressLog::LogMsgOL msg; \ + } WHILE_0 + +#define STRESS_LOG0(facility, level, msg) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 0, msg); \ + } WHILE_0 \ + +#define STRESS_LOG1(facility, level, msg, data1) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 1, msg, (void*)(size_t)(data1)); \ + } WHILE_0 + +#define STRESS_LOG2(facility, level, msg, data1, data2) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 2, msg, \ + (void*)(size_t)(data1), (void*)(size_t)(data2)); \ + } WHILE_0 + +#define STRESS_LOG3(facility, level, msg, data1, data2, data3) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 3, msg, \ + (void*)(size_t)(data1),(void*)(size_t)(data2),(void*)(size_t)(data3)); \ + } WHILE_0 + +#define STRESS_LOG4(facility, level, msg, data1, data2, data3, data4) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 4, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4)); \ + } WHILE_0 + +#define STRESS_LOG5(facility, level, msg, data1, data2, data3, data4, data5) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 5, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5)); \ + } WHILE_0 + +#define STRESS_LOG6(facility, level, msg, data1, data2, data3, data4, data5, data6) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 6, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6)); \ + } WHILE_0 + +#define STRESS_LOG7(facility, level, msg, data1, data2, data3, data4, data5, data6, data7) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 7, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6), (void*)(size_t)(data7)); \ + } WHILE_0 + +#define STRESS_LOG_COND0(facility, level, msg) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 0, msg); \ + } WHILE_0 + +#define STRESS_LOG_COND1(facility, level, cond, msg, data1) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 1, msg, (void*)(size_t)(data1)); \ + } WHILE_0 + +#define STRESS_LOG_COND2(facility, level, cond, msg, data1, data2) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 2, msg, \ + (void*)(size_t)(data1), (void*)(size_t)(data2)); \ + } WHILE_0 + +#define STRESS_LOG_COND3(facility, level, cond, msg, data1, data2, data3) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 3, msg, \ + (void*)(size_t)(data1),(void*)(size_t)(data2),(void*)(size_t)(data3)); \ + } WHILE_0 + +#define STRESS_LOG_COND4(facility, level, cond, msg, data1, data2, data3, data4) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 4, msg, (void*)(size_t)(data1), \ 
+ (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4)); \ + } WHILE_0 + +#define STRESS_LOG_COND5(facility, level, cond, msg, data1, data2, data3, data4, data5) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 5, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5)); \ + } WHILE_0 + +#define STRESS_LOG_COND6(facility, level, cond, msg, data1, data2, data3, data4, data5, data6) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 6, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6)); \ + } WHILE_0 + +#define STRESS_LOG_COND7(facility, level, cond, msg, data1, data2, data3, data4, data5, data6, data7) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 7, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6), (void*)(size_t)(data7)); \ + } WHILE_0 + +#define STRESS_LOG_RESERVE_MEM(numChunks) do { \ + if (StressLog::StressLogOn(LF_ALL, LL_ALWAYS)) \ + {StressLog::ReserveStressLogChunks (numChunks);} \ + } WHILE_0 + +// !!! WARNING !!! +// !!! DO NOT ADD STRESS_LOG8, as the stress log infrastructure supports a maximum of 7 arguments +// !!! WARNING !!! + +#define STRESS_LOG_PLUG_MOVE(plug_start, plug_end, plug_delta) do { \ + if (StressLog::StressLogOn(LF_GC, LL_INFO1000)) \ + StressLog::LogMsg(LF_GC, 3, ThreadStressLog::gcPlugMoveMsg(), \ + (void*)(size_t)(plug_start), (void*)(size_t)(plug_end), (void*)(size_t)(plug_delta)); \ + } WHILE_0 + +#define STRESS_LOG_ROOT_PROMOTE(root_addr, objPtr, methodTable) do { \ + if (StressLog::StressLogOn(LF_GC|LF_GCROOTS, LL_INFO1000)) \ + StressLog::LogMsg(LF_GC|LF_GCROOTS, 3, ThreadStressLog::gcRootPromoteMsg(), \ + (void*)(size_t)(root_addr), (void*)(size_t)(objPtr), (void*)(size_t)(methodTable)); \ + } WHILE_0 + +#define STRESS_LOG_ROOT_RELOCATE(root_addr, old_value, new_value, methodTable) do { \ + if (StressLog::StressLogOn(LF_GC|LF_GCROOTS, LL_INFO1000) && ((size_t)(old_value) != (size_t)(new_value))) \ + StressLog::LogMsg(LF_GC|LF_GCROOTS, 4, ThreadStressLog::gcRootMsg(), \ + (void*)(size_t)(root_addr), (void*)(size_t)(old_value), \ + (void*)(size_t)(new_value), (void*)(size_t)(methodTable)); \ + } WHILE_0 + +#define STRESS_LOG_GC_START(gcCount, Gen, collectClasses) do { \ + if (StressLog::StressLogOn(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10)) \ + StressLog::LogMsg(LF_GCROOTS|LF_GC|LF_GCALLOC, 3, ThreadStressLog::gcStartMsg(), \ + (void*)(size_t)(gcCount), (void*)(size_t)(Gen), (void*)(size_t)(collectClasses)); \ + } WHILE_0 + +#define STRESS_LOG_GC_END(gcCount, Gen, collectClasses) do { \ + if (StressLog::StressLogOn(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10)) \ + StressLog::LogMsg(LF_GCROOTS|LF_GC|LF_GCALLOC, 3, ThreadStressLog::gcEndMsg(),\ + (void*)(size_t)(gcCount), (void*)(size_t)(Gen), (void*)(size_t)(collectClasses), 0);\ + } WHILE_0 + +#if defined(_DEBUG) +#define MAX_CALL_STACK_TRACE 20 +#define STRESS_LOG_OOM_STACK(size) do { \ + if (StressLog::StressLogOn(LF_ALWAYS, LL_ALWAYS)) \ + { \ + StressLog::LogMsgOL("OOM on alloc of size %x \n", (void*)(size_t)(size)); \ + StressLog::LogCallStack ("OOM"); \ + } \ + } WHILE_0 +#define STRESS_LOG_GC_STACK do { \ + if (StressLog::StressLogOn(LF_GC |LF_GCINFO, LL_ALWAYS)) \ + { \ + 
StressLog::LogMsgOL("GC is triggered \n"); \ + StressLog::LogCallStack ("GC"); \ + } \ + } WHILE_0 +#else //_DEBUG +#define STRESS_LOG_OOM_STACK(size) +#define STRESS_LOG_GC_STACK +#endif //_DEBUG + +#endif // DACCESS_COMPILE + +// +// forward declarations: +// +class CrstStatic; +class Thread; +typedef DPTR(Thread) PTR_Thread; +class StressLog; +typedef DPTR(StressLog) PTR_StressLog; +class ThreadStressLog; +typedef DPTR(ThreadStressLog) PTR_ThreadStressLog; +struct StressLogChunk; +typedef DPTR(StressLogChunk) PTR_StressLogChunk; +struct DacpStressLogEnumCBArgs; + + +//========================================================================================== +// StressLog - per-thread circular queue of stresslog messages +// +class StressLog { +public: +// private: + unsigned facilitiesToLog; // Bitvector of facilities to log (see loglf.h) + unsigned levelToLog; // log level + unsigned MaxSizePerThread; // maximum number of bytes each thread should have before wrapping + unsigned MaxSizeTotal; // maximum memory allowed for stress log + Int32 totalChunk; // current number of total chunks allocated + PTR_ThreadStressLog logs; // the list of logs for every thread. + Int32 deadCount; // count of dead threads in the log + CrstStatic *pLock; // lock + unsigned __int64 tickFrequency; // number of ticks per second + unsigned __int64 startTimeStamp; // start time from when tick counter started + FILETIME startTime; // time the application started + size_t moduleOffset; // Used to compute format strings. + +#ifndef DACCESS_COMPILE +public: + static void Initialize(unsigned facilities, unsigned level, unsigned maxBytesPerThread, + unsigned maxBytesTotal, HANDLE hMod); + // Called at DllMain THREAD_DETACH to recycle thread's logs + static void ThreadDetach(ThreadStressLog *msgs); + static long NewChunk () { return PalInterlockedIncrement (&theLog.totalChunk); } + static long ChunkDeleted () { return PalInterlockedDecrement (&theLog.totalChunk); } + + //the result is not 100% accurate. If multiple threads call this funciton at the same time, + //we could allow the total size be bigger than required. But the memory won't grow forever + //and this is not critical so we don't try to fix the race + static bool AllowNewChunk (long numChunksInCurThread); + + //preallocate Stress log chunks for current thread. The memory we could preallocate is still + //bounded by per thread size limit and total size limit. 
If chunksToReserve is 0, we will try to + //preallocate up to per thread size limit + static bool ReserveStressLogChunks (unsigned int chunksToReserve); + +// private: + static ThreadStressLog* CreateThreadStressLog(Thread * pThread); + static ThreadStressLog* CreateThreadStressLogHelper(Thread * pThread); + +#else // DACCESS_COMPILE +public: + bool Initialize(); + + // Can't refer to the types in sospriv.h because it drags in windows.h + void EnumerateStressMsgs(/*STRESSMSGCALLBACK*/ void* smcb, /*ENDTHREADLOGCALLBACK*/ void* etcb, + void *token); + void EnumStressLogMemRanges(/*STRESSLOGMEMRANGECALLBACK*/ void* slmrcb, void *token); + + // Called while dumping logs after operations are completed, to ensure DAC-caches + // allow the stress logs to be dumped again + void ResetForRead(); + + ThreadStressLog* FindLatestThreadLog() const; + + friend class ClrDataAccess; + +#endif // DACCESS_COMPILE + +#ifndef DACCESS_COMPILE +public: + FORCEINLINE static bool StressLogOn(unsigned /*facility*/, unsigned level) + { + #if defined(DACCESS_COMPILE) + UNREFERENCED_PARAMETER(level); + return FALSE; + #else + // In Redhawk we have rationalized facility codes and have much + // fewer compared to desktop, as such we'll log all facilities and + // limit the filtering to the log level... + return + // (theLog.facilitiesToLog & facility) + // && + (level <= theLog.levelToLog); + #endif + } + + static void LogMsg(unsigned facility, int cArgs, const char* format, ... ); + + // Support functions for STRESS_LOG_VA + // We disable the warning "conversion from 'type' to 'type' of greater size" since everything will + // end up on the stack, and LogMsg will know the size of the variable based on the format string. + #ifdef _MSC_VER + #pragma warning( push ) + #pragma warning( disable : 4312 ) + #endif + static void LogMsgOL(const char* format) + { LogMsg(LF_GC, 0, format); } + + template < typename T1 > + static void LogMsgOL(const char* format, T1 data1) + { + C_ASSERT(sizeof(T1) <= sizeof(void*)); + LogMsg(LF_GC, 1, format, (void*)(size_t)data1); + } + + template < typename T1, typename T2 > + static void LogMsgOL(const char* format, T1 data1, T2 data2) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*)); + LogMsg(LF_GC, 2, format, (void*)(size_t)data1, (void*)(size_t)data2); + } + + template < typename T1, typename T2, typename T3 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*)); + LogMsg(LF_GC, 3, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3); + } + + template < typename T1, typename T2, typename T3, typename T4 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*)); + LogMsg(LF_GC, 4, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4); + } + + template < typename T1, typename T2, typename T3, typename T4, typename T5 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4, T5 data5) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*) && sizeof(T5) <= sizeof(void*)); + LogMsg(LF_GC, 5, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4, (void*)(size_t)data5); + } + + 
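The C_ASSERTs in these overloads exist because every argument is packed into one pointer-sized slot of the log record; a hypothetical caller, shown only to illustrate the constraint:

// Hypothetical helper, not part of this header.
static void LogMsgOLWidthExample()
{
    uint32_t condemnedGeneration = 2;
    void* segment = nullptr;
    // Both arguments satisfy sizeof(T) <= sizeof(void*) on every platform.
    StressLog::LogMsgOL("condemned generation %d, segment %p\n", condemnedGeneration, segment);

#ifdef _WIN64
    // A 64-bit payload only fits in a pointer-sized slot on 64-bit builds; on a 32-bit
    // build this call would trip the C_ASSERT at compile time rather than truncate silently.
    uint64_t bytesAllocated = 0x100000000ULL;
    StressLog::LogMsgOL("allocated %p bytes so far\n", bytesAllocated);
#endif
}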
template < typename T1, typename T2, typename T3, typename T4, typename T5, typename T6 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4, T5 data5, T6 data6) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*) && sizeof(T5) <= sizeof(void*) && sizeof(T6) <= sizeof(void*)); + LogMsg(LF_GC, 6, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4, (void*)(size_t)data5, (void*)(size_t)data6); + } + + template < typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4, T5 data5, T6 data6, T7 data7) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*) && sizeof(T5) <= sizeof(void*) && sizeof(T6) <= sizeof(void*) && sizeof(T7) <= sizeof(void*)); + LogMsg(LF_GC, 7, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4, (void*)(size_t)data5, (void*)(size_t)data6, (void*)(size_t)data7); + } + + #ifdef _MSC_VER + #pragma warning( pop ) + #endif + +// We can only log the stacktrace on DEBUG builds! +#ifdef _DEBUG + static void LogCallStack(const char *const callTag); +#endif //_DEBUG + +#endif // DACCESS_COMPILE + +// private: // static variables + static StressLog theLog; // We only have one log, and this is it +}; + + +//========================================================================================== +// Private classes +// + +#if defined(_MSC_VER) +// don't warn about 0 sized array below or unnamed structures +#pragma warning(disable:4200 4201) +#endif + +//========================================================================================== +// StressMsg +// +// The order of fields is important. Keep the prefix length as the first field. 
+// And make sure the timeStamp field is naturally aligned, so we don't waste +// space on 32-bit platforms +// +struct StressMsg { + union { + struct { + UInt32 numberOfArgs : 3; // at most 7 arguments + UInt32 formatOffset : 29; // offset of string in mscorwks + }; + UInt32 fmtOffsCArgs; // for optimized access + }; + UInt32 facility; // facility used to log the entry + unsigned __int64 timeStamp; // time when mssg was logged + void* args[0]; // size given by numberOfArgs + + static const size_t maxArgCnt = 7; + static const size_t maxOffset = 0x20000000; + static size_t maxMsgSize () + { return sizeof(StressMsg) + maxArgCnt*sizeof(void*); } + + friend class ThreadStressLog; + friend class StressLog; +}; + +#ifdef _WIN64 +#define STRESSLOG_CHUNK_SIZE (32 * 1024) +#else //_WIN64 +#define STRESSLOG_CHUNK_SIZE (16 * 1024) +#endif //_WIN64 +#define GC_STRESSLOG_MULTIPLY (5) + +//========================================================================================== +// StressLogChunk +// +// A chunk of contiguous memory containing instances of StressMsg +// +struct StressLogChunk +{ + PTR_StressLogChunk prev; + PTR_StressLogChunk next; + char buf[STRESSLOG_CHUNK_SIZE]; + UInt32 dwSig1; + UInt32 dwSig2; + +#ifndef DACCESS_COMPILE + + StressLogChunk (PTR_StressLogChunk p = NULL, PTR_StressLogChunk n = NULL) + :prev (p), next (n), dwSig1 (0xCFCFCFCF), dwSig2 (0xCFCFCFCF) + {} + +#endif //!DACCESS_COMPILE + + char * StartPtr () + { + return buf; + } + + char * EndPtr () + { + return buf + STRESSLOG_CHUNK_SIZE; + } + + bool IsValid () const + { + return dwSig1 == 0xCFCFCFCF && dwSig2 == 0xCFCFCFCF; + } +}; + +//========================================================================================== +// ThreadStressLog +// +// This class implements a circular stack of variable sized elements +// .The buffer between startPtr-endPtr is used in a circular manner +// to store instances of the variable-sized struct StressMsg. +// The StressMsg are always aligned to endPtr, while the space +// left between startPtr and the last element is 0-padded. +// .curPtr points to the most recently written log message +// .readPtr points to the next log message to be dumped +// .hasWrapped is TRUE while dumping the log, if we had wrapped +// past the endPtr marker, back to startPtr +// The AdvanceRead/AdvanceWrite operations simply update the +// readPtr / curPtr fields. 
thecaller is responsible for reading/writing +// to the corresponding field +class ThreadStressLog { + PTR_ThreadStressLog next; // we keep a linked list of these + uint64_t threadId; // the id for the thread using this buffer + bool isDead; // Is this thread dead + bool readHasWrapped; // set when read ptr has passed chunListTail + bool writeHasWrapped; // set when write ptr has passed chunListHead + StressMsg* curPtr; // where packets are being put on the queue + StressMsg* readPtr; // where we are reading off the queue (used during dumping) + PTR_StressLogChunk chunkListHead; //head of a list of stress log chunks + PTR_StressLogChunk chunkListTail; //tail of a list of stress log chunks + PTR_StressLogChunk curReadChunk; //the stress log chunk we are currently reading + PTR_StressLogChunk curWriteChunk; //the stress log chunk we are currently writing + long chunkListLength; // how many stress log chunks are in this stress log + PTR_Thread pThread; // thread associated with these stress logs + StressMsg * origCurPtr; // this holds the original curPtr before we start the dump + + friend class StressLog; + +#ifndef DACCESS_COMPILE +public: + inline ThreadStressLog (); + inline ~ThreadStressLog (); + + void LogMsg ( UInt32 facility, int cArgs, const char* format, ... ) + { + va_list Args; + va_start(Args, format); + LogMsg (facility, cArgs, format, Args); + } + + void LogMsg ( UInt32 facility, int cArgs, const char* format, va_list Args); + +private: + FORCEINLINE StressMsg* AdvanceWrite(int cArgs); + inline StressMsg* AdvWritePastBoundary(int cArgs); + FORCEINLINE bool GrowChunkList (); + +#else // DACCESS_COMPILE +public: + friend class ClrDataAccess; + + // Called while dumping. Returns true after all messages in log were dumped + FORCEINLINE bool CompletedDump (); + +private: + FORCEINLINE bool IsReadyForRead() { return readPtr != NULL; } + FORCEINLINE StressMsg* AdvanceRead(); + inline StressMsg* AdvReadPastBoundary(); +#endif //!DACCESS_COMPILE + +public: + void Activate (Thread * pThread); + + bool IsValid () const + { + return chunkListHead != NULL && (!curWriteChunk || curWriteChunk->IsValid ()); + } + + static const char* gcStartMsg() + { + return "{ =========== BEGINGC %d, (requested generation = %lu, collect_classes = %lu) ==========\n"; + } + + static const char* gcEndMsg() + { + return "========== ENDGC %d (gen = %lu, collect_classes = %lu) ===========}\n"; + } + + static const char* gcRootMsg() + { + return " GC Root %p RELOCATED %p -> %p MT = %pT\n"; + } + + static const char* gcRootPromoteMsg() + { + return " GCHeap::Promote: Promote GC Root *%p = %p MT = %pT\n"; + } + + static const char* gcPlugMoveMsg() + { + return "GC_HEAP RELOCATING Objects in heap within range [%p %p) by -0x%x bytes\n"; + } + +}; + + +//========================================================================================== +// Inline implementations: +// + +#ifdef DACCESS_COMPILE + +//------------------------------------------------------------------------------------------ +// Called while dumping. 
Returns true after all messages in log were dumped +FORCEINLINE bool ThreadStressLog::CompletedDump () +{ + return readPtr->timeStamp == 0 + //if read has passed end of list but write has not passed head of list yet, we are done + //if write has also wrapped, we are at the end if read pointer passed write pointer + || (readHasWrapped && + (!writeHasWrapped || (curReadChunk == curWriteChunk && readPtr >= curPtr))); +} + +//------------------------------------------------------------------------------------------ +// Called when dumping the log (by StressLog::Dump()) +// Updates readPtr to point to next stress messaage to be dumped +inline StressMsg* ThreadStressLog::AdvanceRead() { + // advance the marker + readPtr = (StressMsg*)((char*)readPtr + sizeof(StressMsg) + readPtr->numberOfArgs*sizeof(void*)); + // wrap around if we need to + if (readPtr >= (StressMsg *)curReadChunk->EndPtr ()) + { + AdvReadPastBoundary(); + } + return readPtr; +} + +//------------------------------------------------------------------------------------------ +// The factored-out slow codepath for AdvanceRead(), only called by AdvanceRead(). +// Updates readPtr to and returns the first stress message >= startPtr +inline StressMsg* ThreadStressLog::AdvReadPastBoundary() { + //if we pass boundary of tail list, we need to set has Wrapped + if (curReadChunk == chunkListTail) + { + readHasWrapped = true; + //If write has not wrapped, we know the contents from list head to + //cur pointer is garbage, we don't need to read them + if (!writeHasWrapped) + { + return readPtr; + } + } + curReadChunk = curReadChunk->next; + void** p = (void**)curReadChunk->StartPtr(); + while (*p == NULL && (size_t)(p-(void**)curReadChunk->StartPtr ()) < (StressMsg::maxMsgSize()/sizeof(void*))) + { + ++p; + } + // if we failed to find a valid start of a StressMsg fallback to startPtr (since timeStamp==0) + if (*p == NULL) + { + p = (void**) curReadChunk->StartPtr (); + } + readPtr = (StressMsg*)p; + + return readPtr; +} + +#else // DACCESS_COMPILE + +//------------------------------------------------------------------------------------------ +// Initialize a ThreadStressLog +inline ThreadStressLog::ThreadStressLog() +{ + chunkListHead = chunkListTail = curWriteChunk = NULL; + StressLogChunk * newChunk = new (nothrow) StressLogChunk; + //OOM or in cantalloc region + if (newChunk == NULL) + { + return; + } + StressLog::NewChunk (); + + newChunk->prev = newChunk; + newChunk->next = newChunk; + + chunkListHead = chunkListTail = newChunk; + + next = NULL; + isDead = TRUE; + curPtr = NULL; + readPtr = NULL; + writeHasWrapped = FALSE; + curReadChunk = NULL; + curWriteChunk = NULL; + chunkListLength = 1; + origCurPtr = NULL; +} + +inline ThreadStressLog::~ThreadStressLog () +{ + //no thing to do if the list is empty (failed to initialize) + if (chunkListHead == NULL) + { + return; + } + + StressLogChunk * chunk = chunkListHead; + + do + { + StressLogChunk * tmp = chunk; + chunk = chunk->next; + delete tmp; + StressLog::ChunkDeleted (); + } while (chunk != chunkListHead); +} + +//------------------------------------------------------------------------------------------ +// Called when logging, checks if we can increase the number of stress log chunks associated +// with the current thread +FORCEINLINE bool ThreadStressLog::GrowChunkList () +{ + _ASSERTE (chunkListLength >= 1); + if (!StressLog::AllowNewChunk (chunkListLength)) + { + return FALSE; + } + StressLogChunk * newChunk = new (nothrow) StressLogChunk (chunkListTail, chunkListHead); + if 
(newChunk == NULL) + { + return FALSE; + } + StressLog::NewChunk (); + chunkListLength++; + chunkListHead->prev = newChunk; + chunkListTail->next = newChunk; + chunkListHead = newChunk; + + return TRUE; +} + +//------------------------------------------------------------------------------------------ +// Called at runtime when writing the log (by StressLog::LogMsg()) +// Updates curPtr to point to the next spot in the log where we can write +// a stress message with cArgs arguments +// For convenience it returns a pointer to the empty slot where we can +// write the next stress message. +// cArgs is the number of arguments in the message to be written. +inline StressMsg* ThreadStressLog::AdvanceWrite(int cArgs) { + // _ASSERTE(cArgs <= StressMsg::maxArgCnt); + // advance the marker + StressMsg* p = (StressMsg*)((char*)curPtr - sizeof(StressMsg) - cArgs*sizeof(void*)); + + //past start of current chunk + //wrap around if we need to + if (p < (StressMsg*)curWriteChunk->StartPtr ()) + { + curPtr = AdvWritePastBoundary(cArgs); + } + else + { + curPtr = p; + } + + return curPtr; +} + +//------------------------------------------------------------------------------------------ +// This is the factored-out slow codepath for AdvanceWrite() and is only called by +// AdvanceWrite(). +// Returns the stress message flushed against endPtr +// In addition it writes NULLs b/w the startPtr and curPtr +inline StressMsg* ThreadStressLog::AdvWritePastBoundary(int cArgs) { + //zeroed out remaining buffer + memset (curWriteChunk->StartPtr (), 0, (char *)curPtr - (char *)curWriteChunk->StartPtr ()); + + //if we are already at head of the list, try to grow the list + if (curWriteChunk == chunkListHead) + { + GrowChunkList (); + } + + curWriteChunk = curWriteChunk->prev; + if (curWriteChunk == chunkListTail) + { + writeHasWrapped = TRUE; + } + curPtr = (StressMsg*)((char*)curWriteChunk->EndPtr () - sizeof(StressMsg) - cArgs * sizeof(void*)); + return curPtr; +} + +#endif // DACCESS_COMPILE + +#endif // STRESS_LOG + +#ifndef __GCENV_BASE_INCLUDED__ +#if !defined(STRESS_LOG) || defined(DACCESS_COMPILE) +#define STRESS_LOG_VA(msg) do { } WHILE_0 +#define STRESS_LOG0(facility, level, msg) do { } WHILE_0 +#define STRESS_LOG1(facility, level, msg, data1) do { } WHILE_0 +#define STRESS_LOG2(facility, level, msg, data1, data2) do { } WHILE_0 +#define STRESS_LOG3(facility, level, msg, data1, data2, data3) do { } WHILE_0 +#define STRESS_LOG4(facility, level, msg, data1, data2, data3, data4) do { } WHILE_0 +#define STRESS_LOG5(facility, level, msg, data1, data2, data3, data4, data5) do { } WHILE_0 +#define STRESS_LOG6(facility, level, msg, data1, data2, data3, data4, data5, data6) do { } WHILE_0 +#define STRESS_LOG7(facility, level, msg, data1, data2, data3, data4, data5, data6, data7) do { } WHILE_0 +#define STRESS_LOG_PLUG_MOVE(plug_start, plug_end, plug_delta) do { } WHILE_0 +#define STRESS_LOG_ROOT_PROMOTE(root_addr, objPtr, methodTable) do { } WHILE_0 +#define STRESS_LOG_ROOT_RELOCATE(root_addr, old_value, new_value, methodTable) do { } WHILE_0 +#define STRESS_LOG_GC_START(gcCount, Gen, collectClasses) do { } WHILE_0 +#define STRESS_LOG_GC_END(gcCount, Gen, collectClasses) do { } WHILE_0 +#define STRESS_LOG_OOM_STACK(size) do { } WHILE_0 +#define STRESS_LOG_GC_STACK do { } WHILE_0 +#define STRESS_LOG_RESERVE_MEM(numChunks) do { } WHILE_0 +#endif // !STRESS_LOG || DACCESS_COMPILE +#endif // !__GCENV_BASE_INCLUDED__ + +#endif // StressLog_h diff --git a/src/coreclr/src/nativeaot/Runtime/inc/type_traits.hpp 
b/src/coreclr/src/nativeaot/Runtime/inc/type_traits.hpp new file mode 100644 index 0000000000000..45bdf8392abd3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/type_traits.hpp @@ -0,0 +1,311 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// type_traits.hpp +// +// Type trait metaprogramming utilities. +// + +#ifndef __TYPE_TRAITS_HPP__ +#define __TYPE_TRAITS_HPP__ + +#include "CommonTypes.h" + +namespace type_traits +{ + +namespace imp +{ + +struct true_type { static const bool value = true; }; +struct false_type { static const bool value = false; }; + +//////////////////////////////////////////////////////////////////////////////// +// Helper types Small and Big - guarantee that sizeof(Small) < sizeof(Big) +// + +template +struct conversion_helper +{ + typedef char Small; + struct Big { char dummy[2]; }; + static Big Test(...); + static Small Test(U); + static T MakeT(); +}; + +//////////////////////////////////////////////////////////////////////////////// +// class template conversion +// Figures out the conversion relationships between two types +// Invocations (T and U are types): +// a) conversion::exists +// returns (at compile time) true if there is an implicit conversion from T +// to U (example: Derived to Base) +// b) conversion::exists2Way +// returns (at compile time) true if there are both conversions from T +// to U and from U to T (example: int to char and back) +// c) conversion::sameType +// returns (at compile time) true if T and U represent the same type +// +// NOTE: might not work if T and U are in a private inheritance hierarchy. +// + +template +struct conversion +{ + typedef imp::conversion_helper H; + static const bool exists = sizeof(typename H::Small) == sizeof((H::Test(H::MakeT()))); + static const bool exists2Way = exists && conversion::exists; + static const bool sameType = false; +}; + +template +struct conversion +{ + static const bool exists = true; + static const bool exists2Way = true; + static const bool sameType = true; +}; + +template +struct conversion +{ + static const bool exists = false; + static const bool exists2Way = false; + static const bool sameType = false; +}; + +template +struct conversion +{ + static const bool exists = false; + static const bool exists2Way = false; + static const bool sameType = false; +}; + +template <> +struct conversion +{ + static const bool exists = true; + static const bool exists2Way = true; + static const bool sameType = true; +}; + +template +struct is_base_of_helper; + +template <> +struct is_base_of_helper : public true_type {} ; + +template <> +struct is_base_of_helper : public false_type {} ; + +}// imp + +//////////////////////////////////////////////////////////////////////////////// +// is_base_of::value is typedefed to be true if TDerived derives from TBase +// and false otherwise. +// +// +// NOTE: use TR1 type_traits::is_base_of when available. +// +#ifdef _MSC_VER + +template +struct is_base_of : public imp::is_base_of_helper<__is_base_of( TBase, TDerived)> {}; + +#else + +// Note that we need to compare pointer types here, since conversion of types by-value +// just tells us whether or not an implicit conversion constructor exists. We handle +// type parameters that are already pointers specially; see below. +template +struct is_base_of : public imp::is_base_of_helper::exists> {}; + +// Specialization to handle type parameters that are already pointers. 
+template +struct is_base_of : public imp::is_base_of_helper::exists> {}; + +// Specialization to handle invalid mixing of pointer types. +template +struct is_base_of : public imp::false_type {}; + +// Specialization to handle invalid mixing of pointer types. +template +struct is_base_of : public imp::false_type {}; + +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Remove const qualifications, if any. Access using remove_const::type +// +template struct remove_const { typedef T type; }; +template struct remove_const { typedef T type; }; + +//////////////////////////////////////////////////////////////////////////////// +// is_signed::value is true if T is a signed integral type, false otherwise. +// +template +struct is_signed { static const bool value = (static_cast(-1) < 0); }; + +} + +//////////////////////////////////////////////////////////////////////////////// +// These are related to type traits, but they are more like asserts of type +// traits in that the result is that either the compiler does or does not +// produce an error. +// +namespace type_constraints +{ + +//////////////////////////////////////////////////////////////////////////////// +// derived_from will produce a compiler error if TDerived does not +// derive from TBase. +// +// NOTE: use TR1 type_traits::is_base_of when available. +// + +template struct is_base_of +{ + is_base_of() + { + static_assert((type_traits::is_base_of::value), + "is_base_of() constraint violation: TDerived does not derive from TBase"); + } +}; + +}; // namespace type_constraints + +namespace rh { namespace std +{ + // Import some select components of the STL + + // TEMPLATE FUNCTION for_each + template + inline + _Fn1 for_each(_InIt _First, _InIt _Last, _Fn1 _Func) + { // perform function for each element + for (; _First != _Last; ++_First) + _Func(*_First); + return (_Func); + } + + template + inline + _InIt find(_InIt _First, _InIt _Last, const _Ty& _Val) + { // find first matching _Val + for (; _First != _Last; ++_First) + if (*_First == _Val) + break; + return (_First); + } + + template + inline + _InIt find_if(_InIt _First, _InIt _Last, _Pr _Pred) + { // find first satisfying _Pred + for (; _First != _Last; ++_First) + if (_Pred(*_First)) + break; + return (_First); + } + + template + inline + bool exists(_InIt _First, _InIt _Last, const _Ty& _Val) + { + return find(_First, _Last, _Val) != _Last; + } + + template + inline + bool exists_if(_InIt _First, _InIt _Last, _Pr _Pred) + { + return find_if(_First, _Last, _Pred) != _Last; + } + + template + inline + UIntNative count(_InIt _First, _InIt _Last, const _Ty& _Val) + { + UIntNative _Ret = 0; + for (; _First != _Last; _First++) + if (*_First == _Val) + ++_Ret; + return _Ret; + } + + template + inline + UIntNative count_if(_InIt _First, _InIt _Last, _Pr _Pred) + { + UIntNative _Ret = 0; + for (; _First != _Last; _First++) + if (_Pred(*_First)) + ++_Ret; + return _Ret; + } + + // Forward declaration, each collection requires specialization + template + inline + _FwdIt remove(_FwdIt _First, _FwdIt _Last, const _Ty& _Val); +} // namespace std +} // namespace rh + +#if 0 + +// ----------------------------------------------------------------- +// Holding place for unused-but-possibly-useful-in-the-future code. 
+ +// ------------------------------------------------- +// This belongs in type_traits.hpp + +// +// is_pointer::value is true if the type is a pointer, false otherwise +// +template struct is_pointer : public false_type {}; +template struct is_pointer : public true_type {}; + +// +// Remove pointer from type, if it has one. Use remove_pointer::type +// Further specialized in daccess.h +// +template struct remove_pointer { typedef T type; }; +template struct remove_pointer { typedef T type; }; + +// ------------------------------------------------- +// This belongs in daccess.h + +namespace type_traits +{ + +// +// is_pointer::value is true if the type is a pointer, false otherwise +// specialized from type_traits.hpp +// +template struct is_pointer > : public type_traits::true_type {}; + +// +// remove_pointer::type is T with one less pointer qualification, if it had one. +// specialized from type_traits.hpp +// +template struct remove_pointer > { typedef T type; }; + +} // type_traits + +namespace dac +{ + +// +// is_dptr::value is true if T is a __DPtr, false otherwise. +// This is a partial specialization case for the positive case. +// +//template struct is_dptr > : public type_traits::true_type {}; + +} + +#endif + +#endif + diff --git a/src/coreclr/src/nativeaot/Runtime/inc/varint.h b/src/coreclr/src/nativeaot/Runtime/inc/varint.h new file mode 100644 index 0000000000000..06e9d65d32db3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/varint.h @@ -0,0 +1,144 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +class VarInt +{ +public: + static UInt32 ReadUnsigned(PTR_UInt8 & pbEncoding) + { + UIntNative lengthBits = *pbEncoding & 0x0F; + size_t negLength = s_negLengthTab[lengthBits]; + UIntNative shift = s_shiftTab[lengthBits]; + UInt32 result = *(PTR_UInt32)(pbEncoding - negLength - 4); + + result >>= shift; + pbEncoding -= negLength; + + return result; + } + + // + // WARNING: This method returns the negative of the length of the value that it just skipped! + // + // This was helpful in the GC info scan loop because it allowed us to always skip past unsigned values in + // the body of the loop. At the end of loop, we use this negative sign to distinguish between two cases + // and that allows us to decode the unsigned value that we need outside of the loop. Note that we encode + // the negatives in the s_negLengthTable to avoid any additional operations in the body of the GC scan + // loop. 
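    // A concrete example may help here (derived from the code in this header, for
    // illustration only): WriteUnsigned below stores the byte length of an encoding in
    // the low bits of its first byte -- xxxxxxx0 for 1 byte, xxxxxx01 for 2, xxxxx011
    // for 3, xxxx0111 for 4, and 00001111 for 5. Encoding the value 300 therefore takes
    // the 2-byte branch and emits (UInt8)(300*4 + 1) == 0xB1 followed by
    // (UInt8)(300 >> 6) == 0x04. ReadUnsigned above reverses this without branching:
    // lengthBits == (0xB1 & 0x0F) == 1 selects negLength == -2 and shift == 18 from the
    // tables, the (possibly unaligned) 32-bit little-endian load ending at the last byte
    // of the encoding yields 0x04B1xxxx, 0x04B1xxxx >> 18 == 300, and subtracting the
    // negative length advances pbEncoding past the two bytes just consumed.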
+ // + static IntNative SkipUnsigned(PTR_UInt8 & pbEncoding) + { + UIntNative lengthBits = *pbEncoding & 0x0F; + size_t negLength = s_negLengthTab[lengthBits]; + pbEncoding -= negLength; + return negLength; + } + + static UIntNative WriteUnsigned(PTR_UInt8 pbDest, UInt32 value) + { + if (pbDest == NULL) + { + if (value < 128) + return 1; + + if (value < 128*128) + return 2; + + if (value < 128*128*128) + return 3; + + if (value < 128*128*128*128) + return 4; + + return 5; + } + + if (value < 128) + { + *pbDest++ = (UInt8)(value*2 + 0); + return 1; + } + + if (value < 128*128) + { + *pbDest++ = (UInt8)(value*4 + 1); + *pbDest++ = (UInt8)(value >> 6); + return 2; + } + + if (value < 128*128*128) + { + *pbDest++ = (UInt8)(value*8 + 3); + *pbDest++ = (UInt8)(value >> 5); + *pbDest++ = (UInt8)(value >> 13); + return 3; + } + + if (value < 128*128*128*128) + { + *pbDest++ = (UInt8)(value*16 + 7); + *pbDest++ = (UInt8)(value >> 4); + *pbDest++ = (UInt8)(value >> 12); + *pbDest++ = (UInt8)(value >> 20); + return 4; + } + + *pbDest++ = 15; + *pbDest++ = (UInt8)value; + *pbDest++ = (UInt8)(value >> 8); + *pbDest++ = (UInt8)(value >> 16); + *pbDest++ = (UInt8)(value >> 24); + return 5; + } + +private: + static Int8 s_negLengthTab[16]; + static UInt8 s_shiftTab[16]; +}; + +__declspec(selectany) +Int8 VarInt::s_negLengthTab[16] = +{ + -1, // 0 + -2, // 1 + -1, // 2 + -3, // 3 + + -1, // 4 + -2, // 5 + -1, // 6 + -4, // 7 + + -1, // 8 + -2, // 9 + -1, // 10 + -3, // 11 + + -1, // 12 + -2, // 13 + -1, // 14 + -5, // 15 +}; + +__declspec(selectany) +UInt8 VarInt::s_shiftTab[16] = +{ + 32-7*1, // 0 + 32-7*2, // 1 + 32-7*1, // 2 + 32-7*3, // 3 + + 32-7*1, // 4 + 32-7*2, // 5 + 32-7*1, // 6 + 32-7*4, // 7 + + 32-7*1, // 8 + 32-7*2, // 9 + 32-7*1, // 10 + 32-7*3, // 11 + + 32-7*1, // 12 + 32-7*2, // 13 + 32-7*1, // 14 + 0, // 15 +}; diff --git a/src/coreclr/src/nativeaot/Runtime/loglf.h b/src/coreclr/src/nativeaot/Runtime/loglf.h new file mode 100644 index 0000000000000..75c5d126a7873 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/loglf.h @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// The code in sos.DumpStressLog depends on the facility codes +// being bit flags sorted in increasing order. +// See code:EEStartup#TableOfContents for EE overview +DEFINE_LOG_FACILITY(LF_GC ,0x00000001) +DEFINE_LOG_FACILITY(LF_GCINFO ,0x00000002) +DEFINE_LOG_FACILITY(LF_GCALLOC ,0x00000004) +DEFINE_LOG_FACILITY(LF_GCROOTS ,0x00000008) +DEFINE_LOG_FACILITY(LF_STARTUP ,0x00000010) // Log startup and shutdown failures +DEFINE_LOG_FACILITY(LF_STACKWALK ,0x00000020) +// LF_ALWAYS 0x80000000 // make certain you don't try to use this bit for a real facility +// LF_ALL 0xFFFFFFFF +// +#undef DEFINE_LOG_FACILITY + diff --git a/src/coreclr/src/nativeaot/Runtime/portable.cpp b/src/coreclr/src/nativeaot/Runtime/portable.cpp new file mode 100644 index 0000000000000..4372925d0ddf4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/portable.cpp @@ -0,0 +1,440 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
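The helpers in this file provide portable C++ implementations of runtime entry points that are otherwise written in assembly. The non-finalizable allocation helpers that follow (RhpNewFast, RhpNewArray) share one shape: try a thread-local bump-pointer allocation first, and fall back to the GC only when the per-thread budget is exhausted. The sketch below is a simplified model of that fast path; SketchAllocContext and sketch_try_alloc are hypothetical names used for illustration and are not part of this change.

#include <cstddef>
#include <cstdint>

// Hypothetical, simplified stand-in for the per-thread allocation context
// (the real layout is gc_alloc_context, declared later in this file).
struct SketchAllocContext
{
    uint8_t* alloc_ptr;    // next free byte in the thread's current allocation budget
    uint8_t* alloc_limit;  // end of the budget handed out by the GC
};

// Bump-pointer fast path: advance the pointer if the object fits, otherwise report
// failure so the caller can take the slow path (RhpGcAlloc in the real helpers).
inline void* sketch_try_alloc(SketchAllocContext* ctx, size_t size)
{
    uint8_t* result = ctx->alloc_ptr;
    uint8_t* advance = result + size;
    if (advance <= ctx->alloc_limit)
    {
        ctx->alloc_ptr = advance;  // no locks and no GC interaction on this path
        return result;             // the caller still installs the object's EEType
    }
    return nullptr;                // budget exhausted: hand the request to the GC
}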
+#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.inl" +#include "volatile.h" +#include "PalRedhawk.h" +#include "rhassert.h" + +#include "slist.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "varint.h" +#include "holder.h" +#include "rhbinder.h" +#include "Crst.h" +#include "RuntimeInstance.h" +#include "event.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" + +#include "eetype.h" +#include "TypeManager.h" +#include "eetype.inl" +#include "ObjectLayout.h" + +#include "GCMemoryHelpers.h" +#include "GCMemoryHelpers.inl" + +#if defined(USE_PORTABLE_HELPERS) + +EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame); +EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpPublishObject(void* pObject, UIntNative cbSize); + +struct gc_alloc_context +{ + UInt8* alloc_ptr; + UInt8* alloc_limit; + __int64 alloc_bytes; //Number of bytes allocated on SOH by this context + __int64 alloc_bytes_loh; //Number of bytes allocated on LOH by this context + void* gc_reserved_1; + void* gc_reserved_2; + int alloc_count; +}; + +// +// Allocations +// +COOP_PINVOKE_HELPER(Object *, RhpNewFast, (EEType* pEEType)) +{ + ASSERT(!pEEType->RequiresAlign8()); + ASSERT(!pEEType->HasFinalizer()); + + Thread * pCurThread = ThreadStore::GetCurrentThread(); + gc_alloc_context * acontext = pCurThread->GetAllocContext(); + Object * pObject; + + size_t size = pEEType->get_BaseSize(); + + UInt8* result = acontext->alloc_ptr; + UInt8* advance = result + size; + if (advance <= acontext->alloc_limit) + { + acontext->alloc_ptr = advance; + pObject = (Object *)result; + pObject->set_EEType(pEEType); + return pObject; + } + + pObject = (Object *)RhpGcAlloc(pEEType, 0, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pEEType); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + + return pObject; +} + +#define GC_ALLOC_FINALIZE 0x1 // TODO: Defined in gc.h + +COOP_PINVOKE_HELPER(Object *, RhpNewFinalizable, (EEType* pEEType)) +{ + ASSERT(!pEEType->RequiresAlign8()); + ASSERT(pEEType->HasFinalizer()); + + size_t size = pEEType->get_BaseSize(); + + Object * pObject = (Object *)RhpGcAlloc(pEEType, GC_ALLOC_FINALIZE, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pEEType); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + + return pObject; +} + +COOP_PINVOKE_HELPER(Array *, RhpNewArray, (EEType * pArrayEEType, int numElements)) +{ + ASSERT_MSG(!pArrayEEType->RequiresAlign8(), "NYI"); + + Thread * pCurThread = ThreadStore::GetCurrentThread(); + gc_alloc_context * acontext = pCurThread->GetAllocContext(); + Array * pObject; + + if (numElements < 0) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw overflow + } + + size_t size; +#ifndef HOST_64BIT + // if the element count is <= 0x10000, no overflow is possible because the component size is + // <= 0xffff, and thus the product is <= 0xffff0000, and the base size is only ~12 bytes + if (numElements > 0x10000) + { + // Perform the size computation using 64-bit integeres to detect overflow + uint64_t size64 = (uint64_t)pArrayEEType->get_BaseSize() + ((uint64_t)numElements * 
(uint64_t)pArrayEEType->get_ComponentSize()); + size64 = (size64 + (sizeof(UIntNative)-1)) & ~(sizeof(UIntNative)-1); + + size = (size_t)size64; + if (size != size64) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw overflow + } + } + else +#endif // !HOST_64BIT + { + size = (size_t)pArrayEEType->get_BaseSize() + ((size_t)numElements * (size_t)pArrayEEType->get_ComponentSize()); + size = ALIGN_UP(size, sizeof(UIntNative)); + } + + UInt8* result = acontext->alloc_ptr; + UInt8* advance = result + size; + if (advance <= acontext->alloc_limit) + { + acontext->alloc_ptr = advance; + pObject = (Array *)result; + pObject->set_EEType(pArrayEEType); + pObject->InitArrayLength((UInt32)numElements); + return pObject; + } + + pObject = (Array *)RhpGcAlloc(pArrayEEType, 0, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pArrayEEType); + pObject->InitArrayLength((UInt32)numElements); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + + return pObject; +} + +COOP_PINVOKE_HELPER(String *, RhNewString, (EEType * pArrayEEType, int numElements)) +{ + // TODO: Implement. We tail call to RhpNewArray for now since there's a bunch of TODOs in the places + // that matter anyway. + return (String*)RhpNewArray(pArrayEEType, numElements); +} + +#endif +#if defined(USE_PORTABLE_HELPERS) + +#ifdef HOST_ARM +COOP_PINVOKE_HELPER(Object *, RhpNewFinalizableAlign8, (EEType* pEEType)) +{ + Object * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} + +COOP_PINVOKE_HELPER(Object *, RhpNewFastMisalign, (EEType* pEEType)) +{ + Object * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} + +COOP_PINVOKE_HELPER(Object *, RhpNewFastAlign8, (EEType* pEEType)) +{ + Object * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} + +COOP_PINVOKE_HELPER(Array *, RhpNewArrayAlign8, (EEType * pArrayEEType, int numElements)) +{ + Array * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} +#endif + +COOP_PINVOKE_HELPER(void, RhpInitialDynamicInterfaceDispatch, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch1, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch2, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch4, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch8, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch16, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch32, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch64, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpVTableOffsetDispatch, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +// @TODO Implement UniversalTransition +EXTERN_C void * ReturnFromUniversalTransition; +void * ReturnFromUniversalTransition; + +// @TODO Implement UniversalTransition_DebugStepTailCall +EXTERN_C void * ReturnFromUniversalTransition_DebugStepTailCall; +void * ReturnFromUniversalTransition_DebugStepTailCall; + +#endif // USE_PORTABLE_HELPERS + +// @TODO Implement CallDescrThunk +EXTERN_C void * ReturnFromCallDescrThunk; +#ifdef USE_PORTABLE_HELPERS +void * ReturnFromCallDescrThunk; +#endif + +#if defined(USE_PORTABLE_HELPERS) || defined(TARGET_UNIX) +#if !defined 
(HOST_ARM64) +// +// Return address hijacking +// +COOP_PINVOKE_HELPER(void, RhpGcProbeHijackScalar, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcProbeHijackObject, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcProbeHijackByref, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcStressHijackScalar, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcStressHijackObject, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcStressHijackByref, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +#endif +#endif // defined(USE_PORTABLE_HELPERS) || defined(TARGET_UNIX) + +#if defined(USE_PORTABLE_HELPERS) + +#if !defined (HOST_ARM64) +COOP_PINVOKE_HELPER(void, RhpAssignRef, (Object ** dst, Object * ref)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + *dst = ref; + InlineWriteBarrier(dst, ref); +} + +COOP_PINVOKE_HELPER(void, RhpCheckedAssignRef, (Object ** dst, Object * ref)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + *dst = ref; + InlineCheckedWriteBarrier(dst, ref); +} +#endif + +COOP_PINVOKE_HELPER(Object *, RhpCheckedLockCmpXchg, (Object ** location, Object * value, Object * comparand)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + Object * ret = (Object *)PalInterlockedCompareExchangePointer((void * volatile *)location, value, comparand); + InlineCheckedWriteBarrier(location, value); + return ret; +} + +COOP_PINVOKE_HELPER(Object *, RhpCheckedXchg, (Object ** location, Object * value)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + Object * ret = (Object *)PalInterlockedExchangePointer((void * volatile *)location, value); + InlineCheckedWriteBarrier(location, value); + return ret; +} + +COOP_PINVOKE_HELPER(Int32, RhpLockCmpXchg32, (Int32 * location, Int32 value, Int32 comparand)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + return PalInterlockedCompareExchange(location, value, comparand); +} + +COOP_PINVOKE_HELPER(Int64, RhpLockCmpXchg64, (Int64 * location, Int64 value, Int64 comparand)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + return PalInterlockedCompareExchange64(location, value, comparand); +} + +#endif // USE_PORTABLE_HELPERS + +#if !defined(HOST_ARM64) +COOP_PINVOKE_HELPER(void, RhpMemoryBarrier, ()) +{ + PalMemoryBarrier(); +} +#endif + +#if defined(USE_PORTABLE_HELPERS) +EXTERN_C REDHAWK_API void* __cdecl RhAllocateThunksMapping() +{ + return NULL; +} + +COOP_PINVOKE_HELPER(void *, RhpGetThunksBase, ()) +{ + return NULL; +} + +COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return 0; +} + +COOP_PINVOKE_HELPER(int, RhpGetNumThunksPerBlock, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return 0; +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkSize, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return 0; +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkDataBlockAddress, (void* pThunkStubAddress)) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkStubsBlockAddress, (void* pThunkDataAddress)) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(void, RhCallDescrWorker, (void * callDescr)) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +#ifdef CALLDESCR_FPARGREGSARERETURNREGS +COOP_PINVOKE_HELPER(void, CallingConventionConverter_GetStubs, (UIntNative* pReturnVoidStub, UIntNative* pReturnIntegerStub, UIntNative* pCommonStub)) +#else 
+COOP_PINVOKE_HELPER(void, CallingConventionConverter_GetStubs, (UIntNative* pReturnVoidStub, UIntNative* pReturnIntegerStub, UIntNative* pCommonStub, UIntNative* pReturnFloatingPointReturn4Thunk, UIntNative* pReturnFloatingPointReturn8Thunk)) +#endif +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void *, RhGetCommonStubAddress, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(void *, RhGetCurrentThunkContext, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +#endif + +COOP_PINVOKE_HELPER(void, RhpGcPoll, ()) +{ + // TODO: implement +} diff --git a/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.cpp b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.cpp new file mode 100644 index 0000000000000..fc1288fe181c2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.cpp @@ -0,0 +1,210 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// On desktop CLR, GC ETW event firing borrows heavily from code in the profiling API, +// as the GC already called hooks in the profapi to notify it of roots & references. +// This file shims up that profapi code the GC expects, though only for the purpose of +// firing ETW events (not for getting a full profapi up on redhawk). +// + +#include "common.h" + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +#include "gcenv.h" +#include "gcheaputilities.h" +#include "eventtrace.h" +#include "profheapwalkhelper.h" + +//--------------------------------------------------------------------------------------- +// +// Callback of type promote_func called by GC while scanning roots (in GCProfileWalkHeap, +// called after the collection). Wrapper around EEToProfInterfaceImpl::RootReference2, +// which does the real work. +// +// Arguments: +// pObj - Object reference encountered +/// ppRoot - Address that references ppObject (can be interior pointer) +// pSC - ProfilingScanContext * containing the root kind and GCReferencesData used +// by RootReference2 +// dwFlags - Properties of the root as GC_CALL* constants (this function converts +// to COR_PRF_GC_ROOT_FLAGS. +// + +void ScanRootsHelper(Object* pObj, Object** ppRoot, ScanContext * pSC, DWORD dwFlags) +{ + ProfilingScanContext *pPSC = (ProfilingScanContext *)pSC; + + DWORD dwEtwRootFlags = 0; + if (dwFlags & GC_CALL_INTERIOR) + dwEtwRootFlags |= kEtwGCRootFlagsInterior; + if (dwFlags & GC_CALL_PINNED) + dwEtwRootFlags |= kEtwGCRootFlagsPinning; + + // Notify ETW of the root + + if (ETW::GCLog::ShouldWalkHeapRootsForEtw()) + { + ETW::GCLog::RootReference( + ppRoot, // root address + pObj, // object being rooted + NULL, // pSecondaryNodeForDependentHandle is NULL, cuz this isn't a dependent handle + FALSE, // is dependent handle + pPSC, + dwFlags, // dwGCFlags + dwEtwRootFlags); + } +} + +//--------------------------------------------------------------------------------------- +// +// Callback of type walk_fn used by GCHeap::WalkObject. Keeps a count of each +// object reference found. 
+// +// Arguments: +// pBO - Object reference encountered in walk +// context - running count of object references encountered +// +// Return Value: +// Always returns TRUE to object walker so it walks the entire object +// + +bool CountContainedObjectRef(Object * pBO, void * context) +{ + LIMITED_METHOD_CONTRACT; + UNREFERENCED_PARAMETER(pBO); + // Increase the count + (*((size_t *)context))++; + + return TRUE; +} + +//--------------------------------------------------------------------------------------- +// +// Callback of type walk_fn used by GCHeap::WalkObject. Stores each object reference +// encountered into an array. +// +// Arguments: +// pBO - Object reference encountered in walk +// context - Array of locations within the walked object that point to other +// objects. On entry, (*context) points to the next unfilled array +// entry. On exit, that location is filled, and (*context) is incremented +// to point to the next entry. +// +// Return Value: +// Always returns TRUE to object walker so it walks the entire object +// + +bool SaveContainedObjectRef(Object * pBO, void * context) +{ + LIMITED_METHOD_CONTRACT; + // Assign the value + **((Object ***)context) = pBO; + + // Now increment the array pointer + // + // Note that HeapWalkHelper has already walked the references once to count them up, + // and then allocated an array big enough to hold those references. First time this + // callback is called for a given object, (*context) points to the first entry in the + // array. So "blindly" incrementing (*context) here and using it next time around + // for the next reference, over and over again, should be safe. + (*((Object ***)context))++; + + return TRUE; +} + +//--------------------------------------------------------------------------------------- +// +// Callback of type walk_fn used by the GC when walking the heap, to help profapi +// track objects. This guy orchestrates the use of the above callbacks which dig +// into object references contained each object encountered by this callback. +// +// Arguments: +// pBO - Object reference encountered on the heap +// +// Return Value: +// BOOL indicating whether the heap walk should continue. +// TRUE=continue +// FALSE=stop +// + +bool HeapWalkHelper(Object * pBO, void * pvContext) +{ + OBJECTREF * arrObjRef = NULL; + size_t cNumRefs = 0; + bool bOnStack = false; + //MethodTable * pMT = pBO->GetMethodTable(); + + ProfilerWalkHeapContext * pProfilerWalkHeapContext = (ProfilerWalkHeapContext *) pvContext; + + //if (pMT->ContainsPointersOrCollectible()) + { + // First round through calculates the number of object refs for this class + GCHeapUtilities::GetGCHeap()->DiagWalkObject(pBO, &CountContainedObjectRef, (void *)&cNumRefs); + + if (cNumRefs > 0) + { + // Create an array to contain all of the refs for this object + bOnStack = cNumRefs <= 32 ? 
true : false; + + if (bOnStack) + { + // It's small enough, so just allocate on the stack + arrObjRef = (OBJECTREF *)_alloca(cNumRefs * sizeof(OBJECTREF)); + } + else + { + // Otherwise, allocate from the heap + arrObjRef = new (nothrow) OBJECTREF[cNumRefs]; + + if (!arrObjRef) + { + return FALSE; + } + } + + // Second round saves off all of the ref values + OBJECTREF * pCurObjRef = arrObjRef; + GCHeapUtilities::GetGCHeap()->DiagWalkObject(pBO, &SaveContainedObjectRef, (void *)&pCurObjRef); + } + } + + HRESULT hr = E_FAIL; + +#ifdef FEATURE_ETW + if (ETW::GCLog::ShouldWalkHeapObjectsForEtw()) + { + ETW::GCLog::ObjectReference( + pProfilerWalkHeapContext, + pBO, + ULONGLONG(pBO->get_SafeEEType()), + cNumRefs, + (Object **) arrObjRef); + } +#endif // FEATURE_ETW + + // If the data was not allocated on the stack, need to clean it up. + if ((arrObjRef != NULL) && !bOnStack) + { + delete [] arrObjRef; + } + + // Return TRUE iff we want to the heap walk to continue. The only way we'd abort the + // heap walk is if we're issuing profapi callbacks, and the profapi profiler + // intentionally returned a failed HR (as its request that we stop the walk). There's + // a potential conflict here. If a profapi profiler and an ETW profiler are both + // monitoring the heap dump, and the profapi profiler requests to abort the walk (but + // the ETW profiler may not want to abort the walk), then what do we do? The profapi + // profiler gets precedence. We don't want to accidentally send more callbacks to a + // profapi profiler that explicitly requested an abort. The ETW profiler will just + // have to deal. In theory, I could make the code more complex by remembering that a + // profapi profiler requested to abort the dump but an ETW profiler is still + // attached, and then intentionally inhibit the remainder of the profapi callbacks + // for this GC. But that's unnecessary complexity. In practice, it should be + // extremely rare that a profapi profiler is monitoring heap dumps AND an ETW + // profiler is also monitoring heap dumps. + return TRUE; +} + +#endif // defined(FEATURE_EVENT_TRACE) || defined(GC_PROFILING) diff --git a/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.h b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.h new file mode 100644 index 0000000000000..9c0da119aeabb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.h @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _GCHEAPWALKHELPER_H_ +#define _GCHEAPWALKHELPER_H_ + + +// These two functions are utilized to scan the heap if requested by ETW +// or a profiler. The implementations of these two functions are in profheapwalkhelper.cpp. +#if defined(FEATURE_EVENT_TRACE) || defined(GC_PROFILING) +void ScanRootsHelper(Object* pObj, Object** ppRoot, ScanContext* pSC, DWORD dwFlags); +bool HeapWalkHelper(Object* pBO, void* pvContext); +#endif + + +#endif // _GCHEAPWALKHELPER_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/regdisplay.h b/src/coreclr/src/nativeaot/Runtime/regdisplay.h new file mode 100644 index 0000000000000..b9ef9fa4bfcac --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/regdisplay.h @@ -0,0 +1,162 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
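REGDISPLAY, defined below per target architecture, is the register state that the stack-frame iterator carries from frame to frame: the pXxx members hold the addresses of the slots where callee-saved register values were spilled, so that consumers can read -- and, where needed, update -- the saved values in place, while SP, IP and pIP describe the frame currently being examined. The sketch below shows one plausible way a walker might publish the caller's frame into a REGDISPLAY; SketchAdvanceToCaller is a hypothetical helper, not part of this change.

// Hypothetical illustration only; the types used are the ones defined below.
inline void SketchAdvanceToCaller(REGDISPLAY * pRegisterSet,
                                  PTR_PCODE pCallerIPSlot, // where the caller's return address was saved
                                  UIntNative callerSP)     // caller's stack pointer after the call returns
{
    pRegisterSet->SetAddrOfIP(pCallerIPSlot); // keep the slot address so the value can be updated in place
    pRegisterSet->SetIP(*pCallerIPSlot);      // cache the caller's code address for the next iteration
    pRegisterSet->SetSP(callerSP);
}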
+ +#if defined(TARGET_X86) || defined(TARGET_AMD64) + +struct REGDISPLAY +{ + PTR_UIntNative pRax; + PTR_UIntNative pRcx; + PTR_UIntNative pRdx; + PTR_UIntNative pRbx; + // pEsp; + PTR_UIntNative pRbp; + PTR_UIntNative pRsi; + PTR_UIntNative pRdi; +#ifdef TARGET_AMD64 + PTR_UIntNative pR8; + PTR_UIntNative pR9; + PTR_UIntNative pR10; + PTR_UIntNative pR11; + PTR_UIntNative pR12; + PTR_UIntNative pR13; + PTR_UIntNative pR14; + PTR_UIntNative pR15; +#endif // TARGET_AMD64 + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + +#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) + Fp128 Xmm[16-6]; // preserved xmm6..xmm15 regs for EH stackwalk + // these need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses +#endif // TARGET_AMD64 && !UNIX_AMD64_ABI + + inline PCODE GetIP() { return IP; } + inline PTR_PCODE GetAddrOfIP() { return pIP; } + inline UIntNative GetSP() { return SP; } + inline UIntNative GetFP() { return *pRbp; } + inline UIntNative GetPP() { return *pRbx; } + + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetAddrOfIP(PTR_PCODE pIP) { this->pIP = pIP; } + inline void SetSP(UIntNative SP) { this->SP = SP; } +}; + +#elif defined(TARGET_ARM) + +struct REGDISPLAY +{ + PTR_UIntNative pR0; + PTR_UIntNative pR1; + PTR_UIntNative pR2; + PTR_UIntNative pR3; + PTR_UIntNative pR4; + PTR_UIntNative pR5; + PTR_UIntNative pR6; + PTR_UIntNative pR7; + PTR_UIntNative pR8; + PTR_UIntNative pR9; + PTR_UIntNative pR10; + PTR_UIntNative pR11; + PTR_UIntNative pR12; + PTR_UIntNative pLR; + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + + UInt64 D[16-8]; // preserved D registers D8..D15 (note that D16-D31 are not preserved according to the ABI spec) + // these need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses + + inline PCODE GetIP() { return IP; } + inline PTR_PCODE GetAddrOfIP() { return pIP; } + inline UIntNative GetSP() { return SP; } + inline UIntNative GetFP() { return *pR11; } + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetAddrOfIP(PTR_PCODE pIP) { this->pIP = pIP; } + inline void SetSP(UIntNative SP) { this->SP = SP; } +}; + +#elif defined(TARGET_ARM64) + +struct REGDISPLAY +{ + PTR_UIntNative pX0; + PTR_UIntNative pX1; + PTR_UIntNative pX2; + PTR_UIntNative pX3; + PTR_UIntNative pX4; + PTR_UIntNative pX5; + PTR_UIntNative pX6; + PTR_UIntNative pX7; + PTR_UIntNative pX8; + PTR_UIntNative pX9; + PTR_UIntNative pX10; + PTR_UIntNative pX11; + PTR_UIntNative pX12; + PTR_UIntNative pX13; + PTR_UIntNative pX14; + PTR_UIntNative pX15; + PTR_UIntNative pX16; + PTR_UIntNative pX17; + PTR_UIntNative pX18; + PTR_UIntNative pX19; + PTR_UIntNative pX20; + PTR_UIntNative pX21; + PTR_UIntNative pX22; + PTR_UIntNative pX23; + PTR_UIntNative pX24; + PTR_UIntNative pX25; + PTR_UIntNative pX26; + PTR_UIntNative pX27; + PTR_UIntNative pX28; + PTR_UIntNative pFP; // X29 + PTR_UIntNative pLR; // X30 + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + + UInt64 D[16-8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved + // (V0-V7 and V16-V31 are not preserved according to the ABI spec). 
+ // These need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses + + inline PCODE GetIP() { return IP; } + inline PTR_PCODE GetAddrOfIP() { return pIP; } + inline UIntNative GetSP() { return SP; } + inline UIntNative GetFP() { return *pFP; } + + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetAddrOfIP(PTR_PCODE pIP) { this->pIP = pIP; } + inline void SetSP(UIntNative SP) { this->SP = SP; } +}; +#elif defined(TARGET_WASM) + +struct REGDISPLAY +{ + // TODO: WebAssembly doesn't really have registers. What exactly do we need here? + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + + inline PCODE GetIP() { return NULL; } + inline PTR_PCODE GetAddrOfIP() { return NULL; } + inline UIntNative GetSP() { return 0; } + inline UIntNative GetFP() { return 0; } + + inline void SetIP(PCODE IP) { } + inline void SetAddrOfIP(PTR_PCODE pIP) { } + inline void SetSP(UIntNative SP) { } +}; +#endif // HOST_X86 || HOST_AMD64 || HOST_ARM || HOST_ARM64 || HOST_WASM + +typedef REGDISPLAY * PREGDISPLAY; diff --git a/src/coreclr/src/nativeaot/Runtime/rhassert.cpp b/src/coreclr/src/nativeaot/Runtime/rhassert.cpp new file mode 100644 index 0000000000000..7ac5c540fd16a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rhassert.cpp @@ -0,0 +1,110 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" + + +#include "RhConfig.h" + +#ifdef _DEBUG + +#define MB_ABORTRETRYIGNORE 0x00000002L +#define IDABORT 3 +#define IDRETRY 4 +#define IDIGNORE 5 + +void Assert(const char * expr, const char * file, UInt32 line_num, const char * message) +{ +#ifndef DACCESS_COMPILE +#ifdef NO_UI_ASSERT + PalDebugBreak(); +#else + if (g_pRhConfig->GetBreakOnAssert()) + { + printf( + "--------------------------------------------------\n" + "Debug Assertion Violation\n\n" + "%s%s%s" + "Expression: '%s'\n\n" + "File: %s, Line: %u\n" + "--------------------------------------------------\n", + message ? ("Message: ") : (""), + message ? (message) : (""), + message ? ("\n\n") : (""), + expr, file, line_num); + + // Flush standard output before failing fast to make sure the assertion failure message + // is retained when tests are being run with redirected stdout. + fflush(stdout); + + // If there's no debugger attached, we just FailFast + if (!PalIsDebuggerPresent()) + PalRaiseFailFastException(NULL, NULL, FAIL_FAST_GENERATE_EXCEPTION_ADDRESS); + + // If there is a debugger attached, we break and then allow continuation. + PalDebugBreak(); + return; + } + + char buffer[4096]; + + sprintf_s(buffer, COUNTOF(buffer), + "--------------------------------------------------\n" + "Debug Assertion Violation\n\n" + "%s%s%s" + "Expression: '%s'\n\n" + "File: %s, Line: %u\n" + "--------------------------------------------------\n" + "Abort: Exit Immediately\n" + "Retry: DebugBreak()\n" + "Ignore: Keep Going\n" + "--------------------------------------------------\n", + message ? ("Message: ") : (""), + message ? (message) : (""), + message ? 
("\n\n") : (""), + expr, file, line_num); + + HANDLE hMod = PalLoadLibraryExW(L"user32.dll", NULL, 0); + Int32 (* pfn)(HANDLE, char *, const char *, UInt32) = + (Int32 (*)(HANDLE, char *, const char *, UInt32))PalGetProcAddress(hMod, "MessageBoxA"); + + Int32 result = pfn(NULL, buffer, "Redhawk Assert", MB_ABORTRETRYIGNORE); + + switch (result) + { + case IDABORT: + PalTerminateProcess(PalGetCurrentProcess(), 666); + break; + case IDRETRY: + PalDebugBreak(); + break; + case IDIGNORE: + break; + } +#endif +#else + UNREFERENCED_PARAMETER(expr); + UNREFERENCED_PARAMETER(file); + UNREFERENCED_PARAMETER(line_num); + UNREFERENCED_PARAMETER(message); +#endif //!DACCESS_COMPILE +} + +extern "C" void NYI_Assert(const char *message, ...) +{ +#if !defined(DACCESS_COMPILE) + va_list args; + va_start(args, message); + vprintf(message, args); + va_end(args); + ASSERT_UNCONDITIONALLY("NYI"); +#else + UNREFERENCED_PARAMETER(message); +#endif +} + +#endif // _DEBUG diff --git a/src/coreclr/src/nativeaot/Runtime/rhassert.h b/src/coreclr/src/nativeaot/Runtime/rhassert.h new file mode 100644 index 0000000000000..5da270dacb9fd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rhassert.h @@ -0,0 +1,69 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#ifndef __RHASSERT_H__ +#define __RHASSERT_H__ + +#ifdef _MSC_VER +#define ASSUME(expr) __assume(expr) +#else // _MSC_VER +#define ASSUME(expr) do { if (!(expr)) __builtin_unreachable(); } while (0) +#endif // _MSC_VER + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + +#define ASSERT(expr) \ + { \ + if (!(expr)) { Assert(#expr, __FILE__, __LINE__, NULL); } \ + } \ + +#define ASSERT_MSG(expr, msg) \ + { \ + if (!(expr)) { Assert(#expr, __FILE__, __LINE__, msg); } \ + } \ + +#define VERIFY(expr) ASSERT((expr)) + +#define ASSERT_UNCONDITIONALLY(message) \ + Assert("ASSERT_UNCONDITIONALLY", __FILE__, __LINE__, message); \ + +void Assert(const char * expr, const char * file, unsigned int line_num, const char * message); + +#else + +#define ASSERT(expr) + +#define ASSERT_MSG(expr, msg) + +#define VERIFY(expr) (expr) + +#define ASSERT_UNCONDITIONALLY(message) + +#endif + +#ifndef _ASSERTE +#define _ASSERTE(_expr) ASSERT(_expr) +#endif + +#if defined(_DEBUG) + +void NYI_ASSERT(); + +#endif + +#define PORTABILITY_ASSERT(message) \ + ASSERT_UNCONDITIONALLY(message); \ + ASSUME(0); \ + +#define UNREACHABLE() \ + ASSERT_UNCONDITIONALLY("UNREACHABLE"); \ + ASSUME(0); \ + +#define UNREACHABLE_MSG(message) \ + ASSERT_UNCONDITIONALLY(message); \ + ASSUME(0); \ + +#define FAIL_FAST_GENERATE_EXCEPTION_ADDRESS 0x1 + +#define RhFailFast() PalRaiseFailFastException(NULL, NULL, FAIL_FAST_GENERATE_EXCEPTION_ADDRESS) + +#endif // __RHASSERT_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/rhcommon.h b/src/coreclr/src/nativeaot/Runtime/rhcommon.h new file mode 100644 index 0000000000000..5cd0b1a6ca32a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rhcommon.h @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is here because we share some common code with the CLR and that platform uses common.h as a +// precompiled header. 
Due to limitations on precompilation (a precompiled header must be included first +// and must not be preceded by any other preprocessor directive) we cannot conditionally include common.h, +// so the simplest solution is to maintain this empty header under Redhawk. +// + +// +// For our DAC build, we precompile gcrhenv.h because it is extremely large (~3MB of text). For non-DAC +// builds, we do not do this because the majority of the files have more constrained #includes. +// + +#include "stdint.h" diff --git a/src/coreclr/src/nativeaot/Runtime/rheventtrace.cpp b/src/coreclr/src/nativeaot/Runtime/rheventtrace.cpp new file mode 100644 index 0000000000000..93c9a9a71d670 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rheventtrace.cpp @@ -0,0 +1,623 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Redhawk-specific ETW helper code. +// +// When Redhawk does stuff substantially different from desktop CLR, the +// Redhawk-specific implementations should go here. +// +#include "common.h" +#include "gcenv.h" +#include "rheventtrace.h" +#include "eventtrace.h" +#include "rhbinder.h" +#include "slist.h" +#include "rwlock.h" +#include "runtimeinstance.h" +#include "shash.h" +#include "eventtracepriv.h" +#include "shash.inl" +#include "palredhawk.h" + +#if defined(FEATURE_EVENT_TRACE) + +//--------------------------------------------------------------------------------------- +// BulkTypeEventLogger is a helper class to batch up type information and then flush to +// ETW once the event reaches its max # descriptors + + +//--------------------------------------------------------------------------------------- +// +// Batches up ETW information for a type and pops out to recursively call +// ETW::TypeSystemLog::LogTypeAndParametersIfNecessary for any +// "type parameters". Generics info is not reliably available, so "type parameter" +// really just refers to the type of array elements if thAsAddr is an array. +// +// Arguments: +// * thAsAddr - EEType to log +// * typeLogBehavior - Ignored in Redhawk builds +// + +void BulkTypeEventLogger::LogTypeAndParameters(UInt64 thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior) +{ + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + EEType * pEEType = (EEType *) thAsAddr; + + // Batch up this type. 
This grabs useful info about the type, including any + // type parameters it may have, and sticks it in m_rgBulkTypeValues + int iBulkTypeEventData = LogSingleType(pEEType); + if (iBulkTypeEventData == -1) + { + // There was a failure trying to log the type, so don't bother with its type + // parameters + return; + } + + // Look at the type info we just batched, so we can get the type parameters + BulkTypeValue * pVal = &m_rgBulkTypeValues[iBulkTypeEventData]; + + // We're about to recursively call ourselves for the type parameters, so make a + // local copy of their type handles first (else, as we log them we could flush + // and clear out m_rgBulkTypeValues, thus trashing pVal) + NewArrayHolder rgTypeParameters; + DWORD cTypeParams = pVal->cTypeParameters; + if (cTypeParams == 1) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(this, pVal->ullSingleTypeParameter, typeLogBehavior); + } + else if (cTypeParams > 1) + { + rgTypeParameters = new (nothrow) ULONGLONG[cTypeParams]; + for (DWORD i=0; i < cTypeParams; i++) + { + rgTypeParameters[i] = pVal->rgTypeParameters[i]; + } + + // Recursively log any referenced parameter types + for (DWORD i=0; i < cTypeParams; i++) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(this, rgTypeParameters[i], typeLogBehavior); + } + } +} + +// We keep a hash of these to keep track of: +// * Which types have been logged through ETW (so we can avoid logging dupe Type +// events), and +// * GCSampledObjectAllocation stats to help with "smart sampling" which +// dynamically adjusts sampling rate of objects by type. +// See code:LoggedTypesFromModuleTraits + +class LoggedTypesTraits : public DefaultSHashTraits +{ +public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef EEType* key_t; + + static key_t GetKey(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return e; + } + + static BOOL Equals(key_t k1, key_t k2) + { + LIMITED_METHOD_CONTRACT; + return (k1 == k2); + } + + static count_t Hash(key_t k) + { + LIMITED_METHOD_CONTRACT; + return (count_t) (UIntNative) k; + } + + static bool IsNull(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return (e == NULL); + } + + static const element_t Null() + { + LIMITED_METHOD_CONTRACT; + return NULL; + } +}; + +enum class CorElementType : UInt8 +{ + ELEMENT_TYPE_END = 0x0, + + ELEMENT_TYPE_BOOLEAN = 0x2, + ELEMENT_TYPE_CHAR = 0x3, + ELEMENT_TYPE_I1 = 0x4, + ELEMENT_TYPE_U1 = 0x5, + ELEMENT_TYPE_I2 = 0x6, + ELEMENT_TYPE_U2 = 0x7, + ELEMENT_TYPE_I4 = 0x8, + ELEMENT_TYPE_U4 = 0x9, + ELEMENT_TYPE_I8 = 0xa, + ELEMENT_TYPE_U8 = 0xb, + ELEMENT_TYPE_R4 = 0xc, + ELEMENT_TYPE_R8 = 0xd, + + ELEMENT_TYPE_I = 0x18, + ELEMENT_TYPE_U = 0x19, +}; + +static CorElementType ElementTypeToCorElementType(EETypeElementType elementType) +{ + switch (elementType) + { + case EETypeElementType::ElementType_Boolean: + return CorElementType::ELEMENT_TYPE_BOOLEAN; + case EETypeElementType::ElementType_Char: + return CorElementType::ELEMENT_TYPE_CHAR; + case EETypeElementType::ElementType_SByte: + return CorElementType::ELEMENT_TYPE_I1; + case EETypeElementType::ElementType_Byte: + return CorElementType::ELEMENT_TYPE_U1; + case EETypeElementType::ElementType_Int16: + return CorElementType::ELEMENT_TYPE_I2; + case EETypeElementType::ElementType_UInt16: + return CorElementType::ELEMENT_TYPE_U2; + case EETypeElementType::ElementType_Int32: + return CorElementType::ELEMENT_TYPE_I4; + case EETypeElementType::ElementType_UInt32: + return 
CorElementType::ELEMENT_TYPE_U4; + case EETypeElementType::ElementType_Int64: + return CorElementType::ELEMENT_TYPE_I8; + case EETypeElementType::ElementType_UInt64: + return CorElementType::ELEMENT_TYPE_U8; + case EETypeElementType::ElementType_Single: + return CorElementType::ELEMENT_TYPE_R4; + case EETypeElementType::ElementType_Double: + return CorElementType::ELEMENT_TYPE_R8; + case EETypeElementType::ElementType_IntPtr: + return CorElementType::ELEMENT_TYPE_I; + case EETypeElementType::ElementType_UIntPtr: + return CorElementType::ELEMENT_TYPE_U; + } + return CorElementType::ELEMENT_TYPE_END; +} + +// Avoid reporting the same type twice by keeping a hash of logged types. +SHash* s_loggedTypesHash = NULL; + +//--------------------------------------------------------------------------------------- +// +// Interrogates EEType for the info that's interesting to include in the BulkType ETW +// event. Does not recursively call self for type parameters. +// +// Arguments: +// * pEEType - EEType to log info about +// +// Return Value: +// Index into internal array where the info got batched. Or -1 if there was a +// failure. +// + +int BulkTypeEventLogger::LogSingleType(EEType * pEEType) +{ +#ifdef MULTIPLE_HEAPS + // We need to add a lock to protect the types hash for Server GC. + ASSERT_UNCONDITIONALLY("Add a lock to protect s_loggedTypesHash access!"); +#endif + //Avoid logging the same type twice, but using the hash of loggged types. + if (s_loggedTypesHash == NULL) + s_loggedTypesHash = new SHash(); + EEType* preexistingType = s_loggedTypesHash->Lookup(pEEType); + if (preexistingType != NULL) + { + return -1; + } + else + { + s_loggedTypesHash->Add(pEEType); + } + + // If there's no room for another type, flush what we've got + if (m_nBulkTypeValueCount == _countof(m_rgBulkTypeValues)) + { + FireBulkTypeEvent(); + } + + _ASSERTE(m_nBulkTypeValueCount < _countof(m_rgBulkTypeValues)); + + BulkTypeValue * pVal = &m_rgBulkTypeValues[m_nBulkTypeValueCount]; + + // Clear out pVal before filling it out (array elements can get reused if there + // are enough types that we need to flush to multiple events). + pVal->Clear(); + + pVal->fixedSizedData.TypeID = (ULONGLONG) pEEType; + pVal->fixedSizedData.Flags = kEtwTypeFlagsModuleBaseAddress; + pVal->fixedSizedData.CorElementType = (BYTE)ElementTypeToCorElementType(pEEType->GetElementType()); + + ULONGLONG * rgTypeParamsForEvent = NULL; + ULONGLONG typeParamForNonGenericType = 0; + + // Determine this EEType's module. + RuntimeInstance * pRuntimeInstance = GetRuntimeInstance(); + + ULONGLONG osModuleHandle = (ULONGLONG) pEEType->GetTypeManagerPtr()->AsTypeManager()->GetOsModuleHandle(); + + pVal->fixedSizedData.ModuleID = osModuleHandle; + + if (pEEType->IsParameterizedType()) + { + ASSERT(pEEType->IsArray()); + // Array + pVal->fixedSizedData.Flags |= kEtwTypeFlagsArray; + pVal->cTypeParameters = 1; + pVal->ullSingleTypeParameter = (ULONGLONG) pEEType->get_RelatedParameterType(); + } + else + { + // Note: if pEEType->IsCloned(), then no special handling is necessary. All the + // functionality we need from the EEType below work just as well from cloned types. + + // Note: For generic types, we do not necessarily know the generic parameters. + // So we leave it to the profiler at post-processing time to determine that via + // the PDBs. We'll leave pVal->cTypeParameters as 0, even though there could be + // type parameters. 
+ + // Flags + if (pEEType->HasFinalizer()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsFinalizable; + } + + // Note: Pn runtime knows nothing about delegates, and there are no CCWs/RCWs. + // So no other type flags are applicable to set + } + + ULONGLONG rvaType = osModuleHandle == 0 ? 0 : (ULONGLONG(pEEType) - osModuleHandle); + pVal->fixedSizedData.TypeNameID = (DWORD) rvaType; + + // Now that we know the full size of this type's data, see if it fits in our + // batch or whether we need to flush + + int cbVal = pVal->GetByteCountInEvent(); + if (cbVal > kMaxBytesTypeValues) + { + // This type is apparently so huge, it's too big to squeeze into an event, even + // if it were the only type batched in the whole event. Bail + ASSERT(!"Type too big to log via ETW"); + return -1; + } + + if (m_nBulkTypeValueByteCount + cbVal > kMaxBytesTypeValues) + { + // Although this type fits into the array, its size is so big that the entire + // array can't be logged via ETW. So flush the array, and start over by + // calling ourselves--this refetches the type info and puts it at the + // beginning of the array. Since we know this type is small enough to be + // batched into an event on its own, this recursive call will not try to + // call itself again. + FireBulkTypeEvent(); + return LogSingleType(pEEType); + } + + // The type fits into the batch, so update our state + m_nBulkTypeValueCount++; + m_nBulkTypeValueByteCount += cbVal; + return m_nBulkTypeValueCount - 1; // Index of type we just added +} + + +void BulkTypeEventLogger::Cleanup() +{ + if (s_loggedTypesHash != NULL) + { + delete s_loggedTypesHash; + s_loggedTypesHash = NULL; + } +} + +#endif // defined(FEATURE_EVENT_TRACE) + + +//--------------------------------------------------------------------------------------- +// +// Outermost level of ETW-type-logging. Clients outside (rh)eventtrace.cpp call this to log +// an EETypes and (recursively) its type parameters when present. This guy then calls +// into the appropriate BulkTypeEventLogger to do the batching and logging +// +// Arguments: +// * pBulkTypeEventLogger - If our caller is keeping track of batched types, it +// passes this to us so we can use it to batch the current type (GC heap walk +// does this). In Redhawk builds this should not be NULL. +// * thAsAddr - EEType to batch +// * typeLogBehavior - Unused in Redhawk builds +// + +void ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pLogger, UInt64 thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior) +{ +#if defined(FEATURE_EVENT_TRACE) + + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + _ASSERTE(pLogger != NULL); + pLogger->LogTypeAndParameters(thAsAddr, typeLogBehavior); + +#endif // defined(FEATURE_EVENT_TRACE) +} + + +//--------------------------------------------------------------------------------------- +// Runtime helpers for ETW logging. 
+//--------------------------------------------------------------------------------------- +typedef enum +{ + EVENT_LOG_CCW = 1, + EVENT_LOG_RCW, + EVENT_FLUSH_COM +} COM_ETW_EVENTS; + + + +COOP_PINVOKE_HELPER(void, RhpETWLogLiveCom, (Int32 eventType, void* CCWGCHandle, void* objectID, void* typeRawValue, void* IUnknown, void* VTable, Int32 comRefCount, Int32 jupiterRefCount, Int32 flags)) +{ + switch (eventType) + { + case EVENT_LOG_CCW: + BulkComLogger::WriteCCW(CCWGCHandle, objectID, typeRawValue, IUnknown, comRefCount, jupiterRefCount, flags); + break; + case EVENT_LOG_RCW: + BulkComLogger::WriteRCW(objectID, typeRawValue, IUnknown, VTable, comRefCount, flags); + break; + case EVENT_FLUSH_COM: + BulkComLogger::FlushComETW(); + break; + default: + ASSERT_UNCONDITIONALLY("unexpected COM ETW Event ID"); + } +} + +COOP_PINVOKE_HELPER(bool, RhpETWShouldWalkCom, ()) +{ + return BulkComLogger::ShouldReportComForGCHeapEtw(); +} + +//--------------------------------------------------------------------------------------- +// BulkStaticsLogger: Batches up and logs static variable roots +//--------------------------------------------------------------------------------------- + +BulkComLogger* BulkComLogger::s_comLogger; + +BulkComLogger::BulkComLogger() + : m_currRcw(0), m_currCcw(0), m_etwRcwData(0), m_etwCcwData(0) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + m_etwRcwData = new EventRCWEntry[kMaxRcwCount]; + m_etwCcwData = new EventCCWEntry[kMaxCcwCount]; +} + + +BulkComLogger::~BulkComLogger() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + FireBulkComEvent(); + + if (m_etwRcwData) + delete[] m_etwRcwData; + + if (m_etwCcwData) + delete[] m_etwCcwData; +} + +bool BulkComLogger::ShouldReportComForGCHeapEtw() +{ + return ETW::GCLog::ShouldWalkHeapObjectsForEtw(); +} + +void BulkComLogger::WriteCCW(void* CCWGCHandle, void* objectID, void* typeRawValue, void* IUnknown, long comRefCount, long jupiterRefCount, long flags) +{ + EventCCWEntry ccwEntry; + + ccwEntry.RootID = (UInt64)CCWGCHandle; + ccwEntry.ObjectID = (UInt64) objectID; + ccwEntry.TypeID = (UInt64) typeRawValue; + ccwEntry.IUnk = (UInt64) IUnknown; + ccwEntry.RefCount = (ULONG) comRefCount; + ccwEntry.JupiterRefCount = (ULONG) jupiterRefCount; + ccwEntry.Flags = flags; + + BulkComLogger* comLogger = BulkComLogger::GetInstance(); + if (comLogger != NULL) + { + comLogger->WriteCcw(ccwEntry); + } +} + +void BulkComLogger::WriteRCW(void* objectID, void* typeRawValue, void* IUnknown, void* VTable, long comRefCount, long flags) +{ + EventRCWEntry rcwEntry; + + rcwEntry.ObjectID = (UInt64) objectID; + rcwEntry.TypeID = (UInt64) typeRawValue; + rcwEntry.IUnk = (UInt64) IUnknown; + rcwEntry.VTable = (UInt64) VTable; + rcwEntry.RefCount = comRefCount; + rcwEntry.Flags = flags; + + BulkComLogger* comLogger = BulkComLogger::GetInstance(); + if (comLogger != NULL) + { + comLogger->WriteRcw(rcwEntry); + } +} + +void BulkComLogger::FlushComETW() +{ + BulkComLogger* comLogger = BulkComLogger::GetInstance(); + if (comLogger != NULL) + comLogger->Cleanup(); +} + +void BulkComLogger::FireBulkComEvent() +{ + WRAPPER_NO_CONTRACT; + + FlushRcw(); + FlushCcw(); +} + + +BulkComLogger* BulkComLogger::GetInstance() +{ + if (s_comLogger == NULL) + { + s_comLogger = new BulkComLogger(); + } + + return s_comLogger; +} + +void BulkComLogger::Cleanup() +{ + if (s_comLogger != NULL) + { + delete s_comLogger; + s_comLogger = NULL; + } +} + +void BulkComLogger::WriteCcw(const EventCCWEntry& ccw) +{ 
+ CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currCcw < kMaxCcwCount); + + EventCCWEntry &mccw = m_etwCcwData[m_currCcw++]; + mccw = ccw; + + if (m_currCcw >= kMaxCcwCount) + FlushCcw(); +} + +void BulkComLogger::FlushCcw() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currCcw <= kMaxCcwCount); + + if (m_currCcw == 0) + return; + + unsigned short instance = GetClrInstanceId(); + + EVENT_DATA_DESCRIPTOR eventData[3]; + EventDataDescCreate(&eventData[0], &m_currCcw, sizeof(const unsigned int)); + EventDataDescCreate(&eventData[1], &instance, sizeof(const unsigned short)); + EventDataDescCreate(&eventData[2], m_etwCcwData, sizeof(EventCCWEntry) * m_currCcw); + + ULONG result = PalEventWrite(Microsoft_Windows_DotNETRuntimeHandle, &GCBulkRootCCW, _countof(eventData), eventData); + _ASSERTE(result == ERROR_SUCCESS); + + m_currCcw = 0; +} + +void BulkComLogger::WriteRcw(const EventRCWEntry& rcw) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currRcw < kMaxRcwCount); + + EventRCWEntry &mrcw = m_etwRcwData[m_currRcw]; + mrcw = rcw; + + if (++m_currRcw >= kMaxRcwCount) + FlushRcw(); +} + +void BulkComLogger::FlushRcw() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currRcw <= kMaxRcwCount); + + if (m_currRcw == 0) + return; + + unsigned short instance = GetClrInstanceId(); + + EVENT_DATA_DESCRIPTOR eventData[3]; + EventDataDescCreate(&eventData[0], &m_currRcw, sizeof(const unsigned int)); + EventDataDescCreate(&eventData[1], &instance, sizeof(const unsigned short)); + EventDataDescCreate(&eventData[2], m_etwRcwData, sizeof(EventRCWEntry) * m_currRcw); + + ULONG result = PalEventWrite(Microsoft_Windows_DotNETRuntimeHandle, &GCBulkRCW, _countof(eventData), eventData); + _ASSERTE(result == ERROR_SUCCESS); + + m_currRcw = 0; +} + +COOP_PINVOKE_HELPER(void, RhpEtwExceptionThrown, (LPCWSTR exceptionTypeName, LPCWSTR exceptionMessage, void* faultingIP, HRESULT hresult)) +{ + FireEtwExceptionThrown_V1(exceptionTypeName, + exceptionMessage, + faultingIP, + hresult, + 0, + GetClrInstanceId()); +} + + + diff --git a/src/coreclr/src/nativeaot/Runtime/rheventtrace.h b/src/coreclr/src/nativeaot/Runtime/rheventtrace.h new file mode 100644 index 0000000000000..33c7c7bf03968 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rheventtrace.h @@ -0,0 +1,182 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This header provides Redhawk-specific ETW code and macros, to allow sharing of common +// ETW code between Redhawk and desktop CLR. +// +#ifndef __RHEVENTTRACE_INCLUDED +#define __RHEVENTTRACE_INCLUDED + + +#ifdef FEATURE_ETW + +// FireEtwGCPerHeapHistorySpecial() has to be defined manually rather than via the manifest because it does +// not have a standard signature. +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrId) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory)) ? Template_GCPerHeapHistorySpecial(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory, DataPerHeap, DataSize, ClrId) : 0 + +// Map the CLR private provider to our version so we can avoid inserting more #ifdef's in the code. 
+#define MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context +#define MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context +#define Microsoft_Windows_DotNETRuntimeHandle Microsoft_Windows_Redhawk_GC_PublicHandle + +#define CLR_GC_KEYWORD 0x1 +#define CLR_FUSION_KEYWORD 0x4 +#define CLR_LOADER_KEYWORD 0x8 +#define CLR_JIT_KEYWORD 0x10 +#define CLR_NGEN_KEYWORD 0x20 +#define CLR_STARTENUMERATION_KEYWORD 0x40 +#define CLR_ENDENUMERATION_KEYWORD 0x80 +#define CLR_SECURITY_KEYWORD 0x400 +#define CLR_APPDOMAINRESOURCEMANAGEMENT_KEYWORD 0x800 +#define CLR_JITTRACING_KEYWORD 0x1000 +#define CLR_INTEROP_KEYWORD 0x2000 +#define CLR_CONTENTION_KEYWORD 0x4000 +#define CLR_EXCEPTION_KEYWORD 0x8000 +#define CLR_THREADING_KEYWORD 0x10000 +#define CLR_JITTEDMETHODILTONATIVEMAP_KEYWORD 0x20000 +#define CLR_OVERRIDEANDSUPPRESSNGENEVENTS_KEYWORD 0x40000 +#define CLR_TYPE_KEYWORD 0x80000 +#define CLR_GCHEAPDUMP_KEYWORD 0x100000 +#define CLR_GCHEAPALLOC_KEYWORD 0x200000 +#define CLR_GCHEAPSURVIVALANDMOVEMENT_KEYWORD 0x400000 +#define CLR_GCHEAPCOLLECT_KEYWORD 0x800000 +#define CLR_GCHEAPANDTYPENAMES_KEYWORD 0x1000000 +#define CLR_PERFTRACK_KEYWORD 0x20000000 +#define CLR_STACK_KEYWORD 0x40000000 +#ifndef ERROR_SUCCESS +#define ERROR_SUCCESS 0 +#endif + +#undef ETW_TRACING_INITIALIZED +#define ETW_TRACING_INITIALIZED(RegHandle) (RegHandle != NULL) + +#undef ETW_CATEGORY_ENABLED +#define ETW_CATEGORY_ENABLED(Context, LevelParam, Keyword) \ + (Context.IsEnabled && \ + ( \ + (LevelParam <= ((Context).Level)) || \ + ((Context.Level) == 0) \ + ) && \ + ( \ + (Keyword == (ULONGLONG)0) || \ + ( \ + (Keyword & (Context.MatchAnyKeyword)) && \ + ( \ + (Keyword & (Context.MatchAllKeyword)) == (Context.MatchAllKeyword) \ + ) \ + ) \ + ) \ + ) + +class EEType; +class BulkTypeEventLogger; + +namespace ETW +{ + // Class to wrap all type system logic for ETW + class TypeSystemLog + { + public: + // This enum is unused on Redhawk, but remains here to keep Redhawk / desktop CLR + // code shareable. + enum TypeLogBehavior + { + kTypeLogBehaviorTakeLockAndLogIfFirstTime, + kTypeLogBehaviorAssumeLockAndLogIfFirstTime, + kTypeLogBehaviorAlwaysLog, + }; + + static void LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pLogger, UInt64 thAsAddr, TypeLogBehavior typeLogBehavior); + }; +}; + +struct EventRCWEntry +{ + UInt64 ObjectID; + UInt64 TypeID; + UInt64 IUnk; + UInt64 VTable; + UInt32 RefCount; + UInt32 Flags; +}; + +#pragma pack(push, 1) +struct EventCCWEntry +{ + UInt64 RootID; + UInt64 ObjectID; + UInt64 TypeID; + UInt64 IUnk; + UInt32 RefCount; + UInt32 JupiterRefCount; + UInt32 Flags; +}; + +C_ASSERT(sizeof(EventCCWEntry) == 44); +#pragma pack(pop) + +const UInt32 cbComMaxEtwEvent = 64 * 1024; + +// Does all logging for RCWs and CCWs in the process. +class BulkComLogger +{ +public: + // Returns true is gc heap collection is on. + static bool ShouldReportComForGCHeapEtw(); + + // Write one CCW to the CCW buffer. + static void WriteCCW(void* CCWGCHandle, void* objectID, void* typeRawValue, void* IUnknown, long comRefCount, long jupiterRefCount, long flags); + + // Write one RCW to the RCW buffer. + static void WriteRCW(void* objectID, void* typeRawValue, void* IUnknown, void* VTable, long refCount, long flags); + + // Gets or creates a unique BulkComLogger instance + static BulkComLogger* GetInstance(); + + // Write the remaining events and deletes the static instance. 
+ static void FlushComETW(); + +private: + BulkComLogger(); + ~BulkComLogger(); + + // Forces a flush of all ETW events not yet fired. + void FireBulkComEvent(); + + // Writes one RCW to the RCW buffer. May or may not fire the event. + void WriteRcw(const EventRCWEntry& rcw); + + // Writes one CCW to the CCW buffer. May or may not fire the event. + void WriteCcw(const EventCCWEntry& ccw); + + // Forces a flush of all RCW ETW events not yet fired. + void FlushRcw(); + + // Forces a flush of all CCW ETW events not yet fired. + void FlushCcw(); + + // Distroys the unique instance and forces a flush for all ETW events not yet fired. + void Cleanup(); + +private: + // The maximum number of RCW/CCW events we can batch up based on the max size of an ETW event. + static const int kMaxRcwCount = (cbComMaxEtwEvent - 0x30) / sizeof(EventRCWEntry); + static const int kMaxCcwCount = (cbComMaxEtwEvent - 0x30) / sizeof(EventCCWEntry); + + int m_currRcw; // The current number of batched (but not emitted) RCW events. + int m_currCcw; // The current number of batched (but not emitted) CCW events. + + BulkTypeEventLogger *m_typeLogger; // Type logger to emit type data for. + + EventRCWEntry *m_etwRcwData; // RCW buffer. + EventCCWEntry *m_etwCcwData; // CCW buffer. + + static BulkComLogger* s_comLogger; +}; + +#else +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrId) +#endif + +#endif //__RHEVENTTRACE_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/sha1.cpp b/src/coreclr/src/nativeaot/Runtime/sha1.cpp new file mode 100644 index 0000000000000..ca5a8e262edd2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/sha1.cpp @@ -0,0 +1,380 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// +// +// =========================================================================== +// File: sha1.cpp +// +// =========================================================================== +/*++ + +Abstract: + + SHA-1 implementation + +Revision History: + +--*/ + +/* + File sha1.cpp Version 03 August 2000. + + + This implements the SHA-1 hash function. + For algorithmic background see (for example) + + + Alfred J. Menezes et al + Handbook of Applied Cryptography + The CRC Press Series on Discrete Mathematics + and its Applications + CRC Press LLC, 1997 + ISBN 0-8495-8523-7 + QA76.9A25M643 + + Also see FIPS 180-1 - Secure Hash Standard, + 1993 May 11 and 1995 April 17, by the U.S. + National Institute of Standards and Technology (NIST). + +*/ + + +#include "common.h" +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" + +#include "sha1.h" + + +#define ROTATE32L(x,n) rotate32l(x,n) +#define SHAVE32(x) (UInt32)(x) + +static UInt32 rotate32l(UInt32 val, int shift) +{ + shift &= 0x1f; + val = (val >> (0x20 - shift)) | (val << shift); + return val; +} + +static void SHA1_block(SHA1_CTX *ctx) +/* + Update the SHA-1 hash from a fresh 64 bytes of data. +*/ +{ + static const UInt32 sha1_round1 = 0x5A827999u; + static const UInt32 sha1_round2 = 0x6ED9EBA1u; + static const UInt32 sha1_round3 = 0x8F1BBCDCu; + static const UInt32 sha1_round4 = 0xCA62C1D6u; + + UInt32 a = ctx->partial_hash[0], b = ctx->partial_hash[1]; + UInt32 c = ctx->partial_hash[2], d = ctx->partial_hash[3]; + UInt32 e = ctx->partial_hash[4]; + UInt32 msg80[80]; + int i; + + // OACR note: + // Loop conditions are using (i <= limit - increment) instead of (i < limit) to satisfy OACR. 
When the increment is greater + // than 1, OACR incorrectly thinks that the max value of 'i' is (limit - 1). + + for (i = 0; i < 16; i++) { // Copy to local array, zero original + // Extend length to 80 + const UInt32 datval = ctx->awaiting_data[i]; + ctx->awaiting_data[i] = 0; + msg80[i] = datval; + } + + for (i = 16; i <= 80 - 2; i += 2) { + const UInt32 temp1 = msg80[i-3] ^ msg80[i-8] + ^ msg80[i-14] ^ msg80[i-16]; + const UInt32 temp2 = msg80[i-2] ^ msg80[i-7] + ^ msg80[i-13] ^ msg80[i-15]; + msg80[i ] = ROTATE32L(temp1, 1); + msg80[i+1] = ROTATE32L(temp2, 1); + } + +#define ROUND1(B, C, D) ((D ^ (B & (C ^ D))) + sha1_round1) + // Equivalent to (B & C) | (~B & D). + // (check cases B = 0 and B = 1) +#define ROUND2(B, C, D) ((B ^ C ^ D) + sha1_round2) + +#define ROUND3(B, C, D) ((C & (B | D) | (B & D)) + sha1_round3) + +#define ROUND4(B, C, D) ((B ^ C ^ D) + sha1_round4) + +// Round 1 + for (i = 0; i <= 20 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND1(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND1(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND1(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND1(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND1(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); +#if 0 + printf("i = %ld %08lx %08lx %08lx %08lx %08lx\n", + i, a, b, c, d, e); +#endif + } // for i + +// Round 2 + for (i = 20; i <= 40 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND2(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND2(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND2(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND2(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND2(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); + } // for i + +// Round 3 + for (i = 40; i <= 60 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND3(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND3(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND3(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND3(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND3(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); + } // for i + +// Round 4 + for (i = 60; i <= 80 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND4(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND4(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND4(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND4(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND4(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); + } // for i + +#undef ROUND1 +#undef ROUND2 +#undef ROUND3 +#undef ROUND4 + + ctx->partial_hash[0] += a; + ctx->partial_hash[1] += b; + ctx->partial_hash[2] += c; + ctx->partial_hash[3] += d; + ctx->partial_hash[4] += e; +#if 0 + for (i = 0; i < 16; i++) { + printf("%8lx ", msg16[i]); + if ((i & 7) == 7) printf("\n"); + } + printf("a, b, c, d, e = %08lx %08lx %08lx %08lx %08lx\n", + a, b, c, d, e); + printf("Partial hash = %08lx %08lx %08lx %08lx %08lx\n", + (long)ctx->partial_hash[0], (long)ctx->partial_hash[1], + (long)ctx->partial_hash[2], (long)ctx->partial_hash[3], + (long)ctx->partial_hash[4]); +#endif +} // end SHA1_block + + +void SHA1Hash::SHA1Init(SHA1_CTX *ctx) +{ + ctx->nbit_total[0] = ctx->nbit_total[1] = 
0; + + for (UInt32 i = 0; i != 16; i++) { + ctx->awaiting_data[i] = 0; + } + + /* + Initialize hash variables. + + */ + + ctx->partial_hash[0] = 0x67452301u; + ctx->partial_hash[1] = 0xefcdab89u; + ctx->partial_hash[2] = ~ctx->partial_hash[0]; + ctx->partial_hash[3] = ~ctx->partial_hash[1]; + ctx->partial_hash[4] = 0xc3d2e1f0u; + +} + +void SHA1Hash::SHA1Update( + SHA1_CTX * ctx, // IN/OUT + const UInt8 * msg, // IN + UInt32 nbyte) // IN +/* + Append data to a partially hashed SHA-1 message. +*/ +{ + const UInt8 *fresh_data = msg; + UInt32 nbyte_left = nbyte; + UInt32 nbit_occupied = ctx->nbit_total[0] & 511; + UInt32 *awaiting_data; + const UInt32 nbitnew_low = SHAVE32(8*nbyte); + + + ASSERT((nbit_occupied & 7) == 0); // Partial bytes not implemented + + ctx->nbit_total[0] += nbitnew_low; + ctx->nbit_total[1] += (nbyte >> 29) + + (SHAVE32(ctx->nbit_total[0]) < nbitnew_low); + + /* Advance to word boundary in waiting_data */ + + if ((nbit_occupied & 31) != 0) { + awaiting_data = ctx->awaiting_data + nbit_occupied/32; + + while ((nbit_occupied & 31) != 0 && nbyte_left != 0) { + nbit_occupied += 8; + *awaiting_data |= (UInt32)*fresh_data++ + << ((-(int)nbit_occupied) & 31); + nbyte_left--; // Start at most significant byte + } + } // if nbit_occupied + + /* Transfer 4 bytes at a time */ + + do { + const UInt32 nword_occupied = nbit_occupied/32; + UInt32 nwcopy = min(nbyte_left/4, 16 - nword_occupied); + ASSERT (nbit_occupied <= 512); + ASSERT ((nbit_occupied & 31) == 0 || nbyte_left == 0); + awaiting_data = ctx->awaiting_data + nword_occupied; + nbyte_left -= 4*nwcopy; + nbit_occupied += 32*nwcopy; + + while (nwcopy != 0) { + const UInt32 byte0 = (UInt32)fresh_data[0]; + const UInt32 byte1 = (UInt32)fresh_data[1]; + const UInt32 byte2 = (UInt32)fresh_data[2]; + const UInt32 byte3 = (UInt32)fresh_data[3]; + *awaiting_data++ = byte3 | (byte2 << 8) + | (byte1 << 16) | (byte0 << 24); + /* Big endian */ + fresh_data += 4; + nwcopy--; + } + + if (nbit_occupied == 512) { + SHA1_block(ctx); + nbit_occupied = 0; + awaiting_data -= 16; + ASSERT(awaiting_data == ctx->awaiting_data); + } + } while (nbyte_left >= 4); + + ASSERT (ctx->awaiting_data + nbit_occupied/32 + == awaiting_data); + + while (nbyte_left != 0) { + const UInt32 new_byte = (UInt32)*fresh_data++; + + ASSERT((nbit_occupied & 31) <= 16); + nbit_occupied += 8; + *awaiting_data |= new_byte << ((-(int)nbit_occupied) & 31); + nbyte_left--; + } + + ASSERT (nbit_occupied == (ctx->nbit_total[0] & 511)); +} // end SHA1Update + + + +void SHA1Hash::SHA1Final( + SHA1_CTX * ctx, // IN/OUT + UInt8 * digest) // OUT +/* + Finish a SHA-1 hash. +*/ +{ + const UInt32 nbit0 = ctx->nbit_total[0]; + const UInt32 nbit1 = ctx->nbit_total[1]; + UInt32 nbit_occupied = nbit0 & 511; + UInt32 i; + + ASSERT((nbit_occupied & 7) == 0); + + ctx->awaiting_data[nbit_occupied/32] + |= (UInt32)0x80 << ((-8-nbit_occupied) & 31); + // Append a 1 bit + nbit_occupied += 8; + + + // Append zero bits until length (in bits) is 448 mod 512. + // Then append the length, in bits. + // Here we assume the buffer was zeroed earlier. 
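// ---------------------------------------------------------------------------
// [Editorial aside, not part of the patch] The padding arithmetic described in
// the comment above, as a hypothetical standalone helper: one 0x80 marker
// byte, zero fill until the length is 448 mod 512 bits, then the 64-bit bit
// length, so the padded message is always a whole number of 64-byte blocks.
static inline size_t Sha1PaddedSizeBytes(size_t nbytes)
{
    // 1 marker byte + 8 length bytes, rounded up to the next 64-byte block.
    return ((nbytes + 1 + 8 + 63) / 64) * 64;
}
// Example: a 55-byte message pads to one 64-byte block; a 56-byte message
// needs a second block and pads to 128 bytes.
// ---------------------------------------------------------------------------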
+ + if (nbit_occupied > 448) { // If fewer than 64 bits left + SHA1_block(ctx); + nbit_occupied = 0; + } + ctx->awaiting_data[14] = nbit1; + ctx->awaiting_data[15] = nbit0; + SHA1_block(ctx); + + /* Copy final digest to user-supplied byte array */ + + for (i = 0; i != 5; i++) { + const UInt32 dwi = ctx->partial_hash[i]; + digest[4*i + 0] = (UInt8)((dwi >> 24) & 255); + digest[4*i + 1] = (UInt8)((dwi >> 16) & 255); + digest[4*i + 2] = (UInt8)((dwi >> 8) & 255); + digest[4*i + 3] = (UInt8)(dwi & 255); // Big-endian + } +} // end SHA1Final + +SHA1Hash::SHA1Hash() +{ + m_fFinalized = false; + SHA1Init(&m_Context); +} + +void SHA1Hash::AddData(const UInt8 *pbData, UInt32 cbData) +{ + if (m_fFinalized) + return; + + SHA1Update(&m_Context, pbData, cbData); +} + +// Retrieve a pointer to the final hash. +UInt8 *SHA1Hash::GetHash() +{ + if (m_fFinalized) + return m_Value; + + SHA1Final(&m_Context, m_Value); + + m_fFinalized = true; + + return m_Value; +} + diff --git a/src/coreclr/src/nativeaot/Runtime/sha1.h b/src/coreclr/src/nativeaot/Runtime/sha1.h new file mode 100644 index 0000000000000..70f827dcde8e9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/sha1.h @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +#ifndef SHA1_H_ +#define SHA1_H_ + +// Hasher class, performs no allocation and therefore does not throw or return +// errors. Usage is as follows: +// Create an instance (this initializes the hash). +// Add one or more blocks of input data using AddData(). +// Retrieve the hash using GetHash(). This can be done as many times as desired +// until the object is destructed. Once a hash is asked for, further AddData +// calls will be ignored. There is no way to reset object state (simply +// destroy the object and create another instead). + +#define SHA1_HASH_SIZE 20 // Number of bytes output by SHA-1 + +typedef struct { + UInt32 magic_sha1; // Magic value for A_SHA_CTX + UInt32 awaiting_data[16]; + // Data awaiting full 512-bit block. + // Length (nbit_total[0] % 512) bits. + // Unused part of buffer (at end) is zero + UInt32 partial_hash[5]; + // Hash through last full block + UInt32 nbit_total[2]; + // Total length of message so far + // (bits, mod 2^64) +} SHA1_CTX; + +class SHA1Hash +{ +private: + SHA1_CTX m_Context; + UInt8 m_Value[SHA1_HASH_SIZE]; + bool m_fFinalized; + + void SHA1Init(SHA1_CTX*); + void SHA1Update(SHA1_CTX*, const UInt8*, const UInt32); + void SHA1Final(SHA1_CTX*, UInt8* digest); + +public: + SHA1Hash(); + void AddData(const UInt8 *pbData, UInt32 cbData); + UInt8 *GetHash(); +}; + +#endif // SHA1_H_ + + + diff --git a/src/coreclr/src/nativeaot/Runtime/shash.h b/src/coreclr/src/nativeaot/Runtime/shash.h new file mode 100644 index 0000000000000..65929f8d03d31 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/shash.h @@ -0,0 +1,634 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// SHash is a templated closed chaining hash table of pointers. It provides +// for multiple entries under the same key, and also for deleting elements. + +// Synchronization: +// Synchronization requirements depend on use. There are several properties to take into account: +// +// - Lookups may be asynchronous with each other +// - Lookups must be exclusive with Add operations +// (@todo: this can be remedied by delaying destruction of old tables during reallocation, e.g. 
during GC) +// - Remove operations may be asynchronous with Lookup/Add, unless elements are also deallocated. (In which +// case full synchronization is required) + + +// A SHash is templated by a class of TRAITS. These traits define the various specifics of the +// particular hash table. +// The required traits are: +// +// element_t Type of elements in the hash table. These elements are stored +// by value in the hash table. Elements must look more or less +// like primitives - they must support assignment relatively +// efficiently. There are 2 required sentinel values: +// Null and Deleted (described below). (Note that element_t is +// very commonly a pointer type.) +// +// The key must be derivable from the element; if your +// table's keys are independent of the stored values, element_t +// should be a key/value pair. +// +// key_t Type of the lookup key. The key is used for identity +// comparison between elements, and also as a key for lookup. +// This is also used by value and should support +// efficient assignment. +// +// count_t integral type for counts. Typically inherited by default +// Traits (count_t). +// +// static key_t GetKey(const element_t &e) Get key from element. Should be stable for a given e. +// static bool Equals(key_t k1, key_t k2) Compare 2 keys for equality. Again, should be stable. +// static count_t Hash(key_t k) Compute hash from a key. For efficient operation, the hashes +// for a set of elements should have random uniform distribution. +// +// static element_t Null() Return the Null sentinel value. May be inherited from +// default traits if it can be assigned from 0. +// static element_t Deleted() Return the Deleted sentinel value. May be inherited from the +// default traits if it can be assigned from -1. +// static bool IsNull(const ELEMENT &e) Compare element with Null sentinel value. May be inherited from +// default traits if it can be assigned from 0. +// static bool IsDeleted(const ELEMENT &e) Compare element with Deleted sentinel value. May be inherited +// from the default traits if it can be assigned from -1. +// static void OnFailure(FailureType failureType) Called when a failure occurs during SHash operation +// +// s_growth_factor_numerator +// s_growth_factor_denominator Factor to grow allocation (numerator/denominator). +// Typically inherited from default traits (3/2) +// +// s_density_factor_numerator +// s_density_factor_denominator Maximum occupied density of table before growth +// occurs (num/denom). Typically inherited (3/4). +// +// s_minimum_allocation Minimum table allocation count (size on first growth.) It is +// probably preferable to call Reallocate on initialization rather +// than override his from the default traits. (7) +// +// s_supports_remove Set to false for a slightly faster implementation that does not +// support deletes. There is a downside to the s_supports_remove flag, +// in that there may be more copies of the template instantiated through +// the system as different variants are used. + +#ifndef __shash_h__ +#define __shash_h__ + +// disable the "Conditional expression is constant" warning +#pragma warning(push) +#pragma warning(disable:4127) + + +enum FailureType { ftAllocation, ftOverflow }; + +// DefaultHashTraits provides defaults for seldomly customized values in traits classes. 
+ +template < typename ELEMENT, typename COUNT_T = UInt32 > +class DefaultSHashTraits +{ + public: + typedef COUNT_T count_t; + typedef ELEMENT element_t; + typedef DPTR(element_t) PTR_element_t; // by default SHash is DAC-aware. For RS + // only SHash use NonDacAwareSHashTraits + // (which typedefs element_t* PTR_element_t) + static const count_t s_growth_factor_numerator = 3; + static const count_t s_growth_factor_denominator = 2; + + static const count_t s_density_factor_numerator = 3; + static const count_t s_density_factor_denominator = 4; + + static const count_t s_minimum_allocation = 7; + + static const bool s_supports_remove = true; + + static const ELEMENT Null() { return (const ELEMENT) 0; } + static const ELEMENT Deleted() { return (const ELEMENT) -1; } + static bool IsNull(const ELEMENT &e) { return e == (const ELEMENT) 0; } + static bool IsDeleted(const ELEMENT &e) { return e == (const ELEMENT) -1; } + + static void OnFailure(FailureType /*ft*/) { } + + // No defaults - must specify: + // + // typedef key_t; + // static key_t GetKey(const element_t &i); + // static bool Equals(key_t k1, key_t k2); + // static count_t Hash(key_t k); +}; + +// Hash table class definition + +template +class SHash : public TRAITS +{ + private: + class Index; + friend class Index; + + class KeyIndex; + friend class KeyIndex; + class Iterator; + class KeyIterator; + + public: + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef typename TRAITS::element_t element_t; + typedef typename TRAITS::PTR_element_t PTR_element_t; + typedef typename TRAITS::key_t key_t; + typedef typename TRAITS::count_t count_t; + + // Constructor/destructor. SHash tables always start out empty, with no + // allocation overhead. Call Reallocate to prime with an initial size if + // desired. + + SHash(); + ~SHash(); + + // Lookup an element in the table by key. Returns NULL if no element in the table + // has the given key. Note that multiple entries for the same key may be stored - + // this will return the first element added. Use KeyIterator to find all elements + // with a given key. + + element_t Lookup(key_t key) const; + + // Pointer-based flavor of Lookup (allows efficient access to tables of structures) + + const element_t* LookupPtr(key_t key) const; + + // Add an element to the hash table. This will never replace an element; multiple + // elements may be stored with the same key. + // + // Returns 'true' on success, 'false' on failure. + + bool Add(const element_t &element); + + // Add a new element to the hash table, if no element with the same key is already + // there. Otherwise, it will replace the existing element. This has the effect of + // updating an element rather than adding a duplicate. + // + // Returns 'true' on success, 'false' on failure. + + bool AddOrReplace(const element_t & element); + + // Remove the first element matching the key from the hash table. + + void Remove(key_t key); + + // Remove the specific element. + + void Remove(Iterator& i); + void Remove(KeyIterator& i); + + // Pointer-based flavor of Remove (allows efficient access to tables of structures) + + void RemovePtr(element_t * element); + + // Remove all elements in the hashtable + + void RemoveAll(); + + // Begin and End pointers for iteration over entire table. + + Iterator Begin() const; + Iterator End() const; + + // Begin and End pointers for iteration over all elements with a given key. 
+ + KeyIterator Begin(key_t key) const; + KeyIterator End(key_t key) const; + + // Return the number of elements currently stored in the table + + count_t GetCount() const; + + // Return the number of elements that the table is capable storing currently + + count_t GetCapacity() const; + + // Reallocates a hash table to a specific size. The size must be big enough + // to hold all elements in the table appropriately. + // + // Note that the actual table size must always be a prime number; the number + // passed in will be upward adjusted if necessary. + // + // Returns 'true' on success, 'false' on failure. + + bool Reallocate(count_t newTableSize); + + // See if it is OK to grow the hash table by one element. If not, reallocate + // the hash table. + // + // Returns 'true' on success, 'false' on failure. + + bool CheckGrowth(); + + // See if it is OK to grow the hash table by N elementsone element. If not, reallocate + // the hash table. + + bool CheckGrowth(count_t newElements); + +private: + + // Resizes a hash table for growth. The new size is computed based + // on the current population, growth factor, and maximum density factor. + // + // Returns 'true' on success, 'false' on failure. + + bool Grow(); + + // Utility function to add a new element to the hash table. Note that + // it is perfectly find for the element to be a duplicate - if so it + // is added an additional time. Returns true if a new empty spot was used; + // false if an existing deleted slot. + + static bool Add(element_t *table, count_t tableSize, const element_t &element); + + // Utility function to add a new element to the hash table, if no element with the same key + // is already there. Otherwise, it will replace the existing element. This has the effect of + // updating an element rather than adding a duplicate. + + void AddOrReplace(element_t *table, count_t tableSize, const element_t &element); + + // Utility function to find the first element with the given key in + // the hash table. + + static const element_t* Lookup(PTR_element_t table, count_t tableSize, key_t key); + + // Utility function to remove the first element with the given key + // in the hash table. + + void Remove(element_t *table, count_t tableSize, key_t key); + + // Utility function to remove the specific element. + + void RemoveElement(element_t *table, count_t tableSize, element_t *element); + + // + // Enumerator, provides a template to produce an iterator on an existing class + // with a single iteration variable. + // + + template + class Enumerator + { + private: + const SUBTYPE *This() const + { + return (const SUBTYPE *) this; + } + + SUBTYPE *This() + { + return (SUBTYPE *)this; + } + + public: + + Enumerator() + { + } + + const element_t &operator*() const + { + return This()->Get(); + } + const element_t *operator->() const + { + return &(This()->Get()); + } + SUBTYPE &operator++() + { + This()->Next(); + return *This(); + } + SUBTYPE operator++(int) + { + SUBTYPE i = *This(); + This()->Next(); + return i; + } + bool operator==(const SUBTYPE &i) const + { + return This()->Equal(i); + } + bool operator!=(const SUBTYPE &i) const + { + return !This()->Equal(i); + } + }; + + // + // Index for whole table iterator. This is also the base for the keyed iterator. 
+ // + + class Index + { + friend class SHash; + friend class Iterator; + friend class Enumerator; + + // The methods implementation has to be here for portability + // Some compilers won't compile the separate implementation in shash.inl + protected: + + PTR_element_t m_table; + count_t m_tableSize; + count_t m_index; + + Index(const SHash *hash, bool begin) + : m_table(hash->m_table), + m_tableSize(hash->m_tableSize), + m_index(begin ? 0 : m_tableSize) + { + } + + const element_t &Get() const + { + return m_table[m_index]; + } + + void First() + { + if (m_index < m_tableSize) + if (TRAITS::IsNull(m_table[m_index]) || TRAITS::IsDeleted(m_table[m_index])) + Next(); + } + + void Next() + { + if (m_index >= m_tableSize) + return; + + for (;;) + { + m_index++; + if (m_index >= m_tableSize) + break; + if (!TRAITS::IsNull(m_table[m_index]) && !TRAITS::IsDeleted(m_table[m_index])) + break; + } + } + + bool Equal(const Index &i) const + { + return i.m_index == m_index; + } + }; + + class Iterator : public Index, public Enumerator + { + friend class SHash; + + public: + Iterator(const SHash *hash, bool begin) + : Index(hash, begin) + { + } + }; + + // + // Index for iterating elements with a given key. + // Note that the m_index field is artificially bumped to m_tableSize when the end + // of iteration is reached. This allows a canonical End iterator to be used. + // + + class KeyIndex : public Index + { + friend class SHash; + friend class KeyIterator; + friend class Enumerator; + + // The methods implementation has to be here for portability + // Some compilers won't compile the separate implementation in shash.inl + protected: + key_t m_key; + count_t m_increment; + + KeyIndex(const SHash *hash, bool begin) + : Index(hash, begin), + m_increment(0) + { + } + + void SetKey(key_t key) + { + if (m_tableSize > 0) + { + m_key = key; + count_t hash = Hash(key); + + TRAITS::m_index = hash % m_tableSize; + m_increment = (hash % (m_tableSize-1)) + 1; + + // Find first valid element + if (IsNull(m_table[TRAITS::m_index])) + TRAITS::m_index = m_tableSize; + else if (IsDeleted(m_table[TRAITS::m_index]) + || !Equals(m_key, GetKey(m_table[TRAITS::m_index]))) + Next(); + } + } + + void Next() + { + while (true) + { + TRAITS::m_index += m_increment; + if (TRAITS::m_index >= m_tableSize) + TRAITS::m_index -= m_tableSize; + + if (IsNull(m_table[TRAITS::m_index])) + { + TRAITS::m_index = m_tableSize; + break; + } + + if (!IsDeleted(m_table[TRAITS::m_index]) + && Equals(m_key, GetKey(m_table[TRAITS::m_index]))) + { + break; + } + } + } + }; + + class KeyIterator : public KeyIndex, public Enumerator + { + friend class SHash; + + public: + + operator Iterator &() + { + return *(Iterator*)this; + } + + operator const Iterator &() + { + return *(const Iterator*)this; + } + + KeyIterator(const SHash *hash, bool begin) + : KeyIndex(hash, begin) + { + } + }; + + // Test for prime number. + static bool IsPrime(count_t number); + + // Find the next prime number >= the given value. + + static count_t NextPrime(count_t number); + + // Instance members + + PTR_element_t m_table; // pointer to table + count_t m_tableSize; // allocated size of table + count_t m_tableCount; // number of elements in table + count_t m_tableOccupied; // number, includes deleted slots + count_t m_tableMax; // maximum occupied count before reallocating +}; + +// disables support for DAC marshaling. 
Useful for defining right-side only SHashes + +template +class NonDacAwareSHashTraits : public PARENT +{ +public: + typedef typename PARENT::element_t element_t; + typedef element_t * PTR_element_t; +}; + +// disables support for removing elements - produces slightly faster implementation + +template +class NoRemoveSHashTraits : public PARENT +{ +public: + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef typename PARENT::element_t element_t; + typedef typename PARENT::count_t count_t; + + static const bool s_supports_remove = false; + static const element_t Deleted() { UNREACHABLE(); } + static bool IsDeleted(const element_t &e) { UNREFERENCED_PARAMETER(e); return false; } +}; + +// PtrHashTraits is a template to provides useful defaults for pointer hash tables +// It relies on methods GetKey and Hash defined on ELEMENT + +template +class PtrSHashTraits : public DefaultSHashTraits +{ + public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef DefaultSHashTraits PARENT; + typedef typename PARENT::element_t element_t; + typedef typename PARENT::count_t count_t; + + typedef KEY key_t; + + static key_t GetKey(const element_t &e) + { + return e->GetKey(); + } + static bool Equals(key_t k1, key_t k2) + { + return k1 == k2; + } + static count_t Hash(key_t k) + { + return ELEMENT::Hash(k); + } +}; + +template +class PtrSHash : public SHash< PtrSHashTraits > +{ +}; + +template +class KeyValuePair { + KEY key; + VALUE value; + +public: + KeyValuePair() + { + } + + KeyValuePair(const KEY& k, const VALUE& v) + : key(k), value(v) + { + } + + KEY const & Key() const + { + return key; + } + + VALUE const & Value() const + { + return value; + } +}; + +template +class MapSHashTraits : public DefaultSHashTraits< KeyValuePair > +{ +public: + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef typename DefaultSHashTraits< KeyValuePair >::element_t element_t; + typedef typename DefaultSHashTraits< KeyValuePair >::count_t count_t; + + typedef KEY key_t; + + static key_t GetKey(element_t e) + { + return e.Key(); + } + static bool Equals(key_t k1, key_t k2) + { + return k1 == k2; + } + static count_t Hash(key_t k) + { + return (count_t)(size_t)k; + } + + static const element_t Null() { return element_t((KEY)0,(VALUE)0); } + static bool IsNull(const element_t &e) { return e.Key() == (KEY)0; } +}; + +template +class MapSHash : public SHash< NoRemoveSHashTraits< MapSHashTraits > > +{ + typedef SHash< NoRemoveSHashTraits< MapSHashTraits > > PARENT; + +public: + void Add(KEY key, VALUE value) + { + PARENT::Add(KeyValuePair(key, value)); + } + + bool Lookup(KEY key, VALUE* pValue) + { + const KeyValuePair *pRet = PARENT::LookupPtr(key); + if (pRet == NULL) + return false; + + *pValue = pRet->Value(); + return true; + } +}; + + +// restore "Conditional expression is constant" warning to previous value +#pragma warning(pop) + +#endif // __shash_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/shash.inl b/src/coreclr/src/nativeaot/Runtime/shash.inl new file mode 100644 index 0000000000000..ab39f63955786 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/shash.inl @@ -0,0 +1,470 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
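// ---------------------------------------------------------------------------
// [Editorial aside, not part of the patch] A minimal sketch of how the traits
// contract declared in shash.h above is satisfied in practice. The element
// type and field names are illustrative; the sketch assumes shash.h/shash.inl
// and the runtime's common type definitions (UInt32) are included.
struct Widget
{
    UInt32 m_id;

    // PtrSHashTraits requires GetKey() on the element and a static Hash(key).
    UInt32 GetKey() const { return m_id; }
    static UInt32 Hash(UInt32 key) { return key * 2654435761u; }   // any stable mixing function
};

// Usage sketch:
//   PtrSHash<Widget, UInt32> table;
//   Widget w = { 42 };
//   table.Add(&w);                       // grows the prime-sized table as needed
//   Widget * found = table.Lookup(42);   // &w, or NULL when the key is absent
//
// MapSHash<KEY, VALUE> layers key/value pairs over the same machinery, with
// Add(key, value) and Lookup(key, &value).
// ---------------------------------------------------------------------------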
+ +// disable the "Conditional expression is constant" warning +#pragma warning(disable:4127) + + +template +SHash::SHash() + : m_table(nullptr), + m_tableSize(0), + m_tableCount(0), + m_tableOccupied(0), + m_tableMax(0) +{ + C_ASSERT(TRAITS::s_growth_factor_numerator > TRAITS::s_growth_factor_denominator); + C_ASSERT(TRAITS::s_density_factor_numerator < TRAITS::s_density_factor_denominator); +} + +template +SHash::~SHash() +{ + delete [] m_table; +} + +template +typename SHash::count_t SHash::GetCount() const +{ + return m_tableCount; +} + +template +typename SHash::count_t SHash::GetCapacity() const +{ + return m_tableMax; +} + +template +typename SHash< TRAITS>::element_t SHash::Lookup(key_t key) const +{ + const element_t *pRet = Lookup(m_table, m_tableSize, key); + return ((pRet != NULL) ? (*pRet) : TRAITS::Null()); +} + +template +const typename SHash< TRAITS>::element_t* SHash::LookupPtr(key_t key) const +{ + return Lookup(m_table, m_tableSize, key); +} + +template +bool SHash::Add(const element_t &element) +{ + if (!CheckGrowth()) + return false; + + if (Add(m_table, m_tableSize, element)) + m_tableOccupied++; + m_tableCount++; + + return true; +} + +template +bool SHash::AddOrReplace(const element_t &element) +{ + if (!CheckGrowth()) + return false; + + AddOrReplace(m_table, m_tableSize, element); + return true; +} + +template +void SHash::Remove(key_t key) +{ + Remove(m_table, m_tableSize, key); +} + +template +void SHash::Remove(Iterator& i) +{ + RemoveElement(m_table, m_tableSize, (element_t*)&(*i)); +} + +template +void SHash::Remove(KeyIterator& i) +{ + RemoveElement(m_table, m_tableSize, (element_t*)&(*i)); +} + +template +void SHash::RemovePtr(element_t * p) +{ + RemoveElement(m_table, m_tableSize, p); +} + +template +void SHash::RemoveAll() +{ + delete [] m_table; + + m_table = NULL; + m_tableSize = 0; + m_tableCount = 0; + m_tableOccupied = 0; + m_tableMax = 0; +} + +template +typename SHash::Iterator SHash::Begin() const +{ + Iterator i(this, true); + i.First(); + return i; +} + +template +typename SHash::Iterator SHash::End() const +{ + return Iterator(this, false); +} + +template +typename SHash::KeyIterator SHash::Begin(key_t key) const +{ + KeyIterator k(this, true); + k.SetKey(key); + return k; +} + +template +typename SHash::KeyIterator SHash::End(key_t key) const +{ + return KeyIterator(this, false); +} + +template +bool SHash::CheckGrowth() +{ + if (m_tableOccupied == m_tableMax) + { + return Grow(); + } + + return true; +} + +template +bool SHash::Grow() +{ + count_t newSize = (count_t) (m_tableCount + * TRAITS::s_growth_factor_numerator / TRAITS::s_growth_factor_denominator + * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator); + if (newSize < TRAITS::s_minimum_allocation) + newSize = TRAITS::s_minimum_allocation; + + // handle potential overflow + if (newSize < m_tableCount) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + return Reallocate(newSize); +} + +template +bool SHash::CheckGrowth(count_t newElements) +{ + count_t newCount = (m_tableCount + newElements); + + // handle potential overflow + if (newCount < newElements) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + // enough space in the table? 
+ if (newCount < m_tableMax) + return true; + + count_t newSize = (count_t) (newCount * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator) + 1; + + // handle potential overflow + if (newSize < newCount) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + // accelerate the growth to avoid unnecessary rehashing + count_t newSize2 = (m_tableCount * TRAITS::s_growth_factor_numerator / TRAITS::s_growth_factor_denominator + * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator); + + if (newSize < newSize2) + newSize = newSize2; + + if (newSize < TRAITS::s_minimum_allocation) + newSize = TRAITS::s_minimum_allocation; + + return Reallocate(newSize); +} + +template +bool SHash::Reallocate(count_t newTableSize) +{ + ASSERT(newTableSize >= + (count_t) (GetCount() * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator)); + + // Allocation size must be a prime number. This is necessary so that hashes uniformly + // distribute to all indices, and so that chaining will visit all indices in the hash table. + newTableSize = NextPrime(newTableSize); + if (newTableSize == 0) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + element_t *newTable = new (nothrow) element_t [newTableSize]; + if (newTable == NULL) + { + TRAITS::OnFailure(ftAllocation); + return false; + } + + element_t *p = newTable, *pEnd = newTable + newTableSize; + while (p < pEnd) + { + *p = TRAITS::Null(); + p++; + } + + // Move all entries over to new table. + + for (Iterator i = Begin(), end = End(); i != end; i++) + { + const element_t & cur = (*i); + if (!TRAITS::IsNull(cur) && !TRAITS::IsDeleted(cur)) + Add(newTable, newTableSize, cur); + } + + // @todo: + // We might want to try to delay this cleanup to allow asynchronous readers + + delete [] m_table; + + m_table = PTR_element_t(newTable); + m_tableSize = newTableSize; + m_tableMax = (count_t) (newTableSize * TRAITS::s_density_factor_numerator / TRAITS::s_density_factor_denominator); + m_tableOccupied = m_tableCount; + + return true; +} + +template +const typename SHash::element_t * SHash::Lookup(PTR_element_t table, count_t tableSize, key_t key) +{ + if (tableSize == 0) + return NULL; + + count_t hash = TRAITS::Hash(key); + count_t index = hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + + if (TRAITS::IsNull(current)) + return NULL; + + if (!TRAITS::IsDeleted(current) + && TRAITS::Equals(key, TRAITS::GetKey(current))) + { + return ¤t; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} + +template +bool SHash::Add(element_t *table, count_t tableSize, const element_t &element) +{ + key_t key = TRAITS::GetKey(element); + + count_t hash = TRAITS::Hash(key); + count_t index = hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + + if (TRAITS::IsNull(current)) + { + table[index] = element; + return true; + } + + if (TRAITS::IsDeleted(current)) + { + table[index] = element; + return false; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} + +template +void SHash::AddOrReplace(element_t *table, count_t tableSize, const element_t &element) +{ + ASSERT(!TRAITS::s_supports_remove); + + key_t key = TRAITS::GetKey(element); + + count_t hash = TRAITS::Hash(key); + count_t index 
= hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + ASSERT(!TRAITS::IsDeleted(current)); + + if (TRAITS::IsNull(current)) + { + table[index] = element; + m_tableCount++; + m_tableOccupied++; + return; + } + else if (TRAITS::Equals(key, TRAITS::GetKey(current))) + { + table[index] = element; + return; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} + +#ifdef _MSC_VER +#pragma warning (disable: 4702) // Workaround bogus unreachable code warning +#endif +template +void SHash::Remove(element_t *table, count_t tableSize, key_t key) +{ + ASSERT(TRAITS::s_supports_remove); + ASSERT(Lookup(table, tableSize, key) != NULL); + + count_t hash = TRAITS::Hash(key); + count_t index = hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + + if (TRAITS::IsNull(current)) + return; + + if (!TRAITS::IsDeleted(current) + && TRAITS::Equals(key, TRAITS::GetKey(current))) + { + table[index] = TRAITS::Deleted(); + m_tableCount--; + return; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} +#ifdef _MSC_VER +#pragma warning (default: 4702) +#endif + +template +void SHash::RemoveElement(element_t *table, count_t tableSize, element_t *element) +{ + ASSERT(TRAITS::s_supports_remove); + ASSERT(table <= element && element < table + tableSize); + ASSERT(!TRAITS::IsNull(*element) && !TRAITS::IsDeleted(*element)); + + *element = TRAITS::Deleted(); + m_tableCount--; +} + +template +bool SHash::IsPrime(count_t number) +{ + // This is a very low-tech check for primality, which doesn't scale very well. + // There are more efficient tests if this proves to be burdensome for larger + // tables. + + if ((number&1) == 0) + return false; + + count_t factor = 3; + while (factor * factor <= number) + { + if ((number % factor) == 0) + return false; + factor += 2; + } + + return true; +} + +namespace +{ + const UInt32 g_shash_primes[] = { + 11,17,23,29,37,47,59,71,89,107,131,163,197,239,293,353,431,521,631,761,919, + 1103,1327,1597,1931,2333,2801,3371,4049,4861,5839,7013,8419,10103,12143,14591, + 17519,21023,25229,30293,36353,43627,52361,62851,75431,90523, 108631, 130363, + 156437, 187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, + 968897, 1162687, 1395263, 1674319, 2009191, 2411033, 2893249, 3471899, 4166287, + 4999559, 5999471, 7199369 }; +} + + +// Returns a prime larger than 'number' or 0, in case of overflow +template +typename SHash::count_t SHash::NextPrime(typename SHash::count_t number) +{ + for (int i = 0; i < (int) (sizeof(g_shash_primes) / sizeof(g_shash_primes[0])); i++) + { + if (g_shash_primes[i] >= number) + return (typename SHash::count_t)(g_shash_primes[i]); + } + + if ((number&1) == 0) + number++; + + while (number != 1) + { + if (IsPrime(number)) + return number; + number += 2; + } + + return 0; +} + +// restore "Conditional expression is constant" warning to default value +#pragma warning(default:4127) + diff --git a/src/coreclr/src/nativeaot/Runtime/slist.h b/src/coreclr/src/nativeaot/Runtime/slist.h new file mode 100644 index 0000000000000..4525ba3b586ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/slist.h @@ -0,0 +1,124 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __slist_h__ +#define __slist_h__ + +#include "forward_declarations.h" + +MSVC_SAVE_WARNING_STATE() +MSVC_DISABLE_WARNING(4127) // conditional expression is constant -- it's intentionally constant + +struct DoNothingFailFastPolicy +{ + static inline void FailFast(); +}; + +template +struct DefaultSListTraits : public FailFastPolicy +{ + typedef DPTR(T) PTR_T; + typedef DPTR(PTR_T) PTR_PTR_T; + + static inline PTR_PTR_T GetNextPtr(PTR_T pT); + static inline bool Equals(PTR_T pA, PTR_T pB); +}; + +//------------------------------------------------------------------------------------------------------------ +// class SList, to use a singly linked list. +// +// To use, either expose a field DPTR(T) m_pNext by adding DefaultSListTraits as a friend class, or +// define a new Traits class derived from DefaultSListTraits and override the GetNextPtr function. +// +// SList supports lockless head insert and Remove methods. However, PushHeadInterlocked and +// PopHeadInterlocked must be used very carefully, as the rest of the mutating methods are not +// interlocked. In general, code must be careful to ensure that it will never use more than one +// synchronization mechanism at any given time to control access to a resource, and this is no +// exception. In particular, if synchronized access to other SList operations (such as FindAndRemove) +// are required, than a separate synchronization mechanism (such as a critical section) must be used. +//------------------------------------------------------------------------------------------------------------ +template > +class SList : public Traits +{ +protected: + typedef typename Traits::PTR_T PTR_T; + typedef typename Traits::PTR_PTR_T PTR_PTR_T; + +public: + SList(); + + // Returns true if there are no entries in the list. + bool IsEmpty(); + + // Returns the value of (but does not remove) the first element in the list. + PTR_T GetHead(); + + // Inserts pItem at the front of the list. See class header for more information. + void PushHead(PTR_T pItem); + void PushHeadInterlocked(PTR_T pItem); + + // Removes and returns the first entry in the list. See class header for more information. + PTR_T PopHead(); + + class Iterator + { + friend SList; + + public: + Iterator(Iterator const &it); + Iterator& operator=(Iterator const &it); + + PTR_T operator->(); + PTR_T operator*(); + + Iterator & operator++(); + Iterator operator++(int); + + bool operator==(Iterator const &rhs); + bool operator==(PTR_T pT); + bool operator!=(Iterator const &rhs); + + private: + Iterator(PTR_PTR_T ppItem); + + Iterator Insert(PTR_T pItem); + Iterator Remove(); + + static Iterator End(); + PTR_PTR_T m_ppCur; +#ifdef _DEBUG + mutable bool m_fIsValid; +#endif + + PTR_T _Value() const; + + enum e_ValidateOperation + { + e_CanCompare, // Will assert in debug if m_fIsValid == false. + e_CanInsert, // i.e., not the fake End() value of m_ppCur == NULL + e_HasValue, // i.e., m_ppCur != NULL && *m_ppCur != NULL + }; + void _Validate(e_ValidateOperation op) const; + }; + + Iterator Begin(); + Iterator End(); + + // Returns iterator to first list item matching pItem + Iterator FindFirst(PTR_T pItem); + bool RemoveFirst(PTR_T pItem); + + // Inserts pItem *before* it. Returns iterator pointing to inserted item. + Iterator Insert(Iterator & it, PTR_T pItem); + + // Removes item pointed to by it from the list. Returns iterator pointing + // to following item. 
+ Iterator Remove(Iterator & it); + +private: + PTR_T m_pHead; +}; + +MSVC_RESTORE_WARNING_STATE() + +#endif // __slist_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/slist.inl b/src/coreclr/src/nativeaot/Runtime/slist.inl new file mode 100644 index 0000000000000..dc437fe1c9ba9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/slist.inl @@ -0,0 +1,361 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +MSVC_SAVE_WARNING_STATE() +MSVC_DISABLE_WARNING(4127) // conditional expression is constant -- + // while (true) loops and compile time template constants cause this. + + +//------------------------------------------------------------------------------------------------- +namespace rh { namespace std +{ + // Specialize rh::std::find for SList iterators so that it will use _Traits::Equals. + template + inline + typename SList<_Tx, _Traits>::Iterator find( + typename SList<_Tx, _Traits>::Iterator _First, + typename SList<_Tx, _Traits>::Iterator _Last, + const _Ty& _Val) + { // find first matching _Val + for (; _First != _Last; ++_First) + if (_Traits::Equals(*_First, _Val)) + break; + return (_First); + } +} // namespace std +} // namespace rh + +//------------------------------------------------------------------------------------------------- +inline +void DoNothingFailFastPolicy::FailFast() +{ + // Intentionally a no-op. +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename DefaultSListTraits::PTR_PTR_T DefaultSListTraits::GetNextPtr( + PTR_T pT) +{ + ASSERT(pT != NULL); + return dac_cast(dac_cast(pT) + offsetof(T, m_pNext)); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool DefaultSListTraits::Equals( + PTR_T pA, + PTR_T pB) +{ // Default is pointer comparison + return pA == pB; +} + +//------------------------------------------------------------------------------------------------- +template +inline +SList::SList() + : m_pHead(NULL) +{ +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::IsEmpty() +{ + return Begin() == End(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::GetHead() +{ + return m_pHead; +} + +//------------------------------------------------------------------------------------------------- +template +inline +void SList::PushHead( + PTR_T pItem) +{ + NO_DAC(); + Begin().Insert(pItem); +} + +//------------------------------------------------------------------------------------------------- +template +inline +void SList::PushHeadInterlocked( + PTR_T pItem) +{ + NO_DAC(); + ASSERT(pItem != NULL); + ASSERT(IS_ALIGNED(&m_pHead, sizeof(void*))); + + while (true) + { + *Traits::GetNextPtr(pItem) = *reinterpret_cast(&m_pHead); + if (PalInterlockedCompareExchangePointer( + reinterpret_cast(&m_pHead), + reinterpret_cast(pItem), + reinterpret_cast(*Traits::GetNextPtr(pItem))) == reinterpret_cast(*Traits::GetNextPtr(pItem))) + { + break; + } + } +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::PopHead() +{ + NO_DAC(); + PTR_T pRet = *Begin(); + Begin().Remove(); + return pRet; +} + 
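// ---------------------------------------------------------------------------
// [Editorial aside, not part of the patch] A minimal node type that works with
// DefaultSListTraits by exposing an m_pNext field, plus basic usage. Names are
// illustrative; the sketch assumes slist.h/slist.inl are included and a
// non-DAC build, where DPTR(WorkItem) is simply WorkItem*.
struct WorkItem
{
    WorkItem * m_pNext;   // located via offsetof(WorkItem, m_pNext) by the default traits
    int        m_payload;
};

// Usage sketch:
//   SList<WorkItem> list;
//   WorkItem a = { NULL, 1 };
//   WorkItem b = { NULL, 2 };
//   list.PushHead(&a);
//   list.PushHead(&b);
//   WorkItem * p = list.PopHead();   // &b -- head insertion gives LIFO order
//   bool empty = list.IsEmpty();     // false; 'a' is still linked
// ---------------------------------------------------------------------------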
+//------------------------------------------------------------------------------------------------- +template +inline +SList::Iterator::Iterator( + Iterator const &it) + : m_ppCur(it.m_ppCur) +#ifdef _DEBUG + , m_fIsValid(it.m_fIsValid) +#endif +{ +} + +//------------------------------------------------------------------------------------------------- +template +inline +SList::Iterator::Iterator( + PTR_PTR_T ppItem) + : m_ppCur(ppItem) +#ifdef _DEBUG + , m_fIsValid(true) +#endif +{ +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator& SList::Iterator::operator=( + Iterator const &it) +{ + m_ppCur = it.m_ppCur; +#ifdef _DEBUG + m_fIsValid = it.m_fIsValid; +#endif + return *this; +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::Iterator::operator->() +{ + _Validate(e_HasValue); + return _Value(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::Iterator::operator*() +{ + _Validate(e_HasValue); + return _Value(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator & SList::Iterator::operator++() +{ + _Validate(e_HasValue); // Having a value means we're not at the end. + m_ppCur = Traits::GetNextPtr(_Value()); + return *this; +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Iterator::operator++( + int) +{ + _Validate(e_HasValue); // Having a value means we're not at the end. 
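+    // Postfix form: remember the current position, advance *this via the prefix form, and
+    // return an iterator positioned where we were before the increment.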
+ PTR_PTR_T ppRet = m_ppCur; + ++(*this); + return Iterator(ppRet); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::Iterator::operator==( + Iterator const &rhs) +{ + _Validate(e_CanCompare); + rhs._Validate(e_CanCompare); + return Traits::Equals(_Value(), rhs._Value()); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::Iterator::operator==( + PTR_T pT) +{ + _Validate(e_CanCompare); + return Traits::Equals(_Value(), pT); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::Iterator::operator!=( + Iterator const &rhs) +{ + return !operator==(rhs); +} + +//------------------------------------------------------------------------------------------------- +template +inline /*static*/ +typename SList::Iterator SList::Iterator::End() +{ + return Iterator(NULL); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Iterator::Insert( + PTR_T pItem) +{ + NO_DAC(); + _Validate(e_CanInsert); + *Traits::GetNextPtr(pItem) = *m_ppCur; + *m_ppCur = pItem; + Iterator itRet(m_ppCur); + ++(*this); + return itRet; +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Iterator::Remove() +{ + NO_DAC(); + _Validate(e_HasValue); + *m_ppCur = *Traits::GetNextPtr(*m_ppCur); + PTR_PTR_T ppRet = m_ppCur; + // Set it to End, so that subsequent misuse of this iterator will + // result in an AV rather than possible memory corruption. + *this = End(); + return Iterator(ppRet); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::Iterator::_Value() const +{ + ASSERT(m_fIsValid); + return dac_cast(m_ppCur == NULL ? NULL : *m_ppCur); +} + +//------------------------------------------------------------------------------------------------- +template +inline +void SList::Iterator::_Validate(e_ValidateOperation op) const +{ + ASSERT(m_fIsValid); + ASSERT(op == e_CanCompare || op == e_CanInsert || op == e_HasValue); + + if ((op != e_CanCompare && m_ppCur == NULL) || + (op == e_HasValue && *m_ppCur == NULL)) + { + // NOTE: Default of DoNothingFailFastPolicy is a no-op, and so this function will be + // eliminated in retail builds. This is ok, as the subsequent operation will cause + // an AV, which will itself trigger a FailFast. Provide a different policy to get + // different behavior. 
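+        // (A policy whose FailFast() calls RhFailFast(), for example, would turn this misuse
+        // into an immediate fail-fast instead of relying on the subsequent access violation.)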
+ ASSERT_MSG(false, "Invalid SList::Iterator use."); + Traits::FailFast(); +#ifdef _DEBUG + m_fIsValid = false; +#endif + } +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Begin() +{ + typedef SList T_THIS; + return Iterator(dac_cast( + dac_cast(this) + offsetof(T_THIS, m_pHead))); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::End() +{ + return Iterator::End(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::FindFirst(PTR_T pItem) +{ + return rh::std::find(Begin(), End(), pItem); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::RemoveFirst(PTR_T pItem) +{ + NO_DAC(); + Iterator it = FindFirst(pItem); + if (it != End()) + { + it.Remove(); + return true; + } + else + { + return false; + } +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Insert(Iterator & it, PTR_T pItem) +{ + return it.Insert(pItem); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Remove(Iterator & it) +{ + return it.Remove(); +} + + +MSVC_RESTORE_WARNING_STATE() + diff --git a/src/coreclr/src/nativeaot/Runtime/startup.cpp b/src/coreclr/src/nativeaot/Runtime/startup.cpp new file mode 100644 index 0000000000000..4ed81423af950 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/startup.cpp @@ -0,0 +1,463 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RuntimeInstance.h" +#include "rhbinder.h" +#include "CachedInterfaceDispatch.h" +#include "RhConfig.h" +#include "stressLog.h" +#include "RestrictedCallouts.h" +#include "yieldprocessornormalized.h" + +#ifndef DACCESS_COMPILE + +#ifdef PROFILE_STARTUP +unsigned __int64 g_startupTimelineEvents[NUM_STARTUP_TIMELINE_EVENTS] = { 0 }; +#endif // PROFILE_STARTUP + +#ifdef TARGET_UNIX +Int32 RhpHardwareExceptionHandler(UIntNative faultCode, UIntNative faultAddress, PAL_LIMITED_CONTEXT* palContext, UIntNative* arg0Reg, UIntNative* arg1Reg); +#else +Int32 __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs); +#endif + +static void CheckForPalFallback(); +static bool DetectCPUFeatures(); + +extern RhConfig * g_pRhConfig; + +EXTERN_C bool g_fHasFastFxsave = false; + +CrstStatic g_CastCacheLock; +CrstStatic g_ThunkPoolLock; + +#if defined(HOST_X86) || defined(HOST_AMD64) +// This field is inspected from the generated code to determine what intrinsics are available. +EXTERN_C int g_cpuFeatures = 0; +// This field is defined in the generated code and sets the ISA expectations. 
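+// (DetectCPUFeatures below compares the detected g_cpuFeatures against this mask and fails
+// startup if a required ISA feature is not present on the machine.)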
+EXTERN_C int g_requiredCpuFeatures; +#endif + +static bool InitDLL(HANDLE hPalInstance) +{ + CheckForPalFallback(); + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // + // Initialize interface dispatch. + // + if (!InitializeInterfaceDispatch()) + return false; +#endif + + // + // Initialize support for registering GC and HandleTable callouts. + // + if (!RestrictedCallouts::Initialize()) + return false; + + // + // Initialize RuntimeInstance state + // + if (!RuntimeInstance::Initialize(hPalInstance)) + return false; + + // Note: The global exception handler uses RuntimeInstance +#if !defined(APP_LOCAL_RUNTIME) && !defined(USE_PORTABLE_HELPERS) +#ifndef TARGET_UNIX + PalAddVectoredExceptionHandler(1, RhpVectoredExceptionHandler); +#else + PalSetHardwareExceptionHandler(RhpHardwareExceptionHandler); +#endif +#endif // !APP_LOCAL_RUNTIME && !USE_PORTABLE_HELPERS + + InitializeYieldProcessorNormalizedCrst(); + + STARTUP_TIMELINE_EVENT(NONGC_INIT_COMPLETE); + + if (!RedhawkGCInterface::InitializeSubsystems()) + return false; + + STARTUP_TIMELINE_EVENT(GC_INIT_COMPLETE); + +#ifdef STRESS_LOG + UInt32 dwTotalStressLogSize = g_pRhConfig->GetTotalStressLogSize(); + UInt32 dwStressLogLevel = g_pRhConfig->GetStressLogLevel(); + + unsigned facility = (unsigned)LF_ALL; + unsigned dwPerThreadChunks = (dwTotalStressLogSize / 24) / STRESSLOG_CHUNK_SIZE; + if (dwTotalStressLogSize != 0) + { + StressLog::Initialize(facility, dwStressLogLevel, + dwPerThreadChunks * STRESSLOG_CHUNK_SIZE, + (unsigned)dwTotalStressLogSize, hPalInstance); + } +#endif // STRESS_LOG + +#ifndef USE_PORTABLE_HELPERS + if (!DetectCPUFeatures()) + return false; +#endif + + if (!g_CastCacheLock.InitNoThrow(CrstType::CrstCastCache)) + return false; + + if (!g_ThunkPoolLock.InitNoThrow(CrstType::CrstCastCache)) + return false; + + return true; +} + +static void CheckForPalFallback() +{ +#ifdef _DEBUG + UInt32 disallowSetting = g_pRhConfig->GetDisallowRuntimeServicesFallback(); + if (disallowSetting == 0) + return; + + // The fallback provider doesn't implement write watch, so we check for the write watch capability as a + // proxy for whether or not we're using the fallback provider since we don't have direct access to this + // information from here. + + if (disallowSetting == 1) + { + // If RH_DisallowRuntimeServicesFallback is set to 1, we want to fail fast if we discover that we're + // running against the fallback provider. + if (!PalHasCapability(WriteWatchCapability)) + RhFailFast(); + } + else if (disallowSetting == 2) + { + // If RH_DisallowRuntimeServicesFallback is set to 2, we want to fail fast if we discover that we're + // NOT running against the fallback provider. 
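+        // (Write watch being available implies the real platform services, not the fallback.)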
+ if (PalHasCapability(WriteWatchCapability)) + RhFailFast(); + } +#endif // _DEBUG +} + +#ifndef USE_PORTABLE_HELPERS +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.cs +enum XArchIntrinsicConstants +{ + XArchIntrinsicConstants_Aes = 0x0001, + XArchIntrinsicConstants_Pclmulqdq = 0x0002, + XArchIntrinsicConstants_Sse3 = 0x0004, + XArchIntrinsicConstants_Ssse3 = 0x0008, + XArchIntrinsicConstants_Sse41 = 0x0010, + XArchIntrinsicConstants_Sse42 = 0x0020, + XArchIntrinsicConstants_Popcnt = 0x0040, + XArchIntrinsicConstants_Avx = 0x0080, + XArchIntrinsicConstants_Fma = 0x0100, + XArchIntrinsicConstants_Avx2 = 0x0200, + XArchIntrinsicConstants_Bmi1 = 0x0400, + XArchIntrinsicConstants_Bmi2 = 0x0800, + XArchIntrinsicConstants_Lzcnt = 0x1000, +}; + +bool DetectCPUFeatures() +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + + unsigned char buffer[16]; + +#ifdef HOST_AMD64 + // AMD has a "fast" mode for fxsave/fxrstor, which omits the saving of xmm registers. The OS will enable this mode + // if it is supported. So if we continue to use fxsave/fxrstor, we must manually save/restore the xmm registers. + // fxsr_opt is bit 25 of EDX + getextcpuid(0, 0x80000001, buffer); + if ((buffer[15] & 0x02) != 0) + g_fHasFastFxsave = true; +#endif + + uint32_t maxCpuId = getcpuid(0, buffer); + + if (maxCpuId >= 1) + { + // getcpuid executes cpuid with eax set to its first argument, and ecx cleared. + // It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11], + // and edx in buffer[12-15]. + + (void)getcpuid(1, buffer); + + // If SSE/SSE2 is not enabled, there is no point in checking the rest. + // SSE is bit 25 of EDX (buffer[15] & 0x02) + // SSE2 is bit 26 of EDX (buffer[15] & 0x04) + if ((buffer[15] & 0x06) == 0x06) // SSE & SSE2 + { + if ((buffer[11] & 0x02) != 0) // AESNI + { + g_cpuFeatures |= XArchIntrinsicConstants_Aes; + } + + if ((buffer[8] & 0x02) != 0) // PCLMULQDQ + { + g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq; + } + + if ((buffer[8] & 0x01) != 0) // SSE3 + { + g_cpuFeatures |= XArchIntrinsicConstants_Sse3; + + if ((buffer[9] & 0x02) != 0) // SSSE3 + { + g_cpuFeatures |= XArchIntrinsicConstants_Ssse3; + + if ((buffer[10] & 0x08) != 0) // SSE4.1 + { + g_cpuFeatures |= XArchIntrinsicConstants_Sse41; + + if ((buffer[10] & 0x10) != 0) // SSE4.2 + { + g_cpuFeatures |= XArchIntrinsicConstants_Sse42; + + if ((buffer[10] & 0x80) != 0) // POPCNT + { + g_cpuFeatures |= XArchIntrinsicConstants_Popcnt; + } + + if ((buffer[11] & 0x18) == 0x18) // AVX & OSXSAVE + { + if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx; + + if ((buffer[9] & 0x10) != 0) // FMA + { + g_cpuFeatures |= XArchIntrinsicConstants_Fma; + } + + if (maxCpuId >= 0x07) + { + (void)getextcpuid(0, 0x07, buffer); + + if ((buffer[4] & 0x20) != 0) // AVX2 + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx2; + } + } + } + } + } + } + } + } + } + + if (maxCpuId >= 0x07) + { + (void)getextcpuid(0, 0x07, buffer); + + if ((buffer[4] & 0x08) != 0) // BMI1 + { + g_cpuFeatures |= XArchIntrinsicConstants_Bmi1; + } + + if ((buffer[5] & 0x01) != 0) // BMI2 + { + g_cpuFeatures |= XArchIntrinsicConstants_Bmi2; + } + } + } + + uint32_t maxCpuIdEx = getcpuid(0x80000000, buffer); + + if (maxCpuIdEx >= 0x80000001) + { + // getcpuid executes cpuid with eax set to its first argument, and ecx cleared. + // It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11], + // and edx in buffer[12-15]. 
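+        // Extended leaf 0x80000001: LZCNT support is reported in ECX bit 5 (buffer[8] & 0x20).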
+ + (void)getcpuid(0x80000001, buffer); + + if ((buffer[8] & 0x20) != 0) // LZCNT + { + g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt; + } + } + + if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures) + { + return false; + } +#endif // HOST_X86 || HOST_AMD64 + + return true; +} +#endif // !USE_PORTABLE_HELPERS + +#ifdef PROFILE_STARTUP +#define STD_OUTPUT_HANDLE ((UInt32)-11) + +struct RegisterModuleTrace +{ + LARGE_INTEGER Begin; + LARGE_INTEGER End; +}; + +const int NUM_REGISTER_MODULE_TRACES = 16; +int g_registerModuleCount = 0; + +RegisterModuleTrace g_registerModuleTraces[NUM_REGISTER_MODULE_TRACES] = { 0 }; + +static void AppendInt64(char * pBuffer, UInt32* pLen, UInt64 value) +{ + char localBuffer[20]; + int cch = 0; + + do + { + localBuffer[cch++] = '0' + (value % 10); + value = value / 10; + } while (value); + + for (int i = 0; i < cch; i++) + { + pBuffer[(*pLen)++] = localBuffer[cch - i - 1]; + } + + pBuffer[(*pLen)++] = ','; + pBuffer[(*pLen)++] = ' '; +} +#endif // PROFILE_STARTUP + +static void UninitDLL() +{ +#ifdef PROFILE_STARTUP + char buffer[1024]; + + UInt32 len = 0; + + AppendInt64(buffer, &len, g_startupTimelineEvents[PROCESS_ATTACH_BEGIN]); + AppendInt64(buffer, &len, g_startupTimelineEvents[NONGC_INIT_COMPLETE]); + AppendInt64(buffer, &len, g_startupTimelineEvents[GC_INIT_COMPLETE]); + AppendInt64(buffer, &len, g_startupTimelineEvents[PROCESS_ATTACH_COMPLETE]); + + for (int i = 0; i < g_registerModuleCount; i++) + { + AppendInt64(buffer, &len, g_registerModuleTraces[i].Begin.QuadPart); + AppendInt64(buffer, &len, g_registerModuleTraces[i].End.QuadPart); + } + + buffer[len++] = '\n'; + + fwrite(buffer, len, 1, stdout); +#endif // PROFILE_STARTUP +} + +volatile bool g_processShutdownHasStarted = false; + +static void DllThreadDetach() +{ + // BEWARE: loader lock is held here! + + // Should have already received a call to FiberDetach for this thread's "home" fiber. + Thread* pCurrentThread = ThreadStore::GetCurrentThreadIfAvailable(); + if (pCurrentThread != NULL && !pCurrentThread->IsDetached()) + { + // Once shutdown starts, RuntimeThreadShutdown callbacks are ignored, implying that + // it is no longer guaranteed that exiting threads will be detached. + if (!g_processShutdownHasStarted) + { + ASSERT_UNCONDITIONALLY("Detaching thread whose home fiber has not been detached"); + RhFailFast(); + } + } +} + +void RuntimeThreadShutdown(void* thread) +{ + // Note: loader lock is normally *not* held here! + // The one exception is that the loader lock may be held during the thread shutdown callback + // that is made for the single thread that runs the final stages of orderly process + // shutdown (i.e., the thread that delivers the DLL_PROCESS_DETACH notifications when the + // process is being torn down via an ExitProcess call). + + UNREFERENCED_PARAMETER(thread); + + ASSERT((Thread*)thread == ThreadStore::GetCurrentThread()); + + if (!g_processShutdownHasStarted) + { + ThreadStore::DetachCurrentThread(); + } +} + +extern "C" bool RhInitialize() +{ + if (!PalInit()) + return false; + + if (!InitDLL(PalGetModuleHandleFromPointer((void*)&RhInitialize))) + return false; + + return true; +} + +COOP_PINVOKE_HELPER(void, RhpEnableConservativeStackReporting, ()) +{ + GetRuntimeInstance()->EnableConservativeStackReporting(); +} + +// +// Currently called only from a managed executable once Main returns, this routine does whatever is needed to +// cleanup managed state before exiting. 
There's not a lot here at the moment since we're always about to let +// the OS tear the process down anyway. +// +// @TODO: Eventually we'll probably have a hosting API and explicit shutdown request. When that happens we'll +// something more sophisticated here since we won't be able to rely on the OS cleaning up after us. +// +COOP_PINVOKE_HELPER(void, RhpShutdown, ()) +{ + // Indicate that runtime shutdown is complete and that the caller is about to start shutting down the entire process. + g_processShutdownHasStarted = true; +} + +#ifdef _WIN32 +EXTERN_C UInt32_BOOL WINAPI RtuDllMain(HANDLE hPalInstance, UInt32 dwReason, void* /*pvReserved*/) +{ + switch (dwReason) + { + case DLL_PROCESS_ATTACH: + { + STARTUP_TIMELINE_EVENT(PROCESS_ATTACH_BEGIN); + + if (!InitDLL(hPalInstance)) + return FALSE; + + STARTUP_TIMELINE_EVENT(PROCESS_ATTACH_COMPLETE); + } + break; + + case DLL_PROCESS_DETACH: + UninitDLL(); + break; + + case DLL_THREAD_DETACH: + DllThreadDetach(); + break; + } + + return TRUE; +} +#endif // _WIN32 + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/stressLog.cpp b/src/coreclr/src/nativeaot/Runtime/stressLog.cpp new file mode 100644 index 0000000000000..9c1d2cd33ad10 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/stressLog.cpp @@ -0,0 +1,585 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// --------------------------------------------------------------------------- +// StressLog.cpp +// +// StressLog infrastructure +// --------------------------------------------------------------------------- + +#include "common.h" +#ifdef DACCESS_COMPILE +#include +#include "sospriv.h" +#endif // DACCESS_COMPILE +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "daccess.h" +#include "stressLog.h" +#include "holder.h" +#include "Crst.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "RWLock.h" +#include "event.h" +#include "threadstore.h" +#include "threadstore.inl" + +template inline T VolatileLoad(T const * pt) { return *(T volatile const *)pt; } +template inline void VolatileStore(T* pt, T val) { *(T volatile *)pt = val; } + +#ifdef STRESS_LOG + +typedef DPTR(StressLog) PTR_StressLog; +GPTR_IMPL(StressLog, g_pStressLog /*, &StressLog::theLog*/); + +#ifndef DACCESS_COMPILE + +/*********************************************************************************/ +#if defined(HOST_X86) + +/* This is like QueryPerformanceCounter but a lot faster. On machines with + variable-speed CPUs (for power management), this is not accurate, but may + be good enough. +*/ +inline __declspec(naked) unsigned __int64 getTimeStamp() { + + __asm { + RDTSC // read time stamp counter + ret + }; +} + +#else // HOST_X86 +unsigned __int64 getTimeStamp() { + + LARGE_INTEGER ret; + ZeroMemory(&ret, sizeof(LARGE_INTEGER)); + + PalQueryPerformanceCounter(&ret); + + return ret.QuadPart; +} + +#endif // HOST_X86 else + +/*********************************************************************************/ +/* Get the the frequency corresponding to 'getTimeStamp'. For non-x86 + architectures, this is just the performance counter frequency. 
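+   On x86, getTimeStamp reads the time stamp counter directly, so this frequency is only an
+   approximation of the actual tick rate (see the note above getTimeStamp).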
+*/ +unsigned __int64 getTickFrequency() +{ + LARGE_INTEGER ret; + ZeroMemory(&ret, sizeof(LARGE_INTEGER)); + PalQueryPerformanceFrequency(&ret); + return ret.QuadPart; +} + +#endif // DACCESS_COMPILE + +StressLog StressLog::theLog = { 0, 0, 0, 0, 0, 0 }; +const static unsigned __int64 RECYCLE_AGE = 0x40000000L; // after a billion cycles, we can discard old threads + +/*********************************************************************************/ + +#ifndef DACCESS_COMPILE + +void StressLog::Initialize(unsigned facilities, unsigned level, unsigned maxBytesPerThread, + unsigned maxBytesTotal, HANDLE hMod) +{ + if (theLog.MaxSizePerThread != 0) + { + // guard ourself against multiple initialization. First init wins. + return; + } + + g_pStressLog = &theLog; + + theLog.pLock = new (nothrow) CrstStatic(); + theLog.pLock->Init(CrstStressLog); + if (maxBytesPerThread < STRESSLOG_CHUNK_SIZE) + { + maxBytesPerThread = STRESSLOG_CHUNK_SIZE; + } + theLog.MaxSizePerThread = maxBytesPerThread; + + if (maxBytesTotal < STRESSLOG_CHUNK_SIZE * 256) + { + maxBytesTotal = STRESSLOG_CHUNK_SIZE * 256; + } + theLog.MaxSizeTotal = maxBytesTotal; + theLog.totalChunk = 0; + theLog.facilitiesToLog = facilities | LF_ALWAYS; + theLog.levelToLog = level; + theLog.deadCount = 0; + + theLog.tickFrequency = getTickFrequency(); + + PalGetSystemTimeAsFileTime (&theLog.startTime); + theLog.startTimeStamp = getTimeStamp(); + + theLog.moduleOffset = (size_t)hMod; // HMODULES are base addresses. +} + +/*********************************************************************************/ +/* create a new thread stress log buffer associated with pThread */ + +ThreadStressLog* StressLog::CreateThreadStressLog(Thread * pThread) { + + if (theLog.facilitiesToLog == 0) + return NULL; + + if (pThread == NULL) + pThread = ThreadStore::GetCurrentThread(); + + ThreadStressLog* msgs = reinterpret_cast(pThread->GetThreadStressLog()); + if (msgs != NULL) + { + return msgs; + } + + // if it looks like we won't be allowed to allocate a new chunk, exit early + if (VolatileLoad(&theLog.deadCount) == 0 && !AllowNewChunk (0)) + { + return NULL; + } + + CrstHolder holder(theLog.pLock); + + msgs = CreateThreadStressLogHelper(pThread); + + return msgs; +} + +ThreadStressLog* StressLog::CreateThreadStressLogHelper(Thread * pThread) { + + bool skipInsert = FALSE; + ThreadStressLog* msgs = NULL; + + // See if we can recycle a dead thread + if (VolatileLoad(&theLog.deadCount) > 0) + { + unsigned __int64 recycleStamp = getTimeStamp() - RECYCLE_AGE; + msgs = VolatileLoad(&theLog.logs); + //find out oldest dead ThreadStressLog in case we can't find one within + //recycle age but can't create a new chunk + ThreadStressLog * oldestDeadMsg = NULL; + + while(msgs != 0) + { + if (msgs->isDead) + { + bool hasTimeStamp = msgs->curPtr != (StressMsg *)msgs->chunkListTail->EndPtr(); + if (hasTimeStamp && msgs->curPtr->timeStamp < recycleStamp) + { + skipInsert = TRUE; + PalInterlockedDecrement(&theLog.deadCount); + break; + } + + if (!oldestDeadMsg) + { + oldestDeadMsg = msgs; + } + else if (hasTimeStamp && oldestDeadMsg->curPtr->timeStamp > msgs->curPtr->timeStamp) + { + oldestDeadMsg = msgs; + } + } + + msgs = msgs->next; + } + + //if the total stress log size limit is already passed and we can't add new chunk, + //always reuse the oldest dead msg + if (!AllowNewChunk (0) && !msgs) + { + msgs = oldestDeadMsg; + skipInsert = TRUE; + PalInterlockedDecrement(&theLog.deadCount); + } + } + + if (msgs == 0) { + msgs = new (nothrow) ThreadStressLog(); + + if (msgs == 0 
||!msgs->IsValid ()) + { + delete msgs; + msgs = 0; + goto LEAVE; + } + } + + msgs->Activate (pThread); + + if (!skipInsert) { +#ifdef _DEBUG + ThreadStressLog* walk = VolatileLoad(&theLog.logs); + while (walk) + { + _ASSERTE (walk != msgs); + walk = walk->next; + } +#endif + // Put it into the stress log + msgs->next = VolatileLoad(&theLog.logs); + VolatileStore(&theLog.logs, msgs); + } + +LEAVE: + ; + return msgs; +} + +/*********************************************************************************/ +/* static */ +void StressLog::ThreadDetach(ThreadStressLog *msgs) { + + if (msgs == 0) + { + return; + } + + // We should write this message to the StressLog for deleted fiber. + msgs->LogMsg (LF_STARTUP, 0, "******* DllMain THREAD_DETACH called Thread dying *******\n"); + + msgs->isDead = TRUE; + PalInterlockedIncrement(&theLog.deadCount); +} + +bool StressLog::AllowNewChunk (long numChunksInCurThread) +{ + _ASSERTE (numChunksInCurThread <= VolatileLoad(&theLog.totalChunk)); + UInt32 perThreadLimit = theLog.MaxSizePerThread; + + if (numChunksInCurThread == 0 /*&& IsSuspendEEThread()*/) + return TRUE; + + if (ThreadStore::GetCurrentThread()->IsGCSpecial()) + { + perThreadLimit *= GC_STRESSLOG_MULTIPLY; + } + + if ((UInt32)numChunksInCurThread * STRESSLOG_CHUNK_SIZE >= perThreadLimit) + { + return FALSE; + } + + return (UInt32)VolatileLoad(&theLog.totalChunk) * STRESSLOG_CHUNK_SIZE < theLog.MaxSizeTotal; +} + +bool StressLog::ReserveStressLogChunks (unsigned chunksToReserve) +{ + Thread *pThread = ThreadStore::GetCurrentThread(); + ThreadStressLog* msgs = reinterpret_cast(pThread->GetThreadStressLog()); + if (msgs == 0) + { + msgs = CreateThreadStressLog(pThread); + + if (msgs == 0) + return FALSE; + } + + if (chunksToReserve == 0) + { + chunksToReserve = (theLog.MaxSizePerThread + STRESSLOG_CHUNK_SIZE - 1) / STRESSLOG_CHUNK_SIZE; + } + + long numTries = (long)chunksToReserve - msgs->chunkListLength; + for (long i = 0; i < numTries; i++) + { + msgs->GrowChunkList (); + } + + return msgs->chunkListLength >= (long)chunksToReserve; +} + +/*********************************************************************************/ +/* fetch a buffer that can be used to write a stress message, it is thread safe */ + +void ThreadStressLog::LogMsg ( UInt32 facility, int cArgs, const char* format, va_list Args) +{ + + // Asserts in this function cause infinite loops in the asserting mechanism. + // Just use debug breaks instead. + + ASSERT( cArgs >= 0 && cArgs <= StressMsg::maxArgCnt ); + + size_t offs = ((size_t)format - StressLog::theLog.moduleOffset); + + ASSERT(offs < StressMsg::maxOffset); + if (offs >= StressMsg::maxOffset) + { + // Set it to this string instead. 
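+        // (The format string is too far from the module base to be encoded, so record a
+        // harmless placeholder instead of an out-of-range offset.)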
+ offs = +#ifdef _DEBUG + (size_t)""; +#else // _DEBUG + 0; // a 0 offset is ignored by StressLog::Dump +#endif // _DEBUG else + } + + // Get next available slot + StressMsg* msg = AdvanceWrite(cArgs); + + msg->timeStamp = getTimeStamp(); + msg->facility = facility; + msg->formatOffset = offs; + msg->numberOfArgs = cArgs; + + for ( int i = 0; i < cArgs; ++i ) + { + void* data = va_arg(Args, void*); + msg->args[i] = data; + } + + ASSERT(IsValid() && threadId == PalGetCurrentThreadIdForLogging()); +} + + +void ThreadStressLog::Activate (Thread * pThread) +{ + _ASSERTE(pThread != NULL); + //there is no need to zero buffers because we could handle garbage contents + threadId = PalGetCurrentThreadIdForLogging(); + isDead = FALSE; + curWriteChunk = chunkListTail; + curPtr = (StressMsg *)curWriteChunk->EndPtr (); + writeHasWrapped = FALSE; + this->pThread = pThread; + ASSERT(pThread->IsCurrentThread()); +} + +/* static */ +void StressLog::LogMsg (unsigned facility, int cArgs, const char* format, ... ) +{ + _ASSERTE ( cArgs >= 0 && cArgs <= StressMsg::maxArgCnt ); + + va_list Args; + va_start(Args, format); + + Thread *pThread = ThreadStore::GetCurrentThread(); + if (pThread == NULL) + return; + + ThreadStressLog* msgs = reinterpret_cast(pThread->GetThreadStressLog()); + + if (msgs == 0) { + msgs = CreateThreadStressLog(pThread); + + if (msgs == 0) + return; + } + msgs->LogMsg (facility, cArgs, format, Args); +} + +#ifdef _DEBUG + +/* static */ +void StressLog::LogCallStack(const char *const callTag){ + + size_t CallStackTrace[MAX_CALL_STACK_TRACE]; + UInt32 hash; + unsigned short stackTraceCount = PalCaptureStackBackTrace (2, MAX_CALL_STACK_TRACE, (void**)CallStackTrace, &hash); + if (stackTraceCount > MAX_CALL_STACK_TRACE) + stackTraceCount = MAX_CALL_STACK_TRACE; + LogMsgOL("Start of %s stack \n", callTag); + unsigned short i = 0; + for (;i < stackTraceCount; i++) + { + LogMsgOL("(%s stack)%pK\n", callTag, CallStackTrace[i]); + } + LogMsgOL("End of %s stack\n", callTag); +} + +#endif //_DEBUG + +#else // DACCESS_COMPILE + +bool StressLog::Initialize() +{ + ThreadStressLog* logs = 0; + + ThreadStressLog* curThreadStressLog = this->logs; + unsigned __int64 lastTimeStamp = 0; // timestamp of last log entry + while(curThreadStressLog != 0) + { + if (!curThreadStressLog->IsReadyForRead()) + { + if (curThreadStressLog->origCurPtr == NULL) + curThreadStressLog->origCurPtr = curThreadStressLog->curPtr; + + // avoid repeated calls into this function + StressLogChunk * head = curThreadStressLog->chunkListHead; + StressLogChunk * curChunk = head; + bool curPtrInitialized = false; + do + { + if (!curChunk->IsValid ()) + { + // TODO: Report corrupt chunk PTR_HOST_TO_TADDR(curChunk) + } + + if (!curPtrInitialized && curChunk == curThreadStressLog->curWriteChunk) + { + // adjust curPtr to the debugger's address space + curThreadStressLog->curPtr = (StressMsg *)((UInt8 *)curChunk + ((UInt8 *)curThreadStressLog->curPtr - (UInt8 *)PTR_HOST_TO_TADDR(curChunk))); + curPtrInitialized = true; + } + + curChunk = curChunk->next; + } while (curChunk != head); + + if (!curPtrInitialized) + { + delete curThreadStressLog; + return false; + } + + // adjust readPtr and curPtr if needed + curThreadStressLog->Activate (NULL); + } + curThreadStressLog = curThreadStressLog->next; + } + return true; +} + +void StressLog::ResetForRead() +{ + ThreadStressLog* curThreadStressLog = this->logs; + while(curThreadStressLog != 0) + { + curThreadStressLog->readPtr = NULL; + curThreadStressLog->curPtr = curThreadStressLog->origCurPtr; + 
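+        // curPtr has been restored from origCurPtr so this log can be dumped again;
+        // move on to the next per-thread log.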
curThreadStressLog = curThreadStressLog->next; + } +} + +// Initialization of the ThreadStressLog when dumping the log +inline void ThreadStressLog::Activate (Thread * /*pThread*/) +{ + // avoid repeated calls into this function + if (IsReadyForRead()) + return; + + curReadChunk = curWriteChunk; + readPtr = curPtr; + readHasWrapped = false; + // the last written log, if it wrapped around may have partially overwritten + // a previous record. Update curPtr to reflect the last safe beginning of a record, + // but curPtr shouldn't wrap around, otherwise it'll break our assumptions about stress + // log + curPtr = (StressMsg*)((char*)curPtr - StressMsg::maxMsgSize()); + if (curPtr < (StressMsg*)curWriteChunk->StartPtr()) + { + curPtr = (StressMsg *)curWriteChunk->StartPtr(); + } + // corner case: the log is empty + if (readPtr == (StressMsg *)curReadChunk->EndPtr ()) + { + AdvReadPastBoundary(); + } +} + +ThreadStressLog* StressLog::FindLatestThreadLog() const +{ + const ThreadStressLog* latestLog = 0; + for (const ThreadStressLog* ptr = this->logs; ptr != NULL; ptr = ptr->next) + { + if (ptr->readPtr != NULL) + if (latestLog == 0 || ptr->readPtr->timeStamp > latestLog->readPtr->timeStamp) + latestLog = ptr; + } + return const_cast(latestLog); +} + +// Can't refer to the types in sospriv.h because it drags in windows.h +void StressLog::EnumerateStressMsgs(/*STRESSMSGCALLBACK*/void* smcbWrapper, /*ENDTHREADLOGCALLBACK*/void* etcbWrapper, void *token) +{ + STRESSMSGCALLBACK smcb = (STRESSMSGCALLBACK)smcbWrapper; + ENDTHREADLOGCALLBACK etcb = (ENDTHREADLOGCALLBACK) etcbWrapper; + void *argsCopy[StressMsg::maxArgCnt]; + + for (;;) + { + ThreadStressLog* latestLog = this->FindLatestThreadLog(); + + if (latestLog == 0) + { + break; + } + StressMsg* latestMsg = latestLog->readPtr; + if (latestMsg->formatOffset != 0 && !latestLog->CompletedDump()) + { + char format[256]; + TADDR taFmt = (latestMsg->formatOffset) + (TADDR)(this->moduleOffset); + HRESULT hr = DacReadAll(taFmt, format, _countof(format), false); + if (hr != S_OK) + strcpy_s(format, _countof(format), "Could not read address of format string"); + + double deltaTime = ((double) (latestMsg->timeStamp - this->startTimeStamp)) / this->tickFrequency; + + // Pass a copy of the args to the callback to avoid foreign code overwriting the stress log + // entries (this was the case for %s arguments) + memcpy_s(argsCopy, sizeof(argsCopy), latestMsg->args, (latestMsg->numberOfArgs)*sizeof(void*)); + + // @TODO: CORERT: Truncating threadId to 32-bit + if (!smcb((UINT32)latestLog->threadId, deltaTime, latestMsg->facility, format, argsCopy, token)) + break; + } + + latestLog->readPtr = latestLog->AdvanceRead(); + if (latestLog->CompletedDump()) + { + latestLog->readPtr = NULL; + + // @TODO: CORERT: Truncating threadId to 32-bit + if (!etcb((UINT32)latestLog->threadId, token)) + break; + } + } +} + +typedef DPTR(SIZE_T) PTR_SIZE_T; + +// Can't refer to the types in sospriv.h because it drags in windows.h +void StressLog::EnumStressLogMemRanges(/*STRESSLOGMEMRANGECALLBACK*/void* slmrcbWrapper, void *token) +{ + STRESSLOGMEMRANGECALLBACK slmrcb = (STRESSLOGMEMRANGECALLBACK)slmrcbWrapper; + + // we go to extreme lengths to ensure we don't read in the whole memory representation + // of the stress log, but only the ranges... 
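+    // (Each per-thread log owns a circular list of StressLogChunk blocks; only those
+    //  chunk-sized ranges are handed to the callback.)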
+ // + + size_t ThreadStressLogAddr = *dac_cast(PTR_HOST_MEMBER_TADDR(StressLog, this, logs)); + while (ThreadStressLogAddr != NULL) + { + size_t ChunkListHeadAddr = *dac_cast(ThreadStressLogAddr + offsetof(ThreadStressLog, chunkListHead)); + size_t StressLogChunkAddr = ChunkListHeadAddr; + + do + { + slmrcb(StressLogChunkAddr, sizeof (StressLogChunk), token); + StressLogChunkAddr = *dac_cast(StressLogChunkAddr + offsetof (StressLogChunk, next)); + if (StressLogChunkAddr == NULL) + { + return; + } + } while (StressLogChunkAddr != ChunkListHeadAddr); + + ThreadStressLogAddr = *dac_cast(ThreadStressLogAddr + offsetof(ThreadStressLog, next)); + } +} + + +#endif // !DACCESS_COMPILE + +#endif // STRESS_LOG + diff --git a/src/coreclr/src/nativeaot/Runtime/strongname.cpp b/src/coreclr/src/nativeaot/Runtime/strongname.cpp new file mode 100644 index 0000000000000..5177ec086b491 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/strongname.cpp @@ -0,0 +1,44 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged helpers for strong name parsing. +// + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "sha1.h" + +// +// Converts a public key into a public key token, by computing the SHA1 of the public key, then taking the last 8 bytes in reverse order. +// +// The only legal value for "cbPublicKeyTokenOut" is 8 - this parameter exists as defense in depth. +// + +#define PUBLIC_KEY_TOKEN_LEN 8 + +COOP_PINVOKE_HELPER(void, RhConvertPublicKeyToPublicKeyToken, (const UInt8* pbPublicKey, int cbPublicKey, UInt8 *pbPublicKeyTokenOut, int cbPublicKeyTokenOut)) +{ + ASSERT(pbPublicKey != NULL); + ASSERT(pbPublicKeyTokenOut != NULL); + + if (cbPublicKeyTokenOut != PUBLIC_KEY_TOKEN_LEN) + { + RhFailFast(); + } + + SHA1Hash sha1; + sha1.AddData(pbPublicKey, cbPublicKey); + UInt8* pHash = sha1.GetHash(); + + for (int i = 0; i < PUBLIC_KEY_TOKEN_LEN; i++) + { + pbPublicKeyTokenOut[i] = pHash[SHA1_HASH_SIZE - i - 1]; + } + + return; +} + diff --git a/src/coreclr/src/nativeaot/Runtime/thread.cpp b/src/coreclr/src/nativeaot/Runtime/thread.cpp new file mode 100644 index 0000000000000..3a4414b9a35f4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/thread.cpp @@ -0,0 +1,1425 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" +#include "RuntimeInstance.h" +#include "shash.h" +#include "rhbinder.h" +#include "stressLog.h" +#include "RhConfig.h" + +#ifndef DACCESS_COMPILE + +EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpHandleAlloc(void* pObject, int type); +EXTERN_C REDHAWK_API void REDHAWK_CALLCONV RhHandleSet(void* handle, void* pObject); +EXTERN_C REDHAWK_API void REDHAWK_CALLCONV RhHandleFree(void* handle); + +static int (*g_RuntimeInitializationCallback)(); +static Thread* g_RuntimeInitializingThread; + +#ifdef _MSC_VER +extern "C" void _ReadWriteBarrier(void); +#pragma intrinsic(_ReadWriteBarrier) +#else // _MSC_VER +#define _ReadWriteBarrier() __asm__ volatile("" : : : "memory") +#endif // _MSC_VER +#endif //!DACCESS_COMPILE + +PTR_VOID Thread::GetTransitionFrame() +{ + if (ThreadStore::GetSuspendingThread() == this) + { + // This thread is in cooperative mode, so we grab the transition frame + // from the 'tunnel' location, which will have the frame from the most + // recent 'cooperative pinvoke' transition that brought us here. + ASSERT(m_pHackPInvokeTunnel != NULL); + return m_pHackPInvokeTunnel; + } + + ASSERT(m_pCachedTransitionFrame != NULL); + return m_pCachedTransitionFrame; +} + +#ifndef DACCESS_COMPILE + +PTR_VOID Thread::GetTransitionFrameForStackTrace() +{ + ASSERT_MSG(ThreadStore::GetSuspendingThread() == NULL, "Not allowed when suspended for GC."); + ASSERT_MSG(this == ThreadStore::GetCurrentThread(), "Only supported for current thread."); + ASSERT(Thread::IsCurrentThreadInCooperativeMode()); + ASSERT(m_pHackPInvokeTunnel != NULL); + return m_pHackPInvokeTunnel; +} + +void Thread::WaitForSuspend() +{ + Unhijack(); + GetThreadStore()->WaitForSuspendComplete(); +} + +void Thread::WaitForGC(void * pTransitionFrame) +{ + ASSERT(!IsDoNotTriggerGcSet()); + + do + { + m_pTransitionFrame = pTransitionFrame; + + Unhijack(); + RedhawkGCInterface::WaitForGCCompletion(); + + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. + _ReadWriteBarrier(); + } + while (ThreadStore::IsTrapThreadsRequested()); +} + +// +// This is used by the suspension code when driving all threads to unmanaged code. It is performed after +// the FlushProcessWriteBuffers call so that we know that once the thread reaches unmanaged code, it won't +// reenter managed code. Therefore, the m_pTransitionFrame is stable. Except that it isn't. The return-to- +// managed sequence will temporarily overwrite the m_pTransitionFrame to be 0. As a result, we need to cache +// the non-zero m_pTransitionFrame value that we saw during suspend so that stackwalks can read this value +// without concern of sometimes reading a 0, as would be the case if they read m_pTransitionFrame directly. +// +// Returns true if it sucessfully cached the transition frame (i.e. the thread was in unmanaged). +// Returns false otherwise. 
+// +bool Thread::CacheTransitionFrameForSuspend() +{ + if (m_pCachedTransitionFrame != NULL) + return true; + + PTR_VOID temp = m_pTransitionFrame; // volatile read + if (temp == NULL) + return false; + + m_pCachedTransitionFrame = temp; + return true; +} + +void Thread::ResetCachedTransitionFrame() +{ + // @TODO: I don't understand this assert because ResumeAllThreads is clearly written + // to be reseting other threads' cached transition frames. + + //ASSERT((ThreadStore::GetCurrentThreadIfAvailable() == this) || + // (m_pCachedTransitionFrame != NULL)); + m_pCachedTransitionFrame = NULL; +} + +// This function simulates a PInvoke transition using a frame pointer from somewhere further up the stack that +// was passed in via the m_pHackPInvokeTunnel field. It is used to allow us to grandfather-in the set of GC +// code that runs in cooperative mode without having to rewrite it in managed code. The result is that the +// code that calls into this special mode must spill preserved registers as if it's going to PInvoke, but +// record its transition frame pointer in m_pHackPInvokeTunnel and leave the thread in the cooperative +// mode. Later on, when this function is called, we effect the state transition to 'unmanaged' using the +// previously setup transition frame. +void Thread::EnablePreemptiveMode() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); +#if !defined(HOST_WASM) + ASSERT(m_pHackPInvokeTunnel != NULL); +#endif + + Unhijack(); + + // ORDERING -- this write must occur before checking the trap + m_pTransitionFrame = m_pHackPInvokeTunnel; + + // We need to prevent compiler reordering between above write and below read. Both the read and the write + // are volatile, so it's possible that the particular semantic for volatile that MSVC provides is enough, + // but if not, this barrier would be required. If so, it won't change anything to add the barrier. + _ReadWriteBarrier(); + + if (ThreadStore::IsTrapThreadsRequested()) + { + WaitForSuspend(); + } +} + +void Thread::DisablePreemptiveMode() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + + // ORDERING -- this write must occur before checking the trap + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. Both the read and the write + // are volatile, so it's possible that the particular semantic for volatile that MSVC provides is enough, + // but if not, this barrier would be required. If so, it won't change anything to add the barrier. + _ReadWriteBarrier(); + + if (ThreadStore::IsTrapThreadsRequested() && (this != ThreadStore::GetSuspendingThread())) + { + WaitForGC(m_pHackPInvokeTunnel); + } +} +#endif // !DACCESS_COMPILE + +bool Thread::IsCurrentThreadInCooperativeMode() +{ +#ifndef DACCESS_COMPILE + ASSERT(ThreadStore::GetCurrentThread() == this); +#endif // !DACCESS_COMPILE + return (m_pTransitionFrame == NULL); +} + +// +// This is used by the EH system to find the place where execution left managed code when an exception leaks out of a +// pinvoke and we need to FailFast via the appropriate class library. +// +// May only be used from the same thread and while in preemptive mode with an active pinvoke on the stack. 
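+// (The address is read from the m_RIP field of the PInvokeTransitionFrame currently recorded in
+// m_pTransitionFrame.)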
+// +#ifndef DACCESS_COMPILE +void * Thread::GetCurrentThreadPInvokeReturnAddress() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + ASSERT(!IsCurrentThreadInCooperativeMode()); + return ((PInvokeTransitionFrame*)m_pTransitionFrame)->m_RIP; +} +#endif // !DACCESS_COMPILE + + + +PTR_UInt8 Thread::GetTEB() +{ + return m_pTEB; +} + +#ifndef DACCESS_COMPILE +void Thread::SetThreadStressLog(void * ptsl) +{ + m_pThreadStressLog = ptsl; +} +#endif // DACCESS_COMPILE + +PTR_VOID Thread::GetThreadStressLog() const +{ + return m_pThreadStressLog; +} + +#if defined(FEATURE_GC_STRESS) & !defined(DACCESS_COMPILE) +void Thread::SetRandomSeed(UInt32 seed) +{ + ASSERT(!IsStateSet(TSF_IsRandSeedSet)); + m_uRand = seed; + SetState(TSF_IsRandSeedSet); +} + +// Generates pseudo random numbers in the range [0, 2^31) +// using only multiplication and addition +UInt32 Thread::NextRand() +{ + // Uses Carta's algorithm for Park-Miller's PRNG: + // x_{k+1} = 16807 * x_{k} mod (2^31-1) + + UInt32 hi,lo; + + // (high word of seed) * 16807 - at most 31 bits + hi = 16807 * (m_uRand >> 16); + // (low word of seed) * 16807 - at most 31 bits + lo = 16807 * (m_uRand & 0xFFFF); + + // Proof that below operations (multiplication and addition only) + // are equivalent to the original formula: + // x_{k+1} = 16807 * x_{k} mod (2^31-1) + // We denote hi2 as the low 15 bits in hi, + // and hi1 as the remaining 16 bits in hi: + // (hi * 2^16 + lo) mod (2^31-1) = + // ((hi1 * 2^15 + hi2) * 2^16 + lo) mod (2^31-1) = + // ( hi1 * 2^31 + hi2 * 2^16 + lo) mod (2^31-1) = + // ( hi1 * (2^31-1) + hi1 + hi2 * 2^16 + lo) mod (2^31-1) = + // ( hi2 * 2^16 + hi1 + lo ) mod (2^31-1) + + // lo + (hi2 * 2^16) + lo += (hi & 0x7FFF) << 16; + // lo + (hi2 * 2^16) + hi1 + lo += (hi >> 15); + // modulo (2^31-1) + if (lo > 0x7fffFFFF) + lo -= 0x7fffFFFF; + + m_uRand = lo; + + return m_uRand; +} + +bool Thread::IsRandInited() +{ + return IsStateSet(TSF_IsRandSeedSet); +} +#endif // FEATURE_GC_STRESS & !DACCESS_COMPILE + +PTR_ExInfo Thread::GetCurExInfo() +{ + ValidateExInfoStack(); + return m_pExInfoStackHead; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifndef DACCESS_COMPILE + +void Thread::Construct() +{ +#ifndef USE_PORTABLE_HELPERS + C_ASSERT(OFFSETOF__Thread__m_pTransitionFrame == + (offsetof(Thread, m_pTransitionFrame))); +#endif // USE_PORTABLE_HELPERS + + m_numDynamicTypesTlsCells = 0; + m_pDynamicTypesTlsCells = NULL; + + m_pThreadLocalModuleStatics = NULL; + m_numThreadLocalModuleStatics = 0; + + // NOTE: We do not explicitly defer to the GC implementation to initialize the alloc_context. The + // alloc_context will be initialized to 0 via the static initialization of tls_CurrentThread. If the + // alloc_context ever needs different initialization, a matching change to the tls_CurrentThread + // static initialization will need to be made. + + m_uPalThreadIdForLogging = PalGetCurrentThreadIdForLogging(); + m_threadId.SetToCurrentThread(); + + HANDLE curProcessPseudo = PalGetCurrentProcess(); + HANDLE curThreadPseudo = PalGetCurrentThread(); + + // This can fail! Users of m_hPalThread must be able to handle INVALID_HANDLE_VALUE!! 
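+    // (Thread::Hijack, for example, checks for INVALID_HANDLE_VALUE and simply declines to
+    // hijack such a thread.)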
+ PalDuplicateHandle(curProcessPseudo, curThreadPseudo, curProcessPseudo, &m_hPalThread, + 0, // ignored + FALSE, // inherit + DUPLICATE_SAME_ACCESS); + + if (!PalGetMaximumStackBounds(&m_pStackLow, &m_pStackHigh)) + RhFailFast(); + + m_pTEB = PalNtCurrentTeb(); + +#ifdef STRESS_LOG + if (StressLog::StressLogOn(~0u, 0)) + m_pThreadStressLog = StressLog::CreateThreadStressLog(this); +#endif // STRESS_LOG + + m_threadAbortException = NULL; +} + +bool Thread::IsInitialized() +{ + return (m_ThreadStateFlags != TSF_Unknown); +} + +// ----------------------------------------------------------------------------------------------------------- +// GC support APIs - do not use except from GC itself +// +void Thread::SetGCSpecial(bool isGCSpecial) +{ + if (!IsInitialized()) + Construct(); + if (isGCSpecial) + SetState(TSF_IsGcSpecialThread); + else + ClearState(TSF_IsGcSpecialThread); +} + +bool Thread::IsGCSpecial() +{ + return IsStateSet(TSF_IsGcSpecialThread); +} + +bool Thread::CatchAtSafePoint() +{ + // This is only called by the GC on a background GC worker thread that's explicitly interested in letting + // a foreground GC proceed at that point. So it's always safe to return true. + ASSERT(IsGCSpecial()); + return true; +} + +UInt64 Thread::GetPalThreadIdForLogging() +{ + return m_uPalThreadIdForLogging; +} + +bool Thread::IsCurrentThread() +{ + return m_threadId.IsCurrentThread(); +} + +void Thread::Destroy() +{ + if (m_hPalThread != INVALID_HANDLE_VALUE) + PalCloseHandle(m_hPalThread); + + if (m_pDynamicTypesTlsCells != NULL) + { + for (UInt32 i = 0; i < m_numDynamicTypesTlsCells; i++) + { + if (m_pDynamicTypesTlsCells[i] != NULL) + delete[] m_pDynamicTypesTlsCells[i]; + } + delete[] m_pDynamicTypesTlsCells; + } + + if (m_pThreadLocalModuleStatics != NULL) + { + for (UInt32 i = 0; i < m_numThreadLocalModuleStatics; i++) + { + if (m_pThreadLocalModuleStatics[i] != NULL) + { + RhHandleFree(m_pThreadLocalModuleStatics[i]); + } + } + delete[] m_pThreadLocalModuleStatics; + } + + RedhawkGCInterface::ReleaseAllocContext(GetAllocContext()); + + // Thread::Destroy is called when the thread's "home" fiber dies. We mark the thread as "detached" here + // so that we can validate, in our DLL_THREAD_DETACH handler, that the thread was already destroyed at that + // point. 
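+    // (See DllThreadDetach in startup.cpp, which fail-fasts if it finds a thread that has not
+    // been detached, unless process shutdown has already begun.)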
+ SetDetached(); +} + +#ifdef HOST_WASM +extern RtuObjectRef * t_pShadowStackTop; +extern RtuObjectRef * t_pShadowStackBottom; + +void GcScanWasmShadowStack(void * pfnEnumCallback, void * pvCallbackData) +{ + // Wasm does not permit iteration of stack frames so is uses a shadow stack instead + RedhawkGCInterface::EnumGcRefsInRegionConservatively(t_pShadowStackBottom, t_pShadowStackTop, pfnEnumCallback, pvCallbackData); +} +#endif + +void Thread::GcScanRoots(void * pfnEnumCallback, void * pvCallbackData) +{ +#ifdef HOST_WASM + GcScanWasmShadowStack(pfnEnumCallback, pvCallbackData); +#else + StackFrameIterator frameIterator(this, GetTransitionFrame()); + GcScanRootsWorker(pfnEnumCallback, pvCallbackData, frameIterator); +#endif +} + +#endif // !DACCESS_COMPILE + +#ifdef DACCESS_COMPILE +// A trivial wrapper that unpacks the DacScanCallbackData and calls the callback provided to GcScanRoots +void GcScanRootsCallbackWrapper(PTR_RtuObjectRef ppObject, DacScanCallbackData* callbackData, UInt32 flags) +{ + Thread::GcScanRootsCallbackFunc * pfnUserCallback = (Thread::GcScanRootsCallbackFunc *)callbackData->pfnUserCallback; + pfnUserCallback(ppObject, callbackData->token, flags); +} + +bool Thread::GcScanRoots(GcScanRootsCallbackFunc * pfnEnumCallback, void * token, PTR_PAL_LIMITED_CONTEXT pInitialContext) +{ + DacScanCallbackData callbackDataWrapper; + callbackDataWrapper.thread_under_crawl = this; + callbackDataWrapper.promotion = true; + callbackDataWrapper.token = token; + callbackDataWrapper.pfnUserCallback = pfnEnumCallback; + //When debugging we might be trying to enumerate with or without a transition frame + //on top of the stack. If there is one use it, otherwise the debugger provides a set of initial registers + //to use. + PTR_VOID pTransitionFrame = GetTransitionFrame(); + if(pTransitionFrame != NULL) + { + StackFrameIterator frameIterator(this, GetTransitionFrame()); + GcScanRootsWorker(&GcScanRootsCallbackWrapper, &callbackDataWrapper, frameIterator); + } + else + { + if(pInitialContext == NULL) + return false; + StackFrameIterator frameIterator(this, pInitialContext); + GcScanRootsWorker(&GcScanRootsCallbackWrapper, &callbackDataWrapper, frameIterator); + } + return true; +} +#endif //DACCESS_COMPILE + +void Thread::GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, StackFrameIterator & frameIterator) +{ + PTR_RtuObjectRef pHijackedReturnValue = NULL; + GCRefKind returnValueKind = GCRK_Unknown; + + if (frameIterator.GetHijackedReturnValueLocation(&pHijackedReturnValue, &returnValueKind)) + { +#ifdef TARGET_ARM64 + GCRefKind reg0Kind = ExtractReg0ReturnKind(returnValueKind); + GCRefKind reg1Kind = ExtractReg1ReturnKind(returnValueKind); + + // X0 and X1 are saved next to each other in this order + if (reg0Kind != GCRK_Scalar) + { + RedhawkGCInterface::EnumGcRef(pHijackedReturnValue, reg0Kind, pfnEnumCallback, pvCallbackData); + } + if (reg1Kind != GCRK_Scalar) + { + RedhawkGCInterface::EnumGcRef(pHijackedReturnValue + 1, reg1Kind, pfnEnumCallback, pvCallbackData); + } +#else + RedhawkGCInterface::EnumGcRef(pHijackedReturnValue, returnValueKind, pfnEnumCallback, pvCallbackData); +#endif + } + +#ifndef DACCESS_COMPILE + if (GetRuntimeInstance()->IsConservativeStackReportingEnabled()) + { + if (frameIterator.IsValid()) + { + PTR_VOID pLowerBound = dac_cast(frameIterator.GetRegisterSet()->GetSP()); + + // Transition frame may contain callee saved registers that need to be reported as well + PTR_VOID pTransitionFrame = GetTransitionFrame(); + ASSERT(pTransitionFrame != 
NULL); + if (pTransitionFrame < pLowerBound) + pLowerBound = pTransitionFrame; + + PTR_VOID pUpperBound = m_pStackHigh; + + RedhawkGCInterface::EnumGcRefsInRegionConservatively( + dac_cast(pLowerBound), + dac_cast(pUpperBound), + pfnEnumCallback, + pvCallbackData); + } + } + else +#endif // !DACCESS_COMPILE + { + while (frameIterator.IsValid()) + { + frameIterator.CalculateCurrentMethodState(); + + STRESS_LOG1(LF_GCROOTS, LL_INFO1000, "Scanning method %pK\n", (void*)frameIterator.GetRegisterSet()->IP); + + if (!frameIterator.ShouldSkipRegularGcReporting()) + { + RedhawkGCInterface::EnumGcRefs(frameIterator.GetCodeManager(), + frameIterator.GetMethodInfo(), + frameIterator.GetEffectiveSafePointAddress(), + frameIterator.GetRegisterSet(), + pfnEnumCallback, + pvCallbackData); + } + + // Each enumerated frame (including the first one) may have an associated stack range we need to + // report conservatively (every pointer aligned value that looks like it might be a GC reference is + // reported as a pinned interior reference). This occurs in an edge case where a managed method whose + // signature the runtime is not aware of calls into the runtime which subsequently calls back out + // into managed code (allowing the possibility of a garbage collection). This can happen in certain + // interface invocation slow paths for instance. Since the original managed call may have passed GC + // references which are unreported by any managed method on the stack at the time of the GC we + // identify (again conservatively) the range of the stack that might contain these references and + // report everything. Since it should be a very rare occurrence indeed that we actually have to do + // this this, it's considered a better trade-off than storing signature metadata for every potential + // callsite of the type described above. + if (frameIterator.HasStackRangeToReportConservatively()) + { + PTR_RtuObjectRef pLowerBound; + PTR_RtuObjectRef pUpperBound; + frameIterator.GetStackRangeToReportConservatively(&pLowerBound, &pUpperBound); + RedhawkGCInterface::EnumGcRefsInRegionConservatively(pLowerBound, + pUpperBound, + pfnEnumCallback, + pvCallbackData); + } + + frameIterator.Next(); + } + } + + // ExInfos hold exception objects that are not reported by anyone else. In fact, sometimes they are in + // logically dead parts of the stack that the typical GC stackwalk skips. (This happens in the case where + // one exception dispatch superseded a previous one.) We keep them alive as long as they are in the + // ExInfo chain to aid in post-mortem debugging. SOS will access them through the DAC and the exported + // API, RhGetExceptionsForCurrentThread, will access them at runtime to gather additional information to + // add to a dump file during FailFast. 
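+    // (Each ExInfo's m_exception field is reported below as an ordinary object reference so the
+    // GC keeps the exception object alive.)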
+ for (PTR_ExInfo curExInfo = GetCurExInfo(); curExInfo != NULL; curExInfo = curExInfo->m_pPrevExInfo) + { + PTR_RtuObjectRef pExceptionObj = dac_cast(&curExInfo->m_exception); + RedhawkGCInterface::EnumGcRef(pExceptionObj, GCRK_Object, pfnEnumCallback, pvCallbackData); + } + + // Keep alive the ThreadAbortException that's stored in the target thread during thread abort + PTR_RtuObjectRef pThreadAbortExceptionObj = dac_cast(&m_threadAbortException); + RedhawkGCInterface::EnumGcRef(pThreadAbortExceptionObj, GCRK_Object, pfnEnumCallback, pvCallbackData); +} + +#ifndef DACCESS_COMPILE + +#ifndef TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcProbeHijackScalar(); +EXTERN_C void FASTCALL RhpGcProbeHijackObject(); +EXTERN_C void FASTCALL RhpGcProbeHijackByref(); + +static void* NormalHijackTargets[3] = +{ + reinterpret_cast(RhpGcProbeHijackScalar), // GCRK_Scalar = 0, + reinterpret_cast(RhpGcProbeHijackObject), // GCRK_Object = 1, + reinterpret_cast(RhpGcProbeHijackByref) // GCRK_Byref = 2, +}; +#else // TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcProbeHijack(); + +static void* NormalHijackTargets[1] = +{ + reinterpret_cast(RhpGcProbeHijack) +}; +#endif // TARGET_ARM64 + +#ifdef FEATURE_GC_STRESS +#ifndef TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcStressHijackScalar(); +EXTERN_C void FASTCALL RhpGcStressHijackObject(); +EXTERN_C void FASTCALL RhpGcStressHijackByref(); + +static void* GcStressHijackTargets[3] = +{ + reinterpret_cast(RhpGcStressHijackScalar), // GCRK_Scalar = 0, + reinterpret_cast(RhpGcStressHijackObject), // GCRK_Object = 1, + reinterpret_cast(RhpGcStressHijackByref) // GCRK_Byref = 2, +}; +#else // TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcStressHijack(); + +static void* GcStressHijackTargets[1] = +{ + reinterpret_cast(RhpGcStressHijack) +}; +#endif // TARGET_ARM64 +#endif // FEATURE_GC_STRESS + +// static +bool Thread::IsHijackTarget(void * address) +{ + for (int i = 0; i < COUNTOF(NormalHijackTargets); i++) + { + if (NormalHijackTargets[i] == address) + return true; + } +#ifdef FEATURE_GC_STRESS + for (int i = 0; i < COUNTOF(GcStressHijackTargets); i++) + { + if (GcStressHijackTargets[i] == address) + return true; + } +#endif // FEATURE_GC_STRESS + return false; +} + +bool Thread::Hijack() +{ + ASSERT(ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread()); + + ASSERT_MSG(ThreadStore::GetSuspendingThread() != this, "You may not hijack a thread from itself."); + + if (m_hPalThread == INVALID_HANDLE_VALUE) + { + // cannot proceed + return false; + } + + // requires THREAD_SUSPEND_RESUME / THREAD_GET_CONTEXT / THREAD_SET_CONTEXT permissions + + return PalHijack(m_hPalThread, HijackCallback, this) == 0; +} + +UInt32_BOOL Thread::HijackCallback(HANDLE /*hThread*/, PAL_LIMITED_CONTEXT* pThreadContext, void* pCallbackContext) +{ + Thread* pThread = (Thread*) pCallbackContext; + + // + // WARNING: The hijack operation will take a read lock on the RuntimeInstance's module list. + // (This is done to find a Module based on an IP.) Therefore, if the thread we've just + // suspended owns the write lock on the module list, we'll deadlock with it when we try to + // take the read lock below. So we must attempt a non-blocking acquire of the read lock + // early and fail the hijack if we can't get it. This will cause us to simply retry later. + // + if (GetRuntimeInstance()->m_ModuleListLock.DangerousTryPulseReadLock()) + { + if (pThread->CacheTransitionFrameForSuspend()) + { + // IMPORTANT: GetThreadContext should not be trusted arbitrarily. 
We are careful here to recheck + // the thread's state flag that indicates whether or not it has made it to unmanaged code. If + // it has reached unmanaged code (even our own wait helper routines), then we cannot trust the + // context returned by it. This is due to various races that occur updating the reported context + // during syscalls. + return TRUE; + } + else + { + return pThread->InternalHijack(pThreadContext, NormalHijackTargets) ? TRUE : FALSE; + } + } + + return FALSE; +} + +#ifdef FEATURE_GC_STRESS +// This is a helper called from RhpHijackForGcStress which will place a GC Stress +// hijack on this thread's call stack. This is never called from another thread. +// static +void Thread::HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx) +{ + Thread * pCurrentThread = ThreadStore::GetCurrentThread(); + + // don't hijack for GC stress if we're in a "no GC stress" region + if (pCurrentThread->IsSuppressGcStressSet()) + return; + + RuntimeInstance * pInstance = GetRuntimeInstance(); + + UIntNative ip = pSuspendCtx->GetIp(); + + bool bForceGC = g_pRhConfig->GetGcStressThrottleMode() == 0; + // we enable collecting statistics by callsite even for stochastic-only + // stress mode. this will force a stack walk, but it's worthwhile for + // collecting data (we only actually need the IP when + // (g_pRhConfig->GetGcStressThrottleMode() & 1) != 0) + if (!bForceGC) + { + StackFrameIterator sfi(pCurrentThread, pSuspendCtx); + if (sfi.IsValid()) + { + pCurrentThread->Unhijack(); + sfi.CalculateCurrentMethodState(); + // unwind to method below the one whose epilog set up the hijack + sfi.Next(); + if (sfi.IsValid()) + { + ip = sfi.GetRegisterSet()->GetIP(); + } + } + } + if (bForceGC || pInstance->ShouldHijackCallsiteForGcStress(ip)) + { + pCurrentThread->InternalHijack(pSuspendCtx, GcStressHijackTargets); + } +} +#endif // FEATURE_GC_STRESS + +// This function is called in one of two scenarios: +// 1) from a thread to place a return hijack onto its own stack. This is only done for GC stress cases +// via Thread::HijackForGcStress above. +// 2) from another thread to place a return hijack onto this thread's stack. In this case the target +// thread is OS suspended someplace in managed code. The only constraint on the suspension is that the +// stack be crawlable enough to yield the location of the return address. +bool Thread::InternalHijack(PAL_LIMITED_CONTEXT * pSuspendCtx, void * pvHijackTargets[]) +{ + bool fSuccess = false; + + if (IsDoNotTriggerGcSet()) + return false; + + StackFrameIterator frameIterator(this, pSuspendCtx); + + if (frameIterator.IsValid()) + { + frameIterator.CalculateCurrentMethodState(); + + frameIterator.GetCodeManager()->UnsynchronizedHijackMethodLoops(frameIterator.GetMethodInfo()); + + PTR_PTR_VOID ppvRetAddrLocation; + GCRefKind retValueKind; + + if (frameIterator.GetCodeManager()->GetReturnAddressHijackInfo(frameIterator.GetMethodInfo(), + frameIterator.GetRegisterSet(), + &ppvRetAddrLocation, + &retValueKind)) + { + // ARM64 epilogs have a window between loading the hijackable return address into LR and the RET instruction. + // We cannot hijack or unhijack a thread while it is suspended in that window unless we implement hijacking + // via LR register modification. Therefore it is important to check our ability to hijack the thread before + // unhijacking it. 
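+ // Conceptually, installing the hijack is just a return-address swap (illustrative summary of
+ // the code below):
+ //
+ //     m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation;   // remember the slot
+ //     m_pvHijackedReturnAddress          = *ppvRetAddrLocation;  // remember the real return address
+ //     *ppvRetAddrLocation                = hijack stub chosen by the return value's GCRefKind;
+ //
+ // UnhijackWorker simply writes the saved value back, which is why a hijack and an unhijack of
+ // the same thread must never run concurrently.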
+ CrossThreadUnhijack(); + + void* pvRetAddr = *ppvRetAddrLocation; + ASSERT(ppvRetAddrLocation != NULL); + ASSERT(pvRetAddr != NULL); + + ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); + + m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; + m_pvHijackedReturnAddress = pvRetAddr; +#ifdef TARGET_ARM64 + m_uHijackedReturnValueFlags = ReturnKindToTransitionFrameFlags(retValueKind); + *ppvRetAddrLocation = pvHijackTargets[0]; +#else + void* pvHijackTarget = pvHijackTargets[retValueKind]; + ASSERT_MSG(IsHijackTarget(pvHijackTarget), "unexpected method used as hijack target"); + *ppvRetAddrLocation = pvHijackTarget; +#endif + fSuccess = true; + } + } + + STRESS_LOG3(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p, result = %d\n", + GetPalThreadIdForLogging(), pSuspendCtx->GetIp(), fSuccess); + + return fSuccess; +} + +// This is the standard Unhijack, which is only allowed to be called on your own thread. +// Note that all the asm-implemented Unhijacks should also only be operating on their +// own thread. +void Thread::Unhijack() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + UnhijackWorker(); +} + +// This unhijack routine is only called from Thread::InternalHijack() to undo a possibly existing +// hijack before placing a new one. Although there are many code sequences (here and in asm) to +// perform an unhijack operation, they will never execute concurrently. A thread may unhijack itself +// at any time so long as it does so from unmanaged code. This ensures that another thread will not +// suspend it and attempt to unhijack it, since we only suspend threads that are executing managed +// code. +void Thread::CrossThreadUnhijack() +{ + ASSERT((ThreadStore::GetCurrentThread() == this) || DebugIsSuspended()); + UnhijackWorker(); +} + +// This is the hijack worker routine which merely implements the hijack mechanism. +// DO NOT USE DIRECTLY. Use Unhijack() or CrossThreadUnhijack() instead. +void Thread::UnhijackWorker() +{ + if (m_pvHijackedReturnAddress == NULL) + { + ASSERT(m_ppvHijackedReturnAddressLocation == NULL); + return; + } + + // Restore the original return address. + ASSERT(m_ppvHijackedReturnAddressLocation != NULL); + *m_ppvHijackedReturnAddressLocation = m_pvHijackedReturnAddress; + + // Clear the hijack state. + m_ppvHijackedReturnAddressLocation = NULL; + m_pvHijackedReturnAddress = NULL; +#ifdef TARGET_ARM64 + m_uHijackedReturnValueFlags = 0; +#endif +} + +#if _DEBUG +bool Thread::DebugIsSuspended() +{ + ASSERT(ThreadStore::GetCurrentThread() != this); +#if 0 + PalSuspendThread(m_hPalThread); + UInt32 suspendCount = PalResumeThread(m_hPalThread); + return (suspendCount > 0); +#else + // @TODO: I don't trust the above implementation, so I want to implement this myself + // by marking the thread state as "yes, we suspended it" and checking that state here. + return true; +#endif +} +#endif + +// @TODO: it would be very, very nice if we did not have to bleed knowledge of hijacking +// and hijack state to other components in the runtime. For now, these are only used +// when getting EH info during exception dispatch. We should find a better way to encapsulate +// this. +bool Thread::IsHijacked() +{ + // Note: this operation is only valid from the current thread. If one thread invokes + // this on another then it may be racing with other changes to the thread's hijack state. 
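+ // Cross-thread queries must instead go through DangerousCrossThreadIsHijacked below, which is
+ // only legal once all threads have been suspended for a stackwalk.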
+ ASSERT(ThreadStore::GetCurrentThread() == this); + + return m_pvHijackedReturnAddress != NULL; +} + +// +// WARNING: This method must ONLY be called during stackwalks when we believe that all threads are +// synchronized and there is no other thread racing with us trying to apply hijacks. +// +bool Thread::DangerousCrossThreadIsHijacked() +{ + // If we have a CachedTransitionFrame available, then we're in the proper state. Otherwise, this method + // was called from an improper state. + ASSERT(GetTransitionFrame() != NULL); + return m_pvHijackedReturnAddress != NULL; +} + +void * Thread::GetHijackedReturnAddress() +{ + // Note: this operation is only valid from the current thread. If one thread invokes + // this on another then it may be racing with other changes to the thread's hijack state. + ASSERT(IsHijacked()); + ASSERT(ThreadStore::GetCurrentThread() == this); + + return m_pvHijackedReturnAddress; +} + +void * Thread::GetUnhijackedReturnAddress(void ** ppvReturnAddressLocation) +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + + void * pvReturnAddress; + if (m_ppvHijackedReturnAddressLocation == ppvReturnAddressLocation) + pvReturnAddress = m_pvHijackedReturnAddress; + else + pvReturnAddress = *ppvReturnAddressLocation; + + ASSERT(NULL != GetRuntimeInstance()->FindCodeManagerByAddress(pvReturnAddress)); + return pvReturnAddress; +} + +void Thread::SetState(ThreadStateFlags flags) +{ + PalInterlockedOr(&m_ThreadStateFlags, flags); +} + +void Thread::ClearState(ThreadStateFlags flags) +{ + PalInterlockedAnd(&m_ThreadStateFlags, ~flags); +} + +bool Thread::IsStateSet(ThreadStateFlags flags) +{ + return ((m_ThreadStateFlags & flags) == (UInt32) flags); +} + +bool Thread::IsSuppressGcStressSet() +{ + return IsStateSet(TSF_SuppressGcStress); +} + +void Thread::SetSuppressGcStress() +{ + ASSERT(!IsStateSet(TSF_SuppressGcStress)); + SetState(TSF_SuppressGcStress); +} + +void Thread::ClearSuppressGcStress() +{ + ASSERT(IsStateSet(TSF_SuppressGcStress)); + ClearState(TSF_SuppressGcStress); +} + +#endif //!DACCESS_COMPILE + +#ifndef DACCESS_COMPILE +#ifdef FEATURE_GC_STRESS +#ifdef HOST_X86 // the others are implemented in assembly code to avoid trashing the argument registers +EXTERN_C void FASTCALL RhpSuppressGcStress() +{ + ThreadStore::GetCurrentThread()->SetSuppressGcStress(); +} +#endif // HOST_X86 + +EXTERN_C void FASTCALL RhpUnsuppressGcStress() +{ + ThreadStore::GetCurrentThread()->ClearSuppressGcStress(); +} +#else +EXTERN_C void FASTCALL RhpSuppressGcStress() +{ +} +EXTERN_C void FASTCALL RhpUnsuppressGcStress() +{ +} +#endif // FEATURE_GC_STRESS + +// Standard calling convention variant and actual implementation for RhpWaitForSuspend +EXTERN_C NOINLINE void FASTCALL RhpWaitForSuspend2() +{ + // The wait operation below may trash the last win32 error. We save the error here so that it can be + // restored after the wait operation; + Int32 lastErrorOnEntry = PalGetLastError(); + + ThreadStore::GetCurrentThread()->WaitForSuspend(); + + // Restore the saved error + PalSetLastError(lastErrorOnEntry); +} + +// Standard calling convention variant and actual implementation for RhpWaitForGC +EXTERN_C NOINLINE void FASTCALL RhpWaitForGC2(PInvokeTransitionFrame * pFrame) +{ + + Thread * pThread = pFrame->m_pThread; + + if (pThread->IsDoNotTriggerGcSet()) + return; + + // The wait operation below may trash the last win32 error. 
We save the error here so that it can be + // restored after the wait operation; + Int32 lastErrorOnEntry = PalGetLastError(); + + pThread->WaitForGC(pFrame); + + // Restore the saved error + PalSetLastError(lastErrorOnEntry); +} + +void Thread::PushExInfo(ExInfo * pExInfo) +{ + ValidateExInfoStack(); + + pExInfo->m_pPrevExInfo = m_pExInfoStackHead; + m_pExInfoStackHead = pExInfo; +} + +void Thread::ValidateExInfoPop(ExInfo * pExInfo, void * limitSP) +{ +#ifdef _DEBUG + ValidateExInfoStack(); + ASSERT_MSG(pExInfo == m_pExInfoStackHead, "not popping the head element"); + pExInfo = pExInfo->m_pPrevExInfo; + + while (pExInfo && pExInfo < limitSP) + { + ASSERT_MSG(pExInfo->m_kind & EK_SupersededFlag, "popping a non-superseded ExInfo"); + pExInfo = pExInfo->m_pPrevExInfo; + } +#else + UNREFERENCED_PARAMETER(pExInfo); + UNREFERENCED_PARAMETER(limitSP); +#endif // _DEBUG +} + +COOP_PINVOKE_HELPER(void, RhpValidateExInfoPop, (Thread * pThread, ExInfo * pExInfo, void * limitSP)) +{ + pThread->ValidateExInfoPop(pExInfo, limitSP); +} + +bool Thread::IsDoNotTriggerGcSet() +{ + return IsStateSet(TSF_DoNotTriggerGc); +} + +void Thread::SetDoNotTriggerGc() +{ + ASSERT(!IsStateSet(TSF_DoNotTriggerGc)); + SetState(TSF_DoNotTriggerGc); +} + +void Thread::ClearDoNotTriggerGc() +{ + // Allowing unmatched clears simplifies the EH dispatch code, so we do not assert anything here. + ClearState(TSF_DoNotTriggerGc); +} + +bool Thread::IsDetached() +{ + return IsStateSet(TSF_Detached); +} + +void Thread::SetDetached() +{ + ASSERT(!IsStateSet(TSF_Detached)); + SetState(TSF_Detached); +} + +#endif // !DACCESS_COMPILE + +void Thread::ValidateExInfoStack() +{ +#ifndef DACCESS_COMPILE +#ifdef _DEBUG + ExInfo temp; + + ExInfo* pCur = m_pExInfoStackHead; + while (pCur) + { + ASSERT_MSG((this != ThreadStore::GetCurrentThread()) || (pCur > &temp), "an entry in the ExInfo chain points into dead stack"); + ASSERT_MSG(pCur < m_pStackHigh, "an entry in the ExInfo chain isn't on this stack"); + pCur = pCur->m_pPrevExInfo; + } +#endif // _DEBUG +#endif // !DACCESS_COMPILE +} + + + +// Retrieve the start of the TLS storage block allocated for the given thread for a specific module identified +// by the TLS slot index allocated to that module and the offset into the OS allocated block at which +// Redhawk-specific data is stored. +PTR_UInt8 Thread::GetThreadLocalStorage(UInt32 uTlsIndex, UInt32 uTlsStartOffset) +{ +#if 0 + return (*(UInt8***)(m_pTEB + OFFSETOF__TEB__ThreadLocalStoragePointer))[uTlsIndex] + uTlsStartOffset; +#else + return (*dac_cast(dac_cast(m_pTEB) + OFFSETOF__TEB__ThreadLocalStoragePointer))[uTlsIndex] + uTlsStartOffset; +#endif +} + +PTR_UInt8 Thread::GetThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset) +{ + // Note: When called from GC root enumeration, no changes can be made by the AllocateThreadLocalStorageForDynamicType to + // the 2 variables accessed here because AllocateThreadLocalStorageForDynamicType is called in cooperative mode. + + uTlsTypeOffset &= ~DYNAMIC_TYPE_TLS_OFFSET_FLAG; + return dac_cast(uTlsTypeOffset < m_numDynamicTypesTlsCells ? 
m_pDynamicTypesTlsCells[uTlsTypeOffset] : NULL); +} + +#ifndef DACCESS_COMPILE +PTR_UInt8 Thread::AllocateThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset, UInt32 tlsStorageSize, UInt32 numTlsCells) +{ + uTlsTypeOffset &= ~DYNAMIC_TYPE_TLS_OFFSET_FLAG; + + if (m_pDynamicTypesTlsCells == NULL || m_numDynamicTypesTlsCells <= uTlsTypeOffset) + { + // Keep at least a 2x grow so that we don't have to reallocate everytime a new type with TLS statics is created + if (numTlsCells < 2 * m_numDynamicTypesTlsCells) + numTlsCells = 2 * m_numDynamicTypesTlsCells; + + PTR_UInt8* pTlsCells = new (nothrow) PTR_UInt8[numTlsCells]; + if (pTlsCells == NULL) + return NULL; + + memset(&pTlsCells[m_numDynamicTypesTlsCells], 0, sizeof(PTR_UInt8) * (numTlsCells - m_numDynamicTypesTlsCells)); + + if (m_pDynamicTypesTlsCells != NULL) + { + memcpy(pTlsCells, m_pDynamicTypesTlsCells, sizeof(PTR_UInt8) * m_numDynamicTypesTlsCells); + delete[] m_pDynamicTypesTlsCells; + } + + m_pDynamicTypesTlsCells = pTlsCells; + m_numDynamicTypesTlsCells = numTlsCells; + } + + ASSERT(uTlsTypeOffset < m_numDynamicTypesTlsCells); + + if (m_pDynamicTypesTlsCells[uTlsTypeOffset] == NULL) + { + UInt8* pTlsStorage = new (nothrow) UInt8[tlsStorageSize]; + if (pTlsStorage == NULL) + return NULL; + + // Initialize storage to 0's before returning it + memset(pTlsStorage, 0, tlsStorageSize); + + m_pDynamicTypesTlsCells[uTlsTypeOffset] = pTlsStorage; + } + + return m_pDynamicTypesTlsCells[uTlsTypeOffset]; +} + +#ifndef TARGET_UNIX +EXTERN_C REDHAWK_API UInt32 __cdecl RhCompatibleReentrantWaitAny(UInt32_BOOL alertable, UInt32 timeout, UInt32 count, HANDLE* pHandles) +{ + return PalCompatibleWaitAny(alertable, timeout, count, pHandles, /*allowReentrantWait:*/ TRUE); +} +#endif // TARGET_UNIX + +FORCEINLINE bool Thread::InlineTryFastReversePInvoke(ReversePInvokeFrame * pFrame) +{ + // Do we need to attach the thread? + if (!IsStateSet(TSF_Attached)) + return false; // thread is not attached + + // If the thread is already in cooperative mode, this is a bad transition that will be a fail fast unless we are in + // a do not trigger mode. The exception to the rule allows us to have [UnmanagedCallersOnly] methods that are called via + // the "restricted GC callouts" as well as from native, which is necessary because the methods are CCW vtable + // methods on interfaces passed to native. + if (IsCurrentThreadInCooperativeMode()) + { + if (IsDoNotTriggerGcSet()) + { + // RhpTrapThreads will always be set in this case, so we must skip that check. We must be sure to + // zero-out our 'previous transition frame' state first, however. + pFrame->m_savedPInvokeTransitionFrame = NULL; + return true; + } + + return false; // bad transition + } + + // save the previous transition frame + pFrame->m_savedPInvokeTransitionFrame = m_pTransitionFrame; + + // set our mode to cooperative + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. 
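+ // This write/read pair is one half of a Dekker-style handshake with the suspending thread:
+ //
+ //     this thread:        m_pTransitionFrame = NULL;     then read ThreadStore::IsTrapThreadsRequested()
+ //     suspending thread:  set the RhpTrapThreads flag;   then read m_pTransitionFrame
+ //
+ // A compiler-only barrier suffices on this side because the suspending thread calls
+ // PalFlushProcessWriteBuffers() after setting the trap flag (see ThreadStore::SuspendAllThreads).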
+ _ReadWriteBarrier(); + + // now check if we need to trap the thread + if (ThreadStore::IsTrapThreadsRequested()) + { + // put the previous frame back (sets us back to preemptive mode) + m_pTransitionFrame = pFrame->m_savedPInvokeTransitionFrame; + return false; // need to trap the thread + } + + return true; +} + +EXTERN_C void RhSetRuntimeInitializationCallback(int (*fPtr)()) +{ + g_RuntimeInitializationCallback = fPtr; +} + +void Thread::ReversePInvokeAttachOrTrapThread(ReversePInvokeFrame * pFrame) +{ + if (!IsStateSet(TSF_Attached)) + { + if (g_RuntimeInitializationCallback != NULL && g_RuntimeInitializingThread != this) + { + EnsureRuntimeInitialized(); + } + + ThreadStore::AttachCurrentThread(); + } + + // If the thread is already in cooperative mode, this is a bad transition. + if (IsCurrentThreadInCooperativeMode()) + { + // The TSF_DoNotTriggerGc mode is handled by the fast path (InlineTryFastReversePInvoke or equivalent assembly code) + ASSERT(!IsDoNotTriggerGcSet()); + + // The platform specific assembly PInvoke helpers will route this fault to the class library inferred from the return + // address for nicer error reporting. For configurations without assembly helpers, we are going to fail fast without + // going through the class library here. + // RhpReversePInvokeBadTransition(return address); + RhFailFast(); + } + + // save the previous transition frame + pFrame->m_savedPInvokeTransitionFrame = m_pTransitionFrame; + + // set our mode to cooperative + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. + _ReadWriteBarrier(); + + // now check if we need to trap the thread + if (ThreadStore::IsTrapThreadsRequested()) + { + WaitForGC(pFrame->m_savedPInvokeTransitionFrame); + } +} + +void Thread::EnsureRuntimeInitialized() +{ + while (PalInterlockedCompareExchangePointer((void *volatile *)&g_RuntimeInitializingThread, this, NULL) != NULL) + { + PalSleep(1); + } + + if (g_RuntimeInitializationCallback != NULL) + { + if (g_RuntimeInitializationCallback() != 0) + RhFailFast(); + + g_RuntimeInitializationCallback = NULL; + } + + PalInterlockedExchangePointer((void *volatile *)&g_RuntimeInitializingThread, NULL); +} + +FORCEINLINE void Thread::InlineReversePInvokeReturn(ReversePInvokeFrame * pFrame) +{ + m_pTransitionFrame = pFrame->m_savedPInvokeTransitionFrame; + if (ThreadStore::IsTrapThreadsRequested()) + { + RhpWaitForSuspend2(); + } +} + +FORCEINLINE void Thread::InlinePInvoke(PInvokeTransitionFrame * pFrame) +{ + pFrame->m_pThread = this; + // set our mode to preemptive + m_pTransitionFrame = pFrame; + + // We need to prevent compiler reordering between above write and below read. 
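+ // (Same write-then-check ordering concern as in InlineTryFastReversePInvoke above.)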
+ _ReadWriteBarrier(); + + // now check if we need to trap the thread + if (ThreadStore::IsTrapThreadsRequested()) + { + RhpWaitForSuspend2(); + } +} + +FORCEINLINE void Thread::InlinePInvokeReturn(PInvokeTransitionFrame * pFrame) +{ + m_pTransitionFrame = NULL; + if (ThreadStore::IsTrapThreadsRequested()) + { + RhpWaitForGC2(pFrame); + } +} + +Object * Thread::GetThreadAbortException() +{ + return m_threadAbortException; +} + +void Thread::SetThreadAbortException(Object *exception) +{ + m_threadAbortException = exception; +} + +COOP_PINVOKE_HELPER(Object *, RhpGetThreadAbortException, ()) +{ + Thread * pCurThread = ThreadStore::RawGetCurrentThread(); + return pCurThread->GetThreadAbortException(); +} + +Object* Thread::GetThreadStaticStorageForModule(UInt32 moduleIndex) +{ + // Return a pointer to the TLS storage if it has already been + // allocated for the specified module. + if (moduleIndex < m_numThreadLocalModuleStatics) + { + Object** threadStaticsStorageHandle = (Object**)m_pThreadLocalModuleStatics[moduleIndex]; + if (threadStaticsStorageHandle != NULL) + { + return *threadStaticsStorageHandle; + } + } + + return NULL; +} + +Boolean Thread::SetThreadStaticStorageForModule(Object * pStorage, UInt32 moduleIndex) +{ + // Grow thread local storage if needed. + if (m_numThreadLocalModuleStatics <= moduleIndex) + { + UInt32 newSize = moduleIndex + 1; + if (newSize < moduleIndex) + { + return FALSE; + } + + PTR_PTR_VOID pThreadLocalModuleStatics = new (nothrow) PTR_VOID[newSize]; + if (pThreadLocalModuleStatics == NULL) + { + return FALSE; + } + + memset(&pThreadLocalModuleStatics[m_numThreadLocalModuleStatics], 0, sizeof(PTR_VOID) * (newSize - m_numThreadLocalModuleStatics)); + + if (m_pThreadLocalModuleStatics != NULL) + { + memcpy(pThreadLocalModuleStatics, m_pThreadLocalModuleStatics, sizeof(PTR_VOID) * m_numThreadLocalModuleStatics); + delete[] m_pThreadLocalModuleStatics; + } + + m_pThreadLocalModuleStatics = pThreadLocalModuleStatics; + m_numThreadLocalModuleStatics = newSize; + } + + if (m_pThreadLocalModuleStatics[moduleIndex] != NULL) + { + RhHandleSet(m_pThreadLocalModuleStatics[moduleIndex], pStorage); + } + else + { + void* threadStaticsStorageHandle = RhpHandleAlloc(pStorage, 2 /* Normal */); + if (threadStaticsStorageHandle == NULL) + { + return FALSE; + } + m_pThreadLocalModuleStatics[moduleIndex] = threadStaticsStorageHandle; + } + + return TRUE; +} + +COOP_PINVOKE_HELPER(Object*, RhGetThreadStaticStorageForModule, (UInt32 moduleIndex)) +{ + Thread * pCurrentThread = ThreadStore::RawGetCurrentThread(); + return pCurrentThread->GetThreadStaticStorageForModule(moduleIndex); +} + +COOP_PINVOKE_HELPER(Boolean, RhSetThreadStaticStorageForModule, (Array * pStorage, UInt32 moduleIndex)) +{ + Thread * pCurrentThread = ThreadStore::RawGetCurrentThread(); + return pCurrentThread->SetThreadStaticStorageForModule((Object*)pStorage, moduleIndex); +} + +// This is function is used to quickly query a value that can uniquely identify a thread +COOP_PINVOKE_HELPER(UInt8*, RhCurrentNativeThreadId, ()) +{ +#ifndef TARGET_UNIX + return PalNtCurrentTeb(); +#else + return (UInt8*)ThreadStore::RawGetCurrentThread(); +#endif // TARGET_UNIX +} + +// This function is used to get the OS thread identifier for the current thread. 
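+// Unlike RhCurrentNativeThreadId above, which only needs a cheap value that uniquely identifies the
+// thread (the TEB on Windows, the Thread* itself elsewhere), this helper returns the OS-assigned
+// thread id via PalGetCurrentThreadIdForLogging and is intended for logging/diagnostics.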
+COOP_PINVOKE_HELPER(UInt64, RhCurrentOSThreadId, ()) +{ + return PalGetCurrentThreadIdForLogging(); +} + +// Standard calling convention variant and actual implementation for RhpReversePInvokeAttachOrTrapThread +EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame * pFrame) +{ + ASSERT(pFrame->m_savedThread == ThreadStore::RawGetCurrentThread()); + pFrame->m_savedThread->ReversePInvokeAttachOrTrapThread(pFrame); +} + +// +// PInvoke +// + +// Standard calling convention variant of RhpReversePInvoke +COOP_PINVOKE_HELPER(void, RhpReversePInvoke2, (ReversePInvokeFrame * pFrame)) +{ + Thread * pCurThread = ThreadStore::RawGetCurrentThread(); + pFrame->m_savedThread = pCurThread; + if (pCurThread->InlineTryFastReversePInvoke(pFrame)) + return; + + RhpReversePInvokeAttachOrTrapThread2(pFrame); +} + +// Standard calling convention variant of RhpReversePInvokeReturn +COOP_PINVOKE_HELPER(void, RhpReversePInvokeReturn2, (ReversePInvokeFrame * pFrame)) +{ + pFrame->m_savedThread->InlineReversePInvokeReturn(pFrame); +} + +#ifdef USE_PORTABLE_HELPERS + +COOP_PINVOKE_HELPER(void, RhpPInvoke2, (PInvokeTransitionFrame* pFrame)) +{ + Thread * pCurThread = ThreadStore::RawGetCurrentThread(); + pCurThread->InlinePInvoke(pFrame); +} + +COOP_PINVOKE_HELPER(void, RhpPInvokeReturn2, (PInvokeTransitionFrame* pFrame)) +{ + //reenter cooperative mode + pFrame->m_pThread->InlinePInvokeReturn(pFrame); +} + +#endif //USE_PORTABLE_HELPERS + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/thread.h b/src/coreclr/src/nativeaot/Runtime/thread.h new file mode 100644 index 0000000000000..50e24e27a28e6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/thread.h @@ -0,0 +1,305 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "forward_declarations.h" + +struct gc_alloc_context; +class RuntimeInstance; +class ThreadStore; +class CLREventStatic; +class Thread; + +// The offsets of some fields in the thread (in particular, m_pTransitionFrame) are known to the compiler and get +// inlined into the code. Let's make sure they don't change just because we enable/disable server GC in a particular +// runtime build. +#define KEEP_THREAD_LAYOUT_CONSTANT + +#ifndef HOST_64BIT +# if defined(FEATURE_SVR_GC) || defined(KEEP_THREAD_LAYOUT_CONSTANT) +# define SIZEOF_ALLOC_CONTEXT 40 +# else +# define SIZEOF_ALLOC_CONTEXT 28 +# endif +#else // HOST_64BIT +# if defined(FEATURE_SVR_GC) || defined(KEEP_THREAD_LAYOUT_CONSTANT) +# define SIZEOF_ALLOC_CONTEXT 56 +# else +# define SIZEOF_ALLOC_CONTEXT 40 +# endif +#endif // HOST_64BIT + +#define TOP_OF_STACK_MARKER ((PTR_VOID)(UIntNative)(IntNative)-1) + +#define DYNAMIC_TYPE_TLS_OFFSET_FLAG 0x80000000 + + +enum SyncRequestResult +{ + TryAgain, + SuccessUnmanaged, + SuccessManaged, +}; + +typedef DPTR(PAL_LIMITED_CONTEXT) PTR_PAL_LIMITED_CONTEXT; + +struct ExInfo; +typedef DPTR(ExInfo) PTR_ExInfo; + + +// Also defined in ExceptionHandling.cs, layouts must match. 
+// When adding new fields to this struct, ensure they get properly initialized in the exception handling +// assembly stubs +struct ExInfo +{ + + PTR_ExInfo m_pPrevExInfo; + PTR_PAL_LIMITED_CONTEXT m_pExContext; + PTR_Object m_exception; // actual object reference, specially reported by GcScanRootsWorker + ExKind m_kind; + UInt8 m_passNumber; + UInt32 m_idxCurClause; + StackFrameIterator m_frameIter; + volatile void* m_notifyDebuggerSP; +}; + +struct ThreadBuffer +{ + UInt8 m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + UInt32 volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum +#if DACCESS_COMPILE + PTR_VOID m_pTransitionFrame; +#else + PTR_VOID volatile m_pTransitionFrame; +#endif + PTR_VOID m_pHackPInvokeTunnel; // see Thread::EnablePreemptiveMode + PTR_VOID m_pCachedTransitionFrame; + PTR_Thread m_pNext; // used by ThreadStore's SList + HANDLE m_hPalThread; // WARNING: this may legitimately be INVALID_HANDLE_VALUE + void ** m_ppvHijackedReturnAddressLocation; + void * m_pvHijackedReturnAddress; +#ifdef HOST_64BIT + UIntNative m_uHijackedReturnValueFlags; // used on ARM64 only; however, ARM64 and AMD64 share field offsets +#endif // HOST_64BIT + PTR_ExInfo m_pExInfoStackHead; + Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort + PTR_VOID m_pStackLow; + PTR_VOID m_pStackHigh; + PTR_UInt8 m_pTEB; // Pointer to OS TEB structure for this thread + UInt64 m_uPalThreadIdForLogging; // @TODO: likely debug-only + EEThreadId m_threadId; + PTR_VOID m_pThreadStressLog; // pointer to head of thread's StressLogChunks +#ifdef FEATURE_GC_STRESS + UInt32 m_uRand; // current per-thread random number +#endif // FEATURE_GC_STRESS + + // Thread Statics Storage for dynamic types + UInt32 m_numDynamicTypesTlsCells; + PTR_PTR_UInt8 m_pDynamicTypesTlsCells; + + PTR_PTR_VOID m_pThreadLocalModuleStatics; + UInt32 m_numThreadLocalModuleStatics; +}; + +struct ReversePInvokeFrame +{ + void* m_savedPInvokeTransitionFrame; + Thread* m_savedThread; +}; + +class Thread : private ThreadBuffer +{ + friend class AsmOffsets; + friend struct DefaultSListTraits; + friend class ThreadStore; + IN_DAC(friend class ClrDataAccess;) + +public: + enum ThreadStateFlags + { + TSF_Unknown = 0x00000000, // Threads are created in this state + TSF_Attached = 0x00000001, // Thread was inited by first U->M transition on this thread + TSF_Detached = 0x00000002, // Thread was detached by DllMain + TSF_SuppressGcStress = 0x00000008, // Do not allow gc stress on this thread, used in DllMain + // ...and on the Finalizer thread + TSF_DoNotTriggerGc = 0x00000010, // Do not allow hijacking of this thread, also intended to + // ...be checked during allocations in debug builds. 
+ TSF_IsGcSpecialThread = 0x00000020, // Set to indicate a GC worker thread used for background GC +#ifdef FEATURE_GC_STRESS + TSF_IsRandSeedSet = 0x00000040, // set to indicate the random number generator for GCStress was inited +#endif // FEATURE_GC_STRESS + }; +private: + + void Construct(); + + void SetState(ThreadStateFlags flags); + void ClearState(ThreadStateFlags flags); + bool IsStateSet(ThreadStateFlags flags); + + static UInt32_BOOL HijackCallback(HANDLE hThread, PAL_LIMITED_CONTEXT* pThreadContext, void* pCallbackContext); + bool InternalHijack(PAL_LIMITED_CONTEXT * pSuspendCtx, void * pvHijackTargets[]); + + bool CacheTransitionFrameForSuspend(); + void ResetCachedTransitionFrame(); + void CrossThreadUnhijack(); + void UnhijackWorker(); + void EnsureRuntimeInitialized(); +#ifdef _DEBUG + bool DebugIsSuspended(); +#endif + + // + // SyncState members + // + PTR_VOID GetTransitionFrame(); + + void GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, StackFrameIterator & sfIter); + +public: + + + void Destroy(); + + bool IsInitialized(); + + gc_alloc_context * GetAllocContext(); // @TODO: I would prefer to not expose this in this way + +#ifndef DACCESS_COMPILE + UInt64 GetPalThreadIdForLogging(); + bool IsCurrentThread(); + + void GcScanRoots(void * pfnEnumCallback, void * pvCallbackData); +#else + typedef void GcScanRootsCallbackFunc(PTR_RtuObjectRef ppObject, void* token, UInt32 flags); + bool GcScanRoots(GcScanRootsCallbackFunc * pfnCallback, void * token, PTR_PAL_LIMITED_CONTEXT pInitialContext); +#endif + + bool Hijack(); + void Unhijack(); +#ifdef FEATURE_GC_STRESS + static void HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx); +#endif // FEATURE_GC_STRESS + bool IsHijacked(); + void * GetHijackedReturnAddress(); + void * GetUnhijackedReturnAddress(void** ppvReturnAddressLocation); + bool DangerousCrossThreadIsHijacked(); + + bool IsSuppressGcStressSet(); + void SetSuppressGcStress(); + void ClearSuppressGcStress(); + bool IsWithinStackBounds(PTR_VOID p); + + void GetStackBounds(PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh); + + PTR_UInt8 AllocateThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset, UInt32 tlsStorageSize, UInt32 numTlsCells); + // mrt100 Debugger (dac) has dependencies on the GetThreadLocalStorageForDynamicType method. + PTR_UInt8 GetThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset); + PTR_UInt8 GetThreadLocalStorage(UInt32 uTlsIndex, UInt32 uTlsStartOffset); + PTR_UInt8 GetTEB(); + + void PushExInfo(ExInfo * pExInfo); + void ValidateExInfoPop(ExInfo * pExInfo, void * limitSP); + void ValidateExInfoStack(); + bool IsDoNotTriggerGcSet(); + void SetDoNotTriggerGc(); + void ClearDoNotTriggerGc(); + + bool IsDetached(); + void SetDetached(); + + PTR_VOID GetThreadStressLog() const; +#ifndef DACCESS_COMPILE + void SetThreadStressLog(void * ptsl); +#endif // DACCESS_COMPILE +#ifdef FEATURE_GC_STRESS + void SetRandomSeed(UInt32 seed); + UInt32 NextRand(); + bool IsRandInited(); +#endif // FEATURE_GC_STRESS + PTR_ExInfo GetCurExInfo(); + + bool IsCurrentThreadInCooperativeMode(); + + PTR_VOID GetTransitionFrameForStackTrace(); + void * GetCurrentThreadPInvokeReturnAddress(); + + static bool IsHijackTarget(void * address); + + // + // The set of operations used to support unmanaged code running in cooperative mode + // + void EnablePreemptiveMode(); + void DisablePreemptiveMode(); + + // Set the m_pHackPInvokeTunnel field for GC allocation helpers that setup transition frame + // in assembly code. Do not use anywhere else. 
+ void SetCurrentThreadPInvokeTunnelForGcAlloc(void * pTransitionFrame); + + // Setup the m_pHackPInvokeTunnel field for GC helpers entered via regular PInvoke. + // Do not use anywhere else. + void SetupHackPInvokeTunnel(); + + // + // GC support APIs - do not use except from GC itself + // + void SetGCSpecial(bool isGCSpecial); + bool IsGCSpecial(); + bool CatchAtSafePoint(); + + // + // Managed/unmanaged interop transitions support APIs + // + void WaitForSuspend(); + void WaitForGC(void * pTransitionFrame); + + void ReversePInvokeAttachOrTrapThread(ReversePInvokeFrame * pFrame); + + bool InlineTryFastReversePInvoke(ReversePInvokeFrame * pFrame); + void InlineReversePInvokeReturn(ReversePInvokeFrame * pFrame); + + void InlinePInvoke(PInvokeTransitionFrame * pFrame); + void InlinePInvokeReturn(PInvokeTransitionFrame * pFrame); + + Object * GetThreadAbortException(); + void SetThreadAbortException(Object *exception); + + Object* GetThreadStaticStorageForModule(UInt32 moduleIndex); + Boolean SetThreadStaticStorageForModule(Object * pStorage, UInt32 moduleIndex); +}; + +#ifndef __GCENV_BASE_INCLUDED__ +typedef DPTR(Object) PTR_Object; +typedef DPTR(PTR_Object) PTR_PTR_Object; +#endif // !__GCENV_BASE_INCLUDED__ +#ifdef DACCESS_COMPILE + +// The DAC uses DebuggerEnumGcRefContext in place of a GCCONTEXT when doing reference +// enumeration. The GC passes through additional data in the ScanContext which the debugger +// neither has nor needs. While we could refactor the GC code to make an interface +// with less coupling, that might affect perf or make integration messier. Instead +// we use some typedefs so DAC and runtime can get strong yet distinct types. + + +// Ideally we wouldn't need this wrapper, but PromoteCarefully needs access to the +// thread and a promotion field. We aren't assuming the user's token will have this data. +struct DacScanCallbackData +{ + Thread* thread_under_crawl; // the thread being scanned + bool promotion; // are we emulating the GC promote phase or relocate phase? + // different references are reported for each + void* token; // the callback data passed to GCScanRoots + void* pfnUserCallback; // the callback passed in to GcScanRoots + uintptr_t stack_limit; // Lowest point on the thread stack that the scanning logic is permitted to read +}; + +typedef DacScanCallbackData EnumGcRefScanContext; +typedef void EnumGcRefCallbackFunc(PTR_PTR_Object, EnumGcRefScanContext* callbackData, UInt32 flags); + +#else // DACCESS_COMPILE +#ifndef __GCENV_BASE_INCLUDED__ +struct ScanContext; +typedef void promote_func(PTR_PTR_Object, ScanContext*, unsigned); +#endif // !__GCENV_BASE_INCLUDED__ +typedef promote_func EnumGcRefCallbackFunc; +typedef ScanContext EnumGcRefScanContext; + +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/thread.inl b/src/coreclr/src/nativeaot/Runtime/thread.inl new file mode 100644 index 0000000000000..cf0127d135b2c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/thread.inl @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
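+
+// thread.inl : inline implementations of small Thread members declared in thread.h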
+ +#ifndef DACCESS_COMPILE +inline void Thread::SetCurrentThreadPInvokeTunnelForGcAlloc(void * pTransitionFrame) +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + ASSERT(Thread::IsCurrentThreadInCooperativeMode()); + m_pHackPInvokeTunnel = pTransitionFrame; +} + +inline void Thread::SetupHackPInvokeTunnel() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + ASSERT(!Thread::IsCurrentThreadInCooperativeMode()); + m_pHackPInvokeTunnel = m_pTransitionFrame; +} +#endif // DACCESS_COMPILE + +inline bool Thread::IsWithinStackBounds(PTR_VOID p) +{ + ASSERT((m_pStackLow != 0) && (m_pStackHigh != 0)); + return (m_pStackLow <= p) && (p < m_pStackHigh); +} + +inline void Thread::GetStackBounds(PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh) +{ + ASSERT((m_pStackLow != 0) && (m_pStackHigh != 0)); + *ppStackLow = m_pStackLow; + *ppStackHigh = m_pStackHigh; +} diff --git a/src/coreclr/src/nativeaot/Runtime/threadstore.cpp b/src/coreclr/src/nativeaot/Runtime/threadstore.cpp new file mode 100644 index 0000000000000..bababb625493a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/threadstore.cpp @@ -0,0 +1,540 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RuntimeInstance.h" +#include "TargetPtrs.h" +#include "yieldprocessornormalized.h" + +#include "slist.inl" +#include "GCMemoryHelpers.h" + +#include "Debug.h" +#include "DebugEventSource.h" +#include "DebugFuncEval.h" + +EXTERN_C volatile UInt32 RhpTrapThreads = (UInt32)TrapThreadsFlags::None; + +GVAL_IMPL_INIT(PTR_Thread, RhpSuspendingThread, 0); + +ThreadStore * GetThreadStore() +{ + return GetRuntimeInstance()->GetThreadStore(); +} + +ThreadStore::Iterator::Iterator() : + m_readHolder(&GetThreadStore()->m_Lock), + m_pCurrentPosition(GetThreadStore()->m_ThreadList.GetHead()) +{ +} + +ThreadStore::Iterator::~Iterator() +{ +} + +PTR_Thread ThreadStore::Iterator::GetNext() +{ + PTR_Thread pResult = m_pCurrentPosition; + if (NULL != pResult) + m_pCurrentPosition = pResult->m_pNext; + return pResult; +} + +//static +PTR_Thread ThreadStore::GetSuspendingThread() +{ + return (RhpSuspendingThread); +} + +#ifndef DACCESS_COMPILE + + +ThreadStore::ThreadStore() : + m_ThreadList(), + m_Lock(true /* writers (i.e. 
attaching/detaching threads) should wait on GC event */) +{ + SaveCurrentThreadOffsetForDAC(); +} + +ThreadStore::~ThreadStore() +{ +} + +// static +ThreadStore * ThreadStore::Create(RuntimeInstance * pRuntimeInstance) +{ + NewHolder pNewThreadStore = new (nothrow) ThreadStore(); + if (NULL == pNewThreadStore) + return NULL; + + if (!pNewThreadStore->m_SuspendCompleteEvent.CreateManualEventNoThrow(true)) + return NULL; + + pNewThreadStore->m_pRuntimeInstance = pRuntimeInstance; + + pNewThreadStore.SuppressRelease(); + return pNewThreadStore; +} + +void ThreadStore::Destroy() +{ + delete this; +} + +// static +void ThreadStore::AttachCurrentThread(bool fAcquireThreadStoreLock) +{ + // + // step 1: ThreadStore::InitCurrentThread + // step 2: add this thread to the ThreadStore + // + + // The thread has been constructed, during which some data is initialized (like which RuntimeInstance the + // thread belongs to), but it hasn't been added to the thread store because doing so takes a lock, which + // we want to avoid at construction time because the loader lock is held then. + Thread * pAttachingThread = RawGetCurrentThread(); + + // The thread was already initialized, so it is already attached + if (pAttachingThread->IsInitialized()) + { + return; + } + + PalAttachThread(pAttachingThread); + + // + // Init the thread buffer + // + pAttachingThread->Construct(); + ASSERT(pAttachingThread->m_ThreadStateFlags == Thread::TSF_Unknown); + + // The runtime holds the thread store lock for the duration of thread suspension for GC, so let's check to + // see if that's going on and, if so, use a proper wait instead of the RWL's spinning. NOTE: when we are + // called with fAcquireThreadStoreLock==false, we are being called in a situation where the GC is trying to + // init a GC thread, so we must honor the flag to mean "do not block on GC" or else we will deadlock. + if (fAcquireThreadStoreLock && (RhpTrapThreads != (UInt32)TrapThreadsFlags::None)) + RedhawkGCInterface::WaitForGCCompletion(); + + ThreadStore* pTS = GetThreadStore(); + ReaderWriterLock::WriteHolder write(&pTS->m_Lock, fAcquireThreadStoreLock); + + // + // Set thread state to be attached + // + ASSERT(pAttachingThread->m_ThreadStateFlags == Thread::TSF_Unknown); + pAttachingThread->m_ThreadStateFlags = Thread::TSF_Attached; + + pTS->m_ThreadList.PushHead(pAttachingThread); +} + +// static +void ThreadStore::AttachCurrentThread() +{ + AttachCurrentThread(true); +} + +void ThreadStore::DetachCurrentThread() +{ + // The thread may not have been initialized because it may never have run managed code before. + Thread * pDetachingThread = RawGetCurrentThread(); + + // The thread was not initialized yet, so it was not attached + if (!pDetachingThread->IsInitialized()) + { + return; + } + + if (!PalDetachThread(pDetachingThread)) + { + return; + } + +#ifdef STRESS_LOG + ThreadStressLog * ptsl = reinterpret_cast( + pDetachingThread->GetThreadStressLog()); + StressLog::ThreadDetach(ptsl); +#endif // STRESS_LOG + + ThreadStore* pTS = GetThreadStore(); + ReaderWriterLock::WriteHolder write(&pTS->m_Lock); + ASSERT(rh::std::count(pTS->m_ThreadList.Begin(), pTS->m_ThreadList.End(), pDetachingThread) == 1); + pTS->m_ThreadList.RemoveFirst(pDetachingThread); + pDetachingThread->Destroy(); +} + +// Used by GC to prevent new threads during a GC. New threads must take a write lock to +// modify the list, but they won't be allowed to until all outstanding read locks are +// released. 
This way, the GC always enumerates a consistent set of threads each time +// it enumerates threads between SuspendAllThreads and ResumeAllThreads. +// +// @TODO: Investigate if this requirement is actually necessary. Threads already may +// not enter managed code during GC, so if new threads are added to the thread store, +// but haven't yet entered managed code, is that really a problem? +// +// @TODO: Investigate the suspend/resume algorithm's dependence on this lock's side- +// effect of being a memory barrier. +void ThreadStore::LockThreadStore() +{ + m_Lock.AcquireReadLock(); +} + +void ThreadStore::UnlockThreadStore() +{ + m_Lock.ReleaseReadLock(); +} + +void ThreadStore::SuspendAllThreads(bool waitForGCEvent) +{ + ThreadStore::SuspendAllThreads(waitForGCEvent, /* fireDebugEvent = */ true); +} + +void ThreadStore::SuspendAllThreads(bool waitForGCEvent, bool fireDebugEvent) +{ + // + // SuspendAllThreads requires all threads running + // + // Threads are by default frozen by the debugger during FuncEval + // Therefore, in case of FuncEval, we need to inform the debugger + // to unfreeze the threads. + // + if (fireDebugEvent && DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer() != 0) + { + struct DebuggerFuncEvalCrossThreadDependencyNotification crossThreadDependencyEventPayload; + crossThreadDependencyEventPayload.kind = DebuggerResponseKind::FuncEvalCrossThreadDependency; + crossThreadDependencyEventPayload.payload = 0; + DebugEventSource::SendCustomEvent(&crossThreadDependencyEventPayload, sizeof(struct DebuggerFuncEvalCrossThreadDependencyNotification)); + } + + Thread * pThisThread = GetCurrentThreadIfAvailable(); + + LockThreadStore(); + + RhpSuspendingThread = pThisThread; + + if (waitForGCEvent) + { + GCHeapUtilities::GetGCHeap()->ResetWaitForGCEvent(); + } + m_SuspendCompleteEvent.Reset(); + + // set the global trap for pinvoke leave and return + RhpTrapThreads |= (UInt32)TrapThreadsFlags::TrapThreads; + + // Set each module's loop hijack flag + GetRuntimeInstance()->SetLoopHijackFlags(RhpTrapThreads); + + // Our lock-free algorithm depends on flushing write buffers of all processors running RH code. The + // reason for this is that we essentially implement Dekker's algorithm, which requires write ordering. + PalFlushProcessWriteBuffers(); + + bool keepWaiting; + YieldProcessorNormalizationInfo normalizationInfo; + do + { + keepWaiting = false; + FOREACH_THREAD(pTargetThread) + { + if (pTargetThread == pThisThread) + continue; + + if (!pTargetThread->CacheTransitionFrameForSuspend()) + { + // We drive all threads to preemptive mode by hijacking them with both a + // return-address hijack and loop hijacks. + keepWaiting = true; + pTargetThread->Hijack(); + } + else if (pTargetThread->DangerousCrossThreadIsHijacked()) + { + // Once a thread is safely in preemptive mode, we must wait until it is also + // unhijacked. This is done because, otherwise, we might race on into the + // stackwalk and find the hijack still on the stack, which will cause the + // stackwalking code to crash. + keepWaiting = true; + } + } + END_FOREACH_THREAD + + if (keepWaiting) + { + if (PalSwitchToThread() == 0 && g_RhSystemInfo.dwNumberOfProcessors > 1) + { + // No threads are scheduled on this processor. Perhaps we're waiting for a thread + // that's scheduled on another processor. If so, let's give it a little time + // to make forward progress. + // Note that we do not call Sleep, because the minimum granularity of Sleep is much + // too long (we probably don't need a 15ms wait here). 
Instead, we'll just burn some + // cycles. + // @TODO: need tuning for spin + YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, 10000); + } + } + + } while (keepWaiting); + + m_SuspendCompleteEvent.Set(); +} + +void ThreadStore::ResumeAllThreads(bool waitForGCEvent) +{ + FOREACH_THREAD(pTargetThread) + { + pTargetThread->ResetCachedTransitionFrame(); + } + END_FOREACH_THREAD + + RhpTrapThreads &= ~(UInt32)TrapThreadsFlags::TrapThreads; + + // Reset module's hijackLoops flag + GetRuntimeInstance()->SetLoopHijackFlags(0); + + RhpSuspendingThread = NULL; + if (waitForGCEvent) + { + GCHeapUtilities::GetGCHeap()->SetWaitForGCEvent(); + } + UnlockThreadStore(); +} // ResumeAllThreads + +void ThreadStore::WaitForSuspendComplete() +{ + UInt32 waitResult = m_SuspendCompleteEvent.Wait(INFINITE, false); + if (waitResult == WAIT_FAILED) + RhFailFast(); +} + +#ifndef DACCESS_COMPILE + +void ThreadStore::InitiateThreadAbort(Thread* targetThread, Object * threadAbortException, bool doRudeAbort) +{ + SuspendAllThreads(/* waitForGCEvent = */ false, /* fireDebugEvent = */ false); + // TODO: consider enabling multiple thread aborts running in parallel on different threads + ASSERT((RhpTrapThreads & (UInt32)TrapThreadsFlags::AbortInProgress) == 0); + RhpTrapThreads |= (UInt32)TrapThreadsFlags::AbortInProgress; + + targetThread->SetThreadAbortException(threadAbortException); + + // TODO: Stage 2: Queue APC to the target thread to break out of possible wait + + bool initiateAbort = false; + + if (!doRudeAbort) + { + // TODO: Stage 3: protected regions (finally, catch) handling + // If it was in a protected region, set the "throw at protected region end" flag on the native Thread object + // TODO: Stage 4: reverse PInvoke handling + // If there was a reverse Pinvoke frame between the current frame and the funceval frame of the target thread, + // find the outermost reverse Pinvoke frame below the funceval frame and set the thread abort flag in its transition frame. + // If both of these cases happened at once, find out which one of the outermost frame of the protected region + // and the outermost reverse Pinvoke frame is closer to the funceval frame and perform one of the two actions + // described above based on the one that's closer. 
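+ // Until the staged work described above is implemented, the rude and non-rude paths collapse
+ // to the same action: tag the target thread's most recent transition frame with
+ // PTFF_THREAD_ABORT (below) while every other thread is suspended.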
+ initiateAbort = true; + } + else + { + initiateAbort = true; + } + + if (initiateAbort) + { + PInvokeTransitionFrame* transitionFrame = reinterpret_cast(targetThread->GetTransitionFrame()); + transitionFrame->m_Flags |= PTFF_THREAD_ABORT; + } + + ResumeAllThreads(/* waitForGCEvent = */ false); +} + +void ThreadStore::CancelThreadAbort(Thread* targetThread) +{ + SuspendAllThreads(/* waitForGCEvent = */ false, /* fireDebugEvent = */ false); + + ASSERT((RhpTrapThreads & (UInt32)TrapThreadsFlags::AbortInProgress) != 0); + RhpTrapThreads &= ~(UInt32)TrapThreadsFlags::AbortInProgress; + + PInvokeTransitionFrame* transitionFrame = reinterpret_cast(targetThread->GetTransitionFrame()); + if (transitionFrame != nullptr) + { + transitionFrame->m_Flags &= ~PTFF_THREAD_ABORT; + } + + targetThread->SetThreadAbortException(nullptr); + + ResumeAllThreads(/* waitForGCEvent = */ false); +} + +COOP_PINVOKE_HELPER(void *, RhpGetCurrentThread, ()) +{ + return ThreadStore::GetCurrentThread(); +} + +COOP_PINVOKE_HELPER(void, RhpInitiateThreadAbort, (void* thread, Object * threadAbortException, Boolean doRudeAbort)) +{ + GetThreadStore()->InitiateThreadAbort((Thread*)thread, threadAbortException, doRudeAbort); +} + +COOP_PINVOKE_HELPER(void, RhpCancelThreadAbort, (void* thread)) +{ + GetThreadStore()->CancelThreadAbort((Thread*)thread); +} + +#endif // DACCESS_COMPILE + +C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); + +EXTERN_C DECLSPEC_THREAD ThreadBuffer tls_CurrentThread = +{ + { 0 }, // m_rgbAllocContextBuffer + Thread::TSF_Unknown, // m_ThreadStateFlags + TOP_OF_STACK_MARKER, // m_pTransitionFrame + TOP_OF_STACK_MARKER, // m_pHackPInvokeTunnel + 0, // m_pCachedTransitionFrame + 0, // m_pNext + INVALID_HANDLE_VALUE, // m_hPalThread + 0, // m_ppvHijackedReturnAddressLocation + 0, // m_pvHijackedReturnAddress + 0, // all other fields are initialized by zeroes +}; + +#endif // !DACCESS_COMPILE + +#ifdef _WIN32 + +#ifndef DACCESS_COMPILE + +// Keep a global variable in the target process which contains +// the address of _tls_index. This is the breadcrumb needed +// by DAC to read _tls_index since we don't control the +// declaration of _tls_index directly. 
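+// Given a target thread's TEB, the DAC then recovers the Thread as (see GetThreadFromTEB below):
+//
+//     pTls    = *(TADDR*)(pTEB + OFFSETOF__TEB__ThreadLocalStoragePointer)
+//     pOurTls = *(TADDR*)(pTls + (*p_tls_index) * sizeof(void*))
+//     pThread = (Thread*)(pOurTls + SECTIONREL__tls_CurrentThread)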
+ +// volatile to prevent the compiler from removing the unused global variable +volatile UInt32 * p_tls_index; +volatile UInt32 SECTIONREL__tls_CurrentThread; + +EXTERN_C UInt32 _tls_index; +#if defined(TARGET_ARM64) +// ARM64TODO: Re-enable optimization +#pragma optimize("", off) +#endif +void ThreadStore::SaveCurrentThreadOffsetForDAC() +{ + p_tls_index = &_tls_index; + + UInt8 * pTls = *(UInt8 **)(PalNtCurrentTeb() + OFFSETOF__TEB__ThreadLocalStoragePointer); + + UInt8 * pOurTls = *(UInt8 **)(pTls + (_tls_index * sizeof(void*))); + + SECTIONREL__tls_CurrentThread = (UInt32)((UInt8 *)&tls_CurrentThread - pOurTls); +} +#if defined(TARGET_ARM64) +#pragma optimize("", on) +#endif +#else // DACCESS_COMPILE + +GPTR_IMPL(UInt32, p_tls_index); +GVAL_IMPL(UInt32, SECTIONREL__tls_CurrentThread); + +// +// This routine supports the !Thread debugger extension routine +// +typedef DPTR(TADDR) PTR_TADDR; +// static +PTR_Thread ThreadStore::GetThreadFromTEB(TADDR pTEB) +{ + if (pTEB == NULL) + return NULL; + + UInt32 tlsIndex = *p_tls_index; + TADDR pTls = *(PTR_TADDR)(pTEB + OFFSETOF__TEB__ThreadLocalStoragePointer); + if (pTls == NULL) + return NULL; + + TADDR pOurTls = *(PTR_TADDR)(pTls + (tlsIndex * sizeof(void*))); + if (pOurTls == NULL) + return NULL; + + return (PTR_Thread)(pOurTls + SECTIONREL__tls_CurrentThread); +} + +#endif // DACCESS_COMPILE + +#else // _WIN32 + +void ThreadStore::SaveCurrentThreadOffsetForDAC() +{ +} + +#endif // _WIN32 + + +#ifndef DACCESS_COMPILE + +// internal static extern unsafe bool RhGetExceptionsForCurrentThread(Exception[] outputArray, out int writtenCountOut); +COOP_PINVOKE_HELPER(Boolean, RhGetExceptionsForCurrentThread, (Array* pOutputArray, Int32* pWrittenCountOut)) +{ + return GetThreadStore()->GetExceptionsForCurrentThread(pOutputArray, pWrittenCountOut); +} + +Boolean ThreadStore::GetExceptionsForCurrentThread(Array* pOutputArray, Int32* pWrittenCountOut) +{ + Int32 countWritten = 0; + Object** pArrayElements; + Thread * pThread = GetCurrentThread(); + + for (PTR_ExInfo pInfo = pThread->m_pExInfoStackHead; pInfo != NULL; pInfo = pInfo->m_pPrevExInfo) + { + if (pInfo->m_exception == NULL) + continue; + + countWritten++; + } + + // No input array provided, or it was of the wrong kind. We'll fill out the count and return false. + if ((pOutputArray == NULL) || (pOutputArray->get_EEType()->get_ComponentSize() != POINTER_SIZE)) + goto Error; + + // Input array was not big enough. We don't even partially fill it. + if (pOutputArray->GetArrayLength() < (UInt32)countWritten) + goto Error; + + *pWrittenCountOut = countWritten; + + // Success, but nothing to report. + if (countWritten == 0) + return Boolean_true; + + pArrayElements = (Object**)pOutputArray->GetArrayData(); + for (PTR_ExInfo pInfo = pThread->m_pExInfoStackHead; pInfo != NULL; pInfo = pInfo->m_pPrevExInfo) + { + if (pInfo->m_exception == NULL) + continue; + + *pArrayElements = pInfo->m_exception; + pArrayElements++; + } + + RhpBulkWriteBarrier(pArrayElements, countWritten * POINTER_SIZE); + return Boolean_true; + +Error: + *pWrittenCountOut = countWritten; + return Boolean_false; +} +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/threadstore.h b/src/coreclr/src/nativeaot/Runtime/threadstore.h new file mode 100644 index 0000000000000..dbc113a303f20 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/threadstore.h @@ -0,0 +1,80 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+class Thread; +class CLREventStatic; +class RuntimeInstance; +class Array; +typedef DPTR(RuntimeInstance) PTR_RuntimeInstance; + +enum class TrapThreadsFlags +{ + None = 0, + AbortInProgress = 1, + TrapThreads = 2 +}; + +class ThreadStore +{ + SList m_ThreadList; + PTR_RuntimeInstance m_pRuntimeInstance; + CLREventStatic m_SuspendCompleteEvent; + ReaderWriterLock m_Lock; + +private: + ThreadStore(); + + void LockThreadStore(); + void UnlockThreadStore(); + void SuspendAllThreads(bool waitForGCEvent, bool fireDebugEvent); + +public: + class Iterator + { + ReaderWriterLock::ReadHolder m_readHolder; + PTR_Thread m_pCurrentPosition; + public: + Iterator(); + ~Iterator(); + PTR_Thread GetNext(); + }; + + ~ThreadStore(); + static ThreadStore * Create(RuntimeInstance * pRuntimeInstance); + static Thread * RawGetCurrentThread(); + static Thread * GetCurrentThread(); + static Thread * GetCurrentThreadIfAvailable(); + static PTR_Thread GetSuspendingThread(); + static void AttachCurrentThread(); + static void AttachCurrentThread(bool fAcquireThreadStoreLock); + static void DetachCurrentThread(); +#ifndef DACCESS_COMPILE + static void SaveCurrentThreadOffsetForDAC(); + void InitiateThreadAbort(Thread* targetThread, Object * threadAbortException, bool doRudeAbort); + void CancelThreadAbort(Thread* targetThread); +#else + static PTR_Thread GetThreadFromTEB(TADDR pvTEB); +#endif + Boolean GetExceptionsForCurrentThread(Array* pOutputArray, Int32* pWrittenCountOut); + + void Destroy(); + void SuspendAllThreads(bool waitForGCEvent); + void ResumeAllThreads(bool waitForGCEvent); + + static bool IsTrapThreadsRequested(); + void WaitForSuspendComplete(); +}; +typedef DPTR(ThreadStore) PTR_ThreadStore; + +ThreadStore * GetThreadStore(); + +#define FOREACH_THREAD(p_thread_name) \ +{ \ + ThreadStore::Iterator __threads; \ + Thread * p_thread_name; \ + while ((p_thread_name = __threads.GetNext()) != NULL) \ + { \ + +#define END_FOREACH_THREAD \ + } \ +} \ + diff --git a/src/coreclr/src/nativeaot/Runtime/threadstore.inl b/src/coreclr/src/nativeaot/Runtime/threadstore.inl new file mode 100644 index 0000000000000..811d1e91e2541 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/threadstore.inl @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +EXTERN_C DECLSPEC_THREAD ThreadBuffer tls_CurrentThread; + +// static +inline Thread * ThreadStore::RawGetCurrentThread() +{ + return (Thread *) &tls_CurrentThread; +} + +// static +inline Thread * ThreadStore::GetCurrentThread() +{ + Thread * pCurThread = RawGetCurrentThread(); + + // If this assert fires, and you only need the Thread pointer if the thread has ever previously + // entered the runtime, then you should be using GetCurrentThreadIfAvailable instead. 
+ ASSERT(pCurThread->IsInitialized()); + return pCurThread; +} + +// static +inline Thread * ThreadStore::GetCurrentThreadIfAvailable() +{ + Thread * pCurThread = RawGetCurrentThread(); + if (pCurThread->IsInitialized()) + return pCurThread; + + return NULL; +} + +EXTERN_C volatile UInt32 RhpTrapThreads; + +// static +inline bool ThreadStore::IsTrapThreadsRequested() +{ + return (RhpTrapThreads & (UInt32)TrapThreadsFlags::TrapThreads) != 0; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/AsmOffsets.cpp b/src/coreclr/src/nativeaot/Runtime/unix/AsmOffsets.cpp new file mode 100644 index 0000000000000..1dd26302cde49 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/AsmOffsets.cpp @@ -0,0 +1,9 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#define HASH_DEFINE #define +#define PLAT_ASM_OFFSET(offset, cls, member) HASH_DEFINE OFFSETOF__##cls##__##member 0x##offset +#define PLAT_ASM_SIZEOF(size, cls ) HASH_DEFINE SIZEOF__##cls 0x##size +#define PLAT_ASM_CONST(constant, expr) HASH_DEFINE expr 0x##constant + +#include diff --git a/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.cpp b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.cpp new file mode 100644 index 0000000000000..7374a2b477087 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.cpp @@ -0,0 +1,647 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "CommonTypes.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.h" +#include "config.h" +#include "daccess.h" +#include "regdisplay.h" +#include "UnixContext.h" + +#include +#include "HardwareExceptions.h" + +#if !HAVE_SIGINFO_T +#error Cannot handle hardware exceptions on this platform +#endif + +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI + +#define EXCEPTION_ACCESS_VIOLATION 0xC0000005u +#define EXCEPTION_DATATYPE_MISALIGNMENT 0x80000002u +#define EXCEPTION_BREAKPOINT 0x80000003u +#define EXCEPTION_SINGLE_STEP 0x80000004u +#define EXCEPTION_ARRAY_BOUNDS_EXCEEDED 0xC000008Cu +#define EXCEPTION_FLT_DENORMAL_OPERAND 0xC000008Du +#define EXCEPTION_FLT_DIVIDE_BY_ZERO 0xC000008Eu +#define EXCEPTION_FLT_INEXACT_RESULT 0xC000008Fu +#define EXCEPTION_FLT_INVALID_OPERATION 0xC0000090u +#define EXCEPTION_FLT_OVERFLOW 0xC0000091u +#define EXCEPTION_FLT_STACK_CHECK 0xC0000092u +#define EXCEPTION_FLT_UNDERFLOW 0xC0000093u +#define EXCEPTION_INT_DIVIDE_BY_ZERO 0xC0000094u +#define EXCEPTION_INT_OVERFLOW 0xC0000095u +#define EXCEPTION_PRIV_INSTRUCTION 0xC0000096u +#define EXCEPTION_IN_PAGE_ERROR 0xC0000006u +#define EXCEPTION_ILLEGAL_INSTRUCTION 0xC000001Du +#define EXCEPTION_NONCONTINUABLE_EXCEPTION 0xC0000025u +#define EXCEPTION_STACK_OVERFLOW 0xC00000FDu +#define EXCEPTION_INVALID_DISPOSITION 0xC0000026u +#define EXCEPTION_GUARD_PAGE 0x80000001u +#define EXCEPTION_INVALID_HANDLE 0xC0000008u + +#define EXCEPTION_CONTINUE_EXECUTION (-1) +#define EXCEPTION_CONTINUE_SEARCH (0) +#define EXCEPTION_EXECUTE_HANDLER (1) + +struct sigaction g_previousSIGSEGV; +struct sigaction g_previousSIGFPE; + +typedef void (*SignalHandler)(int code, siginfo_t *siginfo, void *context); + +// Exception handler for hardware exceptions +static PHARDWARE_EXCEPTION_HANDLER g_hardwareExceptionHandler = NULL; + +#ifdef HOST_AMD64 + +// Get value of an instruction operand represented by the ModR/M field +// Parameters: +// uint8_t rex : REX prefix, 0 if 
there was none +// uint8_t* ip : instruction pointer pointing to the ModR/M field +// void* context : context containing the registers +// bool is8Bit : true if the operand size is 8 bit +// bool hasOpSizePrefix : true if the instruction has op size prefix (0x66) +uint64_t GetModRMOperandValue(uint8_t rex, uint8_t* ip, void* context, bool is8Bit, bool hasOpSizePrefix) +{ + uint64_t result; + uint64_t resultReg; + + uint8_t rex_b = (rex & 0x1); // high bit to modrm r/m field or SIB base field + uint8_t rex_x = (rex & 0x2) >> 1; // high bit to sib index field + uint8_t rex_r = (rex & 0x4) >> 2; // high bit to modrm reg field + uint8_t rex_w = (rex & 0x8) >> 3; // 1 = 64 bit operand size, 0 = operand size determined by hasOpSizePrefix + + uint8_t modrm = *ip++; + + ASSERT(modrm != 0); + + uint8_t mod = (modrm & 0xC0) >> 6; + uint8_t reg = (modrm & 0x38) >> 3; + uint8_t rm = (modrm & 0x07); + + reg |= (rex_r << 3); + uint8_t rmIndex = rm | (rex_b << 3); + + // 8 bit idiv without the REX prefix uses registers AH, CH, DH, BH for rm 4..8 + // which is an exception from the regular register indexes. + bool isAhChDhBh = is8Bit && (rex == 0) && (rm >= 4); + + // See: Tables A-15,16,17 in AMD Dev Manual 3 for information + // about how the ModRM/SIB/REX uint8_ts interact. + + switch (mod) + { + case 0: + case 1: + case 2: + if (rm == 4) // we have an SIB uint8_t following + { + // + // Get values from the SIB uint8_t + // + uint8_t sib = *ip++; + + ASSERT(sib != 0); + + uint8_t ss = (sib & 0xC0) >> 6; + uint8_t index = (sib & 0x38) >> 3; + uint8_t base = (sib & 0x07); + + index |= (rex_x << 3); + base |= (rex_b << 3); + + // + // Get starting value + // + if ((mod == 0) && (base == 5)) + { + result = 0; + } + else + { + result = GetRegisterValueByIndex(context, base); + } + + // + // Add in the [index] + // + if (index != 4) + { + result += GetRegisterValueByIndex(context, index) << ss; + } + + // + // Finally add in the offset + // + if (mod == 0) + { + if (base == 5) + { + result += *((int32_t*)ip); + } + } + else if (mod == 1) + { + result += *((int8_t*)ip); + } + else // mod == 2 + { + result += *((int32_t*)ip); + } + + } + else + { + // + // Get the value we need from the register. + // + + // Check for RIP-relative addressing mode. + if ((mod == 0) && (rm == 5)) + { + result = (uint64_t)ip + sizeof(int32_t) + *(int32_t*)ip; + } + else + { + result = GetRegisterValueByIndex(context, rmIndex); + + if (mod == 1) + { + result += *((int8_t*)ip); + } + else if (mod == 2) + { + result += *((int32_t*)ip); + } + } + } + + break; + + case 3: + default: + // The operand is stored in a register. + if (isAhChDhBh) + { + // 8 bit idiv without the REX prefix uses registers AH, CH, DH or BH for rm 4..8. + // So we shift the register index to get the real register index. + rmIndex -= 4; + } + + resultReg = GetRegisterValueByIndex(context, rmIndex); + result = (uint64_t)&resultReg; + + if (isAhChDhBh) + { + // Move one uint8_t higher to get an address of the AH, CH, DH or BH + result++; + } + + break; + + } + + // Now dereference thru the result to get the resulting value. + if (is8Bit) + { + result = *((uint8_t*)result); + } + else if (rex_w != 0) + { + result = *((uint64_t*)result); + } + else if (hasOpSizePrefix) + { + result = *((uint16_t*)result); + } + else + { + result = *((uint32_t*)result); + } + + return result; +} + +// Skip all prefixes until the instruction code or the REX prefix is found +// Parameters: +// uint8_t** ip : Pointer to the current instruction pointer. 
Updated +// as the function walks the codes. +// bool* hasOpSizePrefix : Pointer to bool, on exit set to true if a op size prefix +// was found. +// Return value : +// Code of the REX prefix or the instruction code after the prefixes. +uint8_t SkipPrefixes(uint8_t **ip, bool* hasOpSizePrefix) +{ + *hasOpSizePrefix = false; + + while (true) + { + uint8_t code = *(*ip)++; + + switch (code) + { + case 0x66: // Operand-Size + *hasOpSizePrefix = true; + break; + + // Segment overrides + case 0x26: // ES + case 0x2E: // CS + case 0x36: // SS + case 0x3E: // DS + case 0x64: // FS + case 0x65: // GS + + // Size overrides + case 0x67: // Address-Size + + // Lock + case 0xf0: + + // String REP prefixes + case 0xf2: // REPNE/REPNZ + case 0xf3: + break; + + default: + // Return address of the nonprefix code + return code; + } + } +} + +// Check if a division by zero exception is in fact a division overflow. The +// x64 processor generate the same exception in both cases for the IDIV / DIV +// instruction. So we need to decode the instruction argument and check +// whether it was zero or not. +bool IsDivByZeroAnIntegerOverflow(void* context) +{ + uint8_t * ip = (uint8_t*)GetPC(context); + uint8_t rex = 0; + bool hasOpSizePrefix = false; + + uint8_t code = SkipPrefixes(&ip, &hasOpSizePrefix); + + // The REX prefix must directly preceed the instruction code + if ((code & 0xF0) == 0x40) + { + rex = code; + code = *ip++; + } + + uint64_t divisor = 0; + + // Check if the instruction is IDIV or DIV. The instruction code includes the three + // 'reg' bits in the ModRM uint8_t. These are 7 for IDIV and 6 for DIV + uint8_t regBits = (*ip & 0x38) >> 3; + if ((code == 0xF7 || code == 0xF6) && (regBits == 7 || regBits == 6)) + { + bool is8Bit = (code == 0xF6); + divisor = GetModRMOperandValue(rex, ip, context, is8Bit, hasOpSizePrefix); + } + else + { + ASSERT_UNCONDITIONALLY("Invalid instruction (expected IDIV or DIV)"); + } + + // If the division operand is zero, it was division by zero. Otherwise the failure + // must have been an overflow. + return divisor != 0; +} +#endif //HOST_AMD64 + +// Translates signal and context information to a Win32 exception code. +uint32_t GetExceptionCodeForSignal(const siginfo_t *siginfo, const void *context) +{ + // IMPORTANT NOTE: This function must not call any signal unsafe functions + // since it is called from signal handlers. 
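+ // When the ILL_* si_code constants are available (the common Linux/macOS case),
+ // the mapping is driven purely by si_signo and si_code; otherwise the BSD-style
+ // fallback below maps SIGFPE by si_code and everything else by the trap number
+ // in uc_mcontext.mc_trapno. For example, a SIGFPE delivered with si_code equal
+ // to FPE_INTDIV is reported to the runtime as EXCEPTION_INT_DIVIDE_BY_ZERO (0xC0000094).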
+#ifdef ILL_ILLOPC + switch (siginfo->si_signo) + { + case SIGILL: + switch (siginfo->si_code) + { + case ILL_ILLOPC: // Illegal opcode + case ILL_ILLOPN: // Illegal operand + case ILL_ILLADR: // Illegal addressing mode + case ILL_ILLTRP: // Illegal trap + case ILL_COPROC: // Co-processor error + return EXCEPTION_ILLEGAL_INSTRUCTION; + case ILL_PRVOPC: // Privileged opcode + case ILL_PRVREG: // Privileged register + return EXCEPTION_PRIV_INSTRUCTION; + case ILL_BADSTK: // Internal stack error + return EXCEPTION_STACK_OVERFLOW; + default: + break; + } + break; + case SIGFPE: + switch (siginfo->si_code) + { + case FPE_INTDIV: + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case FPE_INTOVF: + return EXCEPTION_INT_OVERFLOW; + case FPE_FLTDIV: + return EXCEPTION_FLT_DIVIDE_BY_ZERO; + case FPE_FLTOVF: + return EXCEPTION_FLT_OVERFLOW; + case FPE_FLTUND: + return EXCEPTION_FLT_UNDERFLOW; + case FPE_FLTRES: + return EXCEPTION_FLT_INEXACT_RESULT; + case FPE_FLTINV: + return EXCEPTION_FLT_INVALID_OPERATION; + case FPE_FLTSUB: + return EXCEPTION_FLT_INVALID_OPERATION; + default: + break; + } + break; + case SIGSEGV: + switch (siginfo->si_code) + { + case SI_USER: // User-generated signal, sometimes sent + // for SIGSEGV under normal circumstances + case SEGV_MAPERR: // Address not mapped to object + case SEGV_ACCERR: // Invalid permissions for mapped object + return EXCEPTION_ACCESS_VIOLATION; + +#ifdef SI_KERNEL + case SI_KERNEL: + { + return EXCEPTION_ACCESS_VIOLATION; + } +#endif + default: + break; + } + break; + case SIGBUS: + switch (siginfo->si_code) + { + case BUS_ADRALN: // Invalid address alignment + return EXCEPTION_DATATYPE_MISALIGNMENT; + case BUS_ADRERR: // Non-existent physical address + return EXCEPTION_ACCESS_VIOLATION; + case BUS_OBJERR: // Object-specific hardware error + default: + break; + } + case SIGTRAP: + switch (siginfo->si_code) + { +#ifdef SI_KERNEL + case SI_KERNEL: +#endif + case SI_USER: + case TRAP_BRKPT: // Process breakpoint + return EXCEPTION_BREAKPOINT; + case TRAP_TRACE: // Process trace trap + return EXCEPTION_SINGLE_STEP; + default: + // Got unknown SIGTRAP signal with code siginfo->si_code; + return EXCEPTION_ILLEGAL_INSTRUCTION; + } + default: + break; + } + + // Got unknown signal number siginfo->si_signo with code siginfo->si_code; + return EXCEPTION_ILLEGAL_INSTRUCTION; +#else // ILL_ILLOPC + int trap; + + if (siginfo->si_signo == SIGFPE) + { + // Floating point exceptions are mapped by their si_code. + switch (siginfo->si_code) + { + case FPE_INTDIV : + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case FPE_INTOVF : + return EXCEPTION_INT_OVERFLOW; + case FPE_FLTDIV : + return EXCEPTION_FLT_DIVIDE_BY_ZERO; + case FPE_FLTOVF : + return EXCEPTION_FLT_OVERFLOW; + case FPE_FLTUND : + return EXCEPTION_FLT_UNDERFLOW; + case FPE_FLTRES : + return EXCEPTION_FLT_INEXACT_RESULT; + case FPE_FLTINV : + return EXCEPTION_FLT_INVALID_OPERATION; + case FPE_FLTSUB :/* subscript out of range */ + return EXCEPTION_FLT_INVALID_OPERATION; + default: + // Got unknown signal code siginfo->si_code; + return 0; + } + } + + trap = ((ucontext_t*)context)->uc_mcontext.mc_trapno; + switch (trap) + { + case T_PRIVINFLT : /* privileged instruction */ + return EXCEPTION_PRIV_INSTRUCTION; + case T_BPTFLT : /* breakpoint instruction */ + return EXCEPTION_BREAKPOINT; + case T_ARITHTRAP : /* arithmetic trap */ + return 0; /* let the caller pick an exception code */ +#ifdef T_ASTFLT + case T_ASTFLT : /* system forced exception : ^C, ^\. 
SIGINT signal + handler shouldn't be calling this function, since + it doesn't need an exception code */ + // Trap code T_ASTFLT received, shouldn't get here; + return 0; +#endif // T_ASTFLT + case T_PROTFLT : /* protection fault */ + return EXCEPTION_ACCESS_VIOLATION; + case T_TRCTRAP : /* debug exception (sic) */ + return EXCEPTION_SINGLE_STEP; + case T_PAGEFLT : /* page fault */ + return EXCEPTION_ACCESS_VIOLATION; + case T_ALIGNFLT : /* alignment fault */ + return EXCEPTION_DATATYPE_MISALIGNMENT; + case T_DIVIDE : + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case T_NMI : /* non-maskable trap */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_OFLOW : + return EXCEPTION_INT_OVERFLOW; + case T_BOUND : /* bound instruction fault */ + return EXCEPTION_ARRAY_BOUNDS_EXCEEDED; + case T_DNA : /* device not available fault */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_DOUBLEFLT : /* double fault */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_FPOPFLT : /* fp coprocessor operand fetch fault */ + return EXCEPTION_FLT_INVALID_OPERATION; + case T_TSSFLT : /* invalid tss fault */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_SEGNPFLT : /* segment not present fault */ + return EXCEPTION_ACCESS_VIOLATION; + case T_STKFLT : /* stack fault */ + return EXCEPTION_STACK_OVERFLOW; + case T_MCHK : /* machine check trap */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_RESERVED : /* reserved (unknown) */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + default: + // Got unknown trap code trap; + break; + } + return EXCEPTION_ILLEGAL_INSTRUCTION; +#endif // ILL_ILLOPC +} + +// Common handler for hardware exception signals +bool HardwareExceptionHandler(int code, siginfo_t *siginfo, void *context, void* faultAddress) +{ + if (g_hardwareExceptionHandler != NULL) + { + UIntNative faultCode = GetExceptionCodeForSignal(siginfo, context); + +#ifdef HOST_AMD64 + // It is possible that an overflow was mapped to a divide-by-zero exception. + // This happens when we try to divide the maximum negative value of a + // signed integer with -1. + // + // Thus, we will attempt to decode the instruction @ RIP to determine if that + // is the case using the faulting context. + if ((faultCode == EXCEPTION_INT_DIVIDE_BY_ZERO) && IsDivByZeroAnIntegerOverflow(context)) + { + // The exception was an integer overflow, so augment the fault code. 
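+ // (On x86-64 the IDIV instruction raises the same #DE fault both for a zero
+ // divisor and for the non-representable quotient INT64_MIN / -1, and the kernel
+ // reports both as FPE_INTDIV, so decoding the divisor is the only way to tell
+ // the two cases apart.)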
+ faultCode = EXCEPTION_INT_OVERFLOW; + } +#endif //HOST_AMD64 + + PAL_LIMITED_CONTEXT palContext; + NativeContextToPalContext(context, &palContext); + + UIntNative arg0Reg; + UIntNative arg1Reg; + Int32 disposition = g_hardwareExceptionHandler(faultCode, (UIntNative)faultAddress, &palContext, &arg0Reg, &arg1Reg); + if (disposition == EXCEPTION_CONTINUE_EXECUTION) + { + // TODO: better name + RedirectNativeContext(context, &palContext, arg0Reg, arg1Reg); + return true; + } + } + + return false; +} + +// Add handler for hardware exception signal +bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction) +{ + struct sigaction newAction; + + newAction.sa_flags = SA_RESTART; + newAction.sa_handler = NULL; + newAction.sa_sigaction = handler; + newAction.sa_flags |= SA_SIGINFO; + + sigemptyset(&newAction.sa_mask); + + if (sigaction(signal, NULL, previousAction) == -1) + { + ASSERT_UNCONDITIONALLY("Failed to get previous signal handler"); + return false; + } + + if (previousAction->sa_flags & SA_ONSTACK) + { + // If the previous signal handler uses an alternate stack, we need to use it too + // so that when we chain-call the previous handler, it is called on the kind of + // stack it expects. + // We also copy the signal mask to make sure that if some signals were blocked + // from execution on the alternate stack by the previous action, we honor that. + newAction.sa_flags |= SA_ONSTACK; + newAction.sa_mask = previousAction->sa_mask; + } + + if (sigaction(signal, &newAction, previousAction) == -1) + { + ASSERT_UNCONDITIONALLY("Failed to install signal handler"); + return false; + } + + return true; +} + +// Restore original handler for hardware exception signal +void RestoreSignalHandler(int signal_id, struct sigaction *previousAction) +{ + if (-1 == sigaction(signal_id, previousAction, NULL)) + { + ASSERT_UNCONDITIONALLY("RestoreSignalHandler: sigaction() call failed"); + } +} + +// Handler for the SIGSEGV signal +void SIGSEGVHandler(int code, siginfo_t *siginfo, void *context) +{ + bool isHandled = HardwareExceptionHandler(code, siginfo, context, siginfo->si_addr); + if (isHandled) + { + return; + } + + if (g_previousSIGSEGV.sa_sigaction != NULL) + { + g_previousSIGSEGV.sa_sigaction(code, siginfo, context); + } + else + { + // Restore the original or default handler and restart h/w exception + RestoreSignalHandler(code, &g_previousSIGSEGV); + } +} + +// Handler for the SIGFPE signal +void SIGFPEHandler(int code, siginfo_t *siginfo, void *context) +{ + bool isHandled = HardwareExceptionHandler(code, siginfo, context, NULL); + if (isHandled) + { + return; + } + + if (g_previousSIGFPE.sa_sigaction != NULL) + { + g_previousSIGFPE.sa_sigaction(code, siginfo, context); + } + else + { + // Restore the original or default handler and restart h/w exception + RestoreSignalHandler(code, &g_previousSIGFPE); + } +} + +// Initialize hardware exception handling +bool InitializeHardwareExceptionHandling() +{ + if (!AddSignalHandler(SIGSEGV, SIGSEGVHandler, &g_previousSIGSEGV)) + { + return false; + } + + if (!AddSignalHandler(SIGFPE, SIGFPEHandler, &g_previousSIGFPE)) + { + return false; + } + + return true; +} + +// Set CoreRT hardware exception handler +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler) +{ + ASSERT_MSG(g_hardwareExceptionHandler == NULL, "Hardware exception handler already set") + g_hardwareExceptionHandler = handler; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.h 
b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.h new file mode 100644 index 0000000000000..8a4a18af1a64d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.h @@ -0,0 +1,10 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __HARDWARE_EXCEPTIONS_H__ +#define __HARDWARE_EXCEPTIONS_H__ + +// Initialize hardware exception handling +bool InitializeHardwareExceptionHandling(); + +#endif // __HARDWARE_EXCEPTIONS_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkInline.h new file mode 100644 index 0000000000000..bbc0e2b5cbde5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkInline.h @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Implementation of Redhawk PAL inline functions + +#include + +FORCEINLINE Int32 PalInterlockedIncrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return __sync_add_and_fetch(pDst, 1); +} + +FORCEINLINE Int32 PalInterlockedDecrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return __sync_sub_and_fetch(pDst, 1); +} + +FORCEINLINE UInt32 PalInterlockedOr(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return __sync_or_and_fetch(pDst, iValue); +} + +FORCEINLINE UInt32 PalInterlockedAnd(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return __sync_and_and_fetch(pDst, iValue); +} + +FORCEINLINE Int32 PalInterlockedExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue) +{ + return __sync_swap(pDst, iValue); +} + +FORCEINLINE Int64 PalInterlockedExchange64(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValue) +{ + return __sync_swap(pDst, iValue); +} + +FORCEINLINE Int32 PalInterlockedCompareExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue, Int32 iComparand) +{ + return __sync_val_compare_and_swap(pDst, iComparand, iValue); +} + +FORCEINLINE Int64 PalInterlockedCompareExchange64(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValue, Int64 iComparand) +{ + return __sync_val_compare_and_swap(pDst, iComparand, iValue); +} + +#if defined(HOST_AMD64) || defined(HOST_ARM64) +FORCEINLINE UInt8 PalInterlockedCompareExchange128(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValueHigh, Int64 iValueLow, Int64 *pComparandAndResult) +{ + __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (UInt64)pComparandAndResult[0]; + __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (UInt64)iValueLow); + pComparandAndResult[0] = (Int64)iResult; pComparandAndResult[1] = (Int64)(iResult >> 64); + return iComparand == iResult; +} +#endif // HOST_AMD64 + +#ifdef HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)PalInterlockedExchange64((Int64 volatile *)(_pDst), (Int64)(size_t)(_pValue))) + +#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)PalInterlockedCompareExchange64((Int64 volatile *)(_pDst), (Int64)(size_t)(_pValue), (Int64)(size_t)(_pComparand))) + +#else // HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)PalInterlockedExchange((Int32 volatile *)(_pDst), (Int32)(size_t)(_pValue))) + +#define 
PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)PalInterlockedCompareExchange((Int32 volatile *)(_pDst), (Int32)(size_t)(_pValue), (Int32)(size_t)(_pComparand))) + +#endif // HOST_64BIT + + +FORCEINLINE void PalYieldProcessor() +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + __asm__ __volatile__( + "rep\n" + "nop" + ); +#endif +} + +FORCEINLINE void PalMemoryBarrier() +{ + __sync_synchronize(); +} + +#define PalDebugBreak() abort() + +FORCEINLINE Int32 PalGetLastError() +{ + return errno; +} + +FORCEINLINE void PalSetLastError(Int32 error) +{ + errno = error; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkUnix.cpp new file mode 100644 index 0000000000000..16c5fe81deda0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -0,0 +1,1344 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementation of the Redhawk Platform Abstraction Layer (PAL) library when Unix is the platform. +// + +#include +#include +#include +#include +#include "config.h" +#include "UnixHandle.h" +#include +#include "gcenv.h" +#include "holder.h" +#include "HardwareExceptions.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_PTHREAD_GETTHREADID_NP +#include +#endif + +#if HAVE_LWP_SELF +#include +#endif + +#if HAVE_SYSCONF +// already included above +#elif HAVE_SYSCTL +#include +#else +#error Either sysctl or sysconf is required for GetSystemInfo. +#endif + +#if HAVE_SYS_VMPARAM_H +#include +#endif // HAVE_SYS_VMPARAM_H + +#if HAVE_MACH_VM_TYPES_H +#include +#endif // HAVE_MACH_VM_TYPES_H + +#if HAVE_MACH_VM_PARAM_H +#include +#endif // HAVE_MACH_VM_PARAM_H + +#ifdef __APPLE__ +#include +#include +#include +#include +#include +#endif // __APPLE__ + +#if HAVE_MACH_ABSOLUTE_TIME +#include +static mach_timebase_info_data_t s_TimebaseInfo; +#endif + +using std::nullptr_t; + +#ifndef __APPLE__ +#if HAVE_SYSCONF && HAVE__SC_AVPHYS_PAGES +#define SYSCONF_PAGES _SC_AVPHYS_PAGES +#elif HAVE_SYSCONF && HAVE__SC_PHYS_PAGES +#define SYSCONF_PAGES _SC_PHYS_PAGES +#else +#error Dont know how to get page-size on this architecture! 
+#endif +#endif // __APPLE__ + +#if defined(HOST_ARM) || defined(HOST_ARM64) +#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF +#define SYSCONF_GET_NUMPROCS_NAME "_SC_NPROCESSORS_CONF" +#else +#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_ONLN +#define SYSCONF_GET_NUMPROCS_NAME "_SC_NPROCESSORS_ONLN" +#endif + +#define PalRaiseFailFastException RaiseFailFastException + +#define FATAL_ASSERT(e, msg) \ + do \ + { \ + if (!(e)) \ + { \ + fprintf(stderr, "FATAL ERROR: " msg); \ + RhFailFast(); \ + } \ + } \ + while(0) + +#define INVALID_HANDLE_VALUE ((HANDLE)(IntNative)-1) + +#define PAGE_NOACCESS 0x01 +#define PAGE_READWRITE 0x04 +#define PAGE_EXECUTE_READ 0x20 +#define PAGE_EXECUTE_READWRITE 0x40 +#define MEM_COMMIT 0x1000 +#define MEM_RESERVE 0x2000 +#define MEM_DECOMMIT 0x4000 +#define MEM_RELEASE 0x8000 + +#define WAIT_OBJECT_0 0 +#define WAIT_TIMEOUT 258 +#define WAIT_FAILED 0xFFFFFFFF + +static const int tccSecondsToMilliSeconds = 1000; +static const int tccSecondsToMicroSeconds = 1000000; +static const int tccSecondsToNanoSeconds = 1000000000; +static const int tccMilliSecondsToMicroSeconds = 1000; +static const int tccMilliSecondsToNanoSeconds = 1000000; +static const int tccMicroSecondsToNanoSeconds = 1000; + +static uint32_t g_dwPALCapabilities; +static UInt32 g_cNumProcs = 0; + +// HACK: the gcenv.h declares OS_PAGE_SIZE as a call instead of a constant, but we need a constant +#undef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 + +// Helper memory page used by the FlushProcessWriteBuffers +static uint8_t g_helperPage[OS_PAGE_SIZE] __attribute__((aligned(OS_PAGE_SIZE))); + +// Mutex to make the FlushProcessWriteBuffersMutex thread safe +pthread_mutex_t g_flushProcessWriteBuffersMutex; + +bool QueryLogicalProcessorCount(); +bool InitializeFlushProcessWriteBuffers(); + +extern "C" void RaiseFailFastException(PEXCEPTION_RECORD arg1, PCONTEXT arg2, UInt32 arg3) +{ + // Abort aborts the process and causes creation of a crash dump + abort(); +} + +static void TimeSpecAdd(timespec* time, uint32_t milliseconds) +{ + uint64_t nsec = time->tv_nsec + (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; + if (nsec >= tccSecondsToNanoSeconds) + { + time->tv_sec += nsec / tccSecondsToNanoSeconds; + nsec %= tccSecondsToNanoSeconds; + } + + time->tv_nsec = nsec; +} + +// Convert nanoseconds to the timespec structure +// Parameters: +// nanoseconds - time in nanoseconds to convert +// t - the target timespec structure +static void NanosecondsToTimeSpec(uint64_t nanoseconds, timespec* t) +{ + t->tv_sec = nanoseconds / tccSecondsToNanoSeconds; + t->tv_nsec = nanoseconds % tccSecondsToNanoSeconds; +} + +void ReleaseCondAttr(pthread_condattr_t* condAttr) +{ + int st = pthread_condattr_destroy(condAttr); + ASSERT_MSG(st == 0, "Failed to destroy pthread_condattr_t object"); +} + +class PthreadCondAttrHolder : public Wrapper +{ +public: + PthreadCondAttrHolder(pthread_condattr_t* attrs) + : Wrapper(attrs) + { + } +}; + +class UnixEvent +{ + pthread_cond_t m_condition; + pthread_mutex_t m_mutex; + bool m_manualReset; + bool m_state; + bool m_isValid; + +public: + + UnixEvent(bool manualReset, bool initialState) + : m_manualReset(manualReset), + m_state(initialState), + m_isValid(false) + { + } + + bool Initialize() + { + pthread_condattr_t attrs; + int st = pthread_condattr_init(&attrs); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition attribute"); + return false; + } + + PthreadCondAttrHolder attrsHolder(&attrs); + +#if HAVE_PTHREAD_CONDATTR_SETCLOCK && 
!HAVE_MACH_ABSOLUTE_TIME + // Ensure that the pthread_cond_timedwait will use CLOCK_MONOTONIC + st = pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to set UnixEvent condition variable wait clock"); + return false; + } +#endif // HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_MACH_ABSOLUTE_TIME + + st = pthread_mutex_init(&m_mutex, NULL); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent mutex"); + return false; + } + + st = pthread_cond_init(&m_condition, &attrs); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition variable"); + + st = pthread_mutex_destroy(&m_mutex); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); + return false; + } + + m_isValid = true; + + return true; + } + + bool Destroy() + { + bool success = true; + + if (m_isValid) + { + int st = pthread_mutex_destroy(&m_mutex); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); + success = success && (st == 0); + + st = pthread_cond_destroy(&m_condition); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent condition variable"); + success = success && (st == 0); + } + + return success; + } + + uint32_t Wait(uint32_t milliseconds) + { + timespec endTime; +#if HAVE_MACH_ABSOLUTE_TIME + uint64_t endMachTime; + if (milliseconds != INFINITE) + { + uint64_t nanoseconds = (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; + NanosecondsToTimeSpec(nanoseconds, &endTime); + endMachTime = mach_absolute_time() + nanoseconds * s_TimebaseInfo.denom / s_TimebaseInfo.numer; + } +#elif HAVE_PTHREAD_CONDATTR_SETCLOCK + if (milliseconds != INFINITE) + { + clock_gettime(CLOCK_MONOTONIC, &endTime); + TimeSpecAdd(&endTime, milliseconds); + } +#else +#error Don't know how to perform timed wait on this platform +#endif + + int st = 0; + + pthread_mutex_lock(&m_mutex); + while (!m_state) + { + if (milliseconds == INFINITE) + { + st = pthread_cond_wait(&m_condition, &m_mutex); + } + else + { +#if HAVE_MACH_ABSOLUTE_TIME + // Since OSX doesn't support CLOCK_MONOTONIC, we use relative variant of the + // timed wait and we need to handle spurious wakeups properly. + st = pthread_cond_timedwait_relative_np(&m_condition, &m_mutex, &endTime); + if ((st == 0) && !m_state) + { + uint64_t machTime = mach_absolute_time(); + if (machTime < endMachTime) + { + // The wake up was spurious, recalculate the relative endTime + uint64_t remainingNanoseconds = (endMachTime - machTime) * s_TimebaseInfo.numer / s_TimebaseInfo.denom; + NanosecondsToTimeSpec(remainingNanoseconds, &endTime); + } + else + { + // Although the timed wait didn't report a timeout, time calculated from the + // mach time shows we have already reached the end time. It can happen if + // the wait was spuriously woken up right before the timeout. 
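+ // Report the timeout ourselves so the caller observes the same WAIT_TIMEOUT
+ // result as it would on the CLOCK_MONOTONIC code path.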
+ st = ETIMEDOUT; + } + } +#else // HAVE_MACH_ABSOLUTE_TIME + st = pthread_cond_timedwait(&m_condition, &m_mutex, &endTime); +#endif // HAVE_MACH_ABSOLUTE_TIME + // Verify that if the wait timed out, the event was not set + ASSERT((st != ETIMEDOUT) || !m_state); + } + + if (st != 0) + { + // wait failed or timed out + break; + } + } + + if ((st == 0) && !m_manualReset) + { + // Clear the state for auto-reset events so that only one waiter gets released + m_state = false; + } + + pthread_mutex_unlock(&m_mutex); + + uint32_t waitStatus; + + if (st == 0) + { + waitStatus = WAIT_OBJECT_0; + } + else if (st == ETIMEDOUT) + { + waitStatus = WAIT_TIMEOUT; + } + else + { + waitStatus = WAIT_FAILED; + } + + return waitStatus; + } + + void Set() + { + pthread_mutex_lock(&m_mutex); + m_state = true; + pthread_mutex_unlock(&m_mutex); + + // Unblock all threads waiting for the condition variable + pthread_cond_broadcast(&m_condition); + } + + void Reset() + { + pthread_mutex_lock(&m_mutex); + m_state = false; + pthread_mutex_unlock(&m_mutex); + } +}; + +class EventUnixHandle : public UnixHandle +{ +public: + EventUnixHandle(UnixEvent event) + : UnixHandle(event) + { + } + + virtual bool Destroy() + { + return m_object.Destroy(); + } +}; + +typedef UnixHandle ThreadUnixHandle; + +#if !HAVE_THREAD_LOCAL +extern "C" int __cxa_thread_atexit(void (*)(void*), void*, void *); +extern "C" void *__dso_handle; +#endif + +// This functions configures behavior of the signals that are not +// related to hardware exception handling. +void ConfigureSignals() +{ + // The default action for SIGPIPE is process termination. + // Since SIGPIPE can be signaled when trying to write on a socket for which + // the connection has been dropped, we need to tell the system we want + // to ignore this signal. + // Instead of terminating the process, the system call which would had + // issued a SIGPIPE will, instead, report an error and set errno to EPIPE. + signal(SIGPIPE, SIG_IGN); +} + +// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalInit() +{ + g_dwPALCapabilities = 0; + + if (!QueryLogicalProcessorCount()) + return false; + +#if HAVE_MACH_ABSOLUTE_TIME + kern_return_t machRet; + if ((machRet = mach_timebase_info(&s_TimebaseInfo)) != KERN_SUCCESS) + { + return false; + } +#endif + + if (!InitializeFlushProcessWriteBuffers()) + { + return false; + } +#ifndef USE_PORTABLE_HELPERS + if (!InitializeHardwareExceptionHandling()) + { + return false; + } +#endif // !USE_PORTABLE_HELPERS + + ConfigureSignals(); + + return true; +} + +// Given a mask of capabilities return true if all of them are supported by the current PAL. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalHasCapability(PalCapability capability) +{ + return (g_dwPALCapabilities & (uint32_t)capability) == (uint32_t)capability; +} + +#if HAVE_THREAD_LOCAL + +struct TlsDestructionMonitor +{ + void* m_thread = nullptr; + + void SetThread(void* thread) + { + m_thread = thread; + } + + ~TlsDestructionMonitor() + { + if (m_thread != nullptr) + { + RuntimeThreadShutdown(m_thread); + } + } +}; + +// This thread local object is used to detect thread shutdown. Its destructor +// is called when a thread is being shut down. +thread_local TlsDestructionMonitor tls_destructionMonitor; + +#endif // HAVE_THREAD_LOCAL + +// This thread local variable is used for delegate marshalling +DECLSPEC_THREAD intptr_t tls_thunkData; + +// Attach thread to PAL. 
+// It can be called multiple times for the same thread. +// It fails fast if a different thread was already registered. +// Parameters: +// thread - thread to attach +extern "C" void PalAttachThread(void* thread) +{ +#if HAVE_THREAD_LOCAL + tls_destructionMonitor.SetThread(thread); +#else + __cxa_thread_atexit(RuntimeThreadShutdown, thread, &__dso_handle); +#endif +} + +// Detach thread from PAL. +// It fails fast if some other thread value was attached to PAL. +// Parameters: +// thread - thread to detach +// Return: +// true if the thread was detached, false if there was no attached thread +extern "C" bool PalDetachThread(void* thread) +{ + UNREFERENCED_PARAMETER(thread); + if (g_threadExitCallback != nullptr) + { + g_threadExitCallback(); + } + return true; +} + +#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, void** newThunksOut) +{ + PORTABILITY_ASSERT("UNIXTODO: Implement this function"); +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(void *pBaseAddress) +{ + PORTABILITY_ASSERT("UNIXTODO: Implement this function"); +} +#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping) +{ + return UInt32_TRUE; +} + +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds) +{ +#if HAVE_CLOCK_NANOSLEEP + timespec endTime; + clock_gettime(CLOCK_MONOTONIC, &endTime); + TimeSpecAdd(&endTime, milliseconds); + while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &endTime, NULL) == EINTR) + { + } +#else // HAVE_CLOCK_NANOSLEEP + timespec requested; + requested.tv_sec = milliseconds / tccSecondsToMilliSeconds; + requested.tv_nsec = (milliseconds - requested.tv_sec * tccSecondsToMilliSeconds) * tccMilliSecondsToNanoSeconds; + + timespec remaining; + while (nanosleep(&requested, &remaining) == EINTR) + { + requested = remaining; + } +#endif // HAVE_CLOCK_NANOSLEEP +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI __stdcall PalSwitchToThread() +{ + // sched_yield yields to another thread in the current process. This implementation + // won't work well for cross-process synchronization. + return sched_yield() == 0; +} + +extern "C" UInt32_BOOL CloseHandle(HANDLE handle) +{ + if ((handle == NULL) || (handle == INVALID_HANDLE_VALUE)) + { + return UInt32_FALSE; + } + + UnixHandleBase* handleBase = (UnixHandleBase*)handle; + + bool success = handleBase->Destroy(); + + delete handleBase; + + return success ? 
UInt32_TRUE : UInt32_FALSE; +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ const wchar_t* pName) +{ + UnixEvent event = UnixEvent(manualReset, initialState); + if (!event.Initialize()) + { + return INVALID_HANDLE_VALUE; + } + + EventUnixHandle* handle = new (nothrow) EventUnixHandle(event); + + if (handle == NULL) + { + return INVALID_HANDLE_VALUE; + } + + return handle; +} + +typedef UInt32(__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext); + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, UInt32_BOOL highPriority) +{ +#ifdef HOST_WASM + // No threads, so we can't start one + ASSERT(false); +#endif // HOST_WASM + pthread_attr_t attrs; + + int st = pthread_attr_init(&attrs); + ASSERT(st == 0); + + static const int NormalPriority = 0; + static const int HighestPriority = -20; + + // TODO: Figure out which scheduler to use, the default one doesn't seem to + // support per thread priorities. +#if 0 + sched_param params; + memset(¶ms, 0, sizeof(params)); + + params.sched_priority = highPriority ? HighestPriority : NormalPriority; + + // Set the priority of the thread + st = pthread_attr_setschedparam(&attrs, ¶ms); + ASSERT(st == 0); +#endif + // Create the thread as detached, that means not joinable + st = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + ASSERT(st == 0); + + pthread_t threadId; + st = pthread_create(&threadId, &attrs, (void *(*)(void*))callback, pCallbackContext); + + int st2 = pthread_attr_destroy(&attrs); + ASSERT(st2 == 0); + + return st == 0; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, UInt32_FALSE); +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ +#ifdef HOST_WASM + // WASMTODO: No threads so we can't start the finalizer thread + return true; +#else // HOST_WASM + return PalStartBackgroundWork(callback, pCallbackContext, UInt32_TRUE); +#endif // HOST_WASM +} + +// Returns a 64-bit tick count with a millisecond resolution. It tries its best +// to return monotonically increasing counts and avoid being affected by changes +// to the system clock (either due to drift or due to explicit changes to system +// time). 
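+// As a concrete sketch of the CLOCK_MONOTONIC arithmetic used below: a timespec of
+// { tv_sec = 3, tv_nsec = 250000000 } converts to 3 * 1000 + 250000000 / 1000000 = 3250 ms.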
+REDHAWK_PALEXPORT UInt64 REDHAWK_PALAPI PalGetTickCount64() +{ + UInt64 retval = 0; + +#if HAVE_MACH_ABSOLUTE_TIME + { + retval = (mach_absolute_time() * s_TimebaseInfo.numer / s_TimebaseInfo.denom) / tccMilliSecondsToNanoSeconds; + } +#elif HAVE_CLOCK_MONOTONIC + { + clockid_t clockType = +#if HAVE_CLOCK_MONOTONIC_COARSE + CLOCK_MONOTONIC_COARSE; // good enough resolution, fastest speed +#else + CLOCK_MONOTONIC; +#endif + struct timespec ts; + if (clock_gettime(clockType, &ts) == 0) + { + retval = (ts.tv_sec * tccSecondsToMilliSeconds) + (ts.tv_nsec / tccMilliSecondsToNanoSeconds); + } + else + { + ASSERT_UNCONDITIONALLY("clock_gettime(CLOCK_MONOTONIC) failed\n"); + } + } +#else + { + struct timeval tv; + if (gettimeofday(&tv, NULL) == 0) + { + retval = (tv.tv_sec * tccSecondsToMilliSeconds) + (tv.tv_usec / tccMilliSecondsToMicroSeconds); + } + else + { + ASSERT_UNCONDITIONALLY("gettimeofday() failed\n"); + } + } +#endif + + return retval; +} + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalGetTickCount() +{ + return (UInt32)PalGetTickCount64(); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer) +{ + HANDLE moduleHandle = NULL; + + // Emscripten's implementation of dladdr corrupts memory, + // but always returns 0 for the module handle, so just skip the call +#if !defined(HOST_WASM) + Dl_info info; + int st = dladdr(pointer, &info); + if (st != 0) + { + moduleHandle = info.dli_fbase; + } +#endif //!defined(HOST_WASM) + + return moduleHandle; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() +{ + return true; +} + +REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) +{ + // Write the message using lowest-level OS API available. This is used to print the stack overflow + // message, so there is not much that can be done here. + write(STDERR_FILENO, message, sizeof(message)); +} + +bool QueryLogicalProcessorCount() +{ +#if HAVE_SYSCONF + g_cNumProcs = sysconf(SYSCONF_GET_NUMPROCS); + if (g_cNumProcs < 1) + { + ASSERT_UNCONDITIONALLY("sysconf failed for " SYSCONF_GET_NUMPROCS_NAME "\n"); + return false; + } +#elif HAVE_SYSCTL + size_t sz = sizeof(g_cNumProcs); + + int st = 0; + if (sysctlbyname("hw.logicalcpu_max", &g_cNumProcs, &sz, NULL, 0) != 0) + { + ASSERT_UNCONDITIONALLY("sysctl failed for hw.logicalcpu_max\n"); + return false; + } + +#endif // HAVE_SYSCONF + + return true; +} + +static int W32toUnixAccessControl(uint32_t flProtect) +{ + int prot = 0; + + switch (flProtect & 0xff) + { + case PAGE_NOACCESS: + prot = PROT_NONE; + break; + case PAGE_READWRITE: + prot = PROT_READ | PROT_WRITE; + break; + case PAGE_EXECUTE_READ: + prot = PROT_READ | PROT_EXEC; + break; + case PAGE_EXECUTE_READWRITE: + prot = PROT_READ | PROT_WRITE | PROT_EXEC; + break; + default: + ASSERT(false); + break; + } + return prot; +} + +REDHAWK_PALEXPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(_In_opt_ void* pAddress, size_t size, uint32_t allocationType, uint32_t protect) +{ + // TODO: thread safety! + + if ((allocationType & ~(MEM_RESERVE | MEM_COMMIT)) != 0) + { + // TODO: Implement + return NULL; + } + + ASSERT(((size_t)pAddress & (OS_PAGE_SIZE - 1)) == 0); + + // Align size to whole pages + size = (size + (OS_PAGE_SIZE - 1)) & ~(OS_PAGE_SIZE - 1); + int unixProtect = W32toUnixAccessControl(protect); + + if (allocationType & (MEM_RESERVE | MEM_COMMIT)) + { + // For Windows compatibility, let the PalVirtualAlloc reserve memory with 64k alignment. 
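+ // mmap only guarantees page alignment, so over-reserve by (64k - page size),
+ // then unmap the unaligned head and tail so that exactly size bytes remain,
+ // starting at the first 64k-aligned address inside the original mapping.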
+ static const size_t Alignment = 64 * 1024; + + size_t alignedSize = size + (Alignment - OS_PAGE_SIZE); + + void * pRetVal = mmap(pAddress, alignedSize, unixProtect, MAP_ANON | MAP_PRIVATE, -1, 0); + + if (pRetVal != NULL) + { + void * pAlignedRetVal = (void *)(((size_t)pRetVal + (Alignment - 1)) & ~(Alignment - 1)); + size_t startPadding = (size_t)pAlignedRetVal - (size_t)pRetVal; + if (startPadding != 0) + { + int ret = munmap(pRetVal, startPadding); + ASSERT(ret == 0); + } + + size_t endPadding = alignedSize - (startPadding + size); + if (endPadding != 0) + { + int ret = munmap((void *)((size_t)pAlignedRetVal + size), endPadding); + ASSERT(ret == 0); + } + + pRetVal = pAlignedRetVal; + } + + return pRetVal; + } + + if (allocationType & MEM_COMMIT) + { + int ret = mprotect(pAddress, size, unixProtect); + return (ret == 0) ? pAddress : NULL; + } + + return NULL; +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, size_t size, uint32_t freeType) +{ + ASSERT(((freeType & MEM_RELEASE) != MEM_RELEASE) || size == 0); + ASSERT((freeType & (MEM_RELEASE | MEM_DECOMMIT)) != (MEM_RELEASE | MEM_DECOMMIT)); + ASSERT(freeType != 0); + + // UNIXTODO: Implement this function + return UInt32_TRUE; +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, size_t size, uint32_t protect) +{ + int unixProtect = W32toUnixAccessControl(protect); + + return mprotect(pAddress, size, unixProtect) == 0; +} + +REDHAWK_PALEXPORT _Ret_maybenull_ void* REDHAWK_PALAPI PalSetWerDataBuffer(_In_ void* pNewBuffer) +{ + static void* pBuffer; + return _InterlockedExchangePointer(&pBuffer, pNewBuffer); +} + +extern "C" HANDLE GetCurrentProcess() +{ + return (HANDLE)-1; +} + +extern "C" uint32_t GetCurrentProcessId() +{ + return getpid(); +} + +extern "C" HANDLE GetCurrentThread() +{ + return (HANDLE)-2; +} + +extern "C" UInt32_BOOL DuplicateHandle( + HANDLE hSourceProcessHandle, + HANDLE hSourceHandle, + HANDLE hTargetProcessHandle, + HANDLE * lpTargetHandle, + UInt32 dwDesiredAccess, + UInt32_BOOL bInheritHandle, + UInt32 dwOptions) +{ + // We can only duplicate the current thread handle. That is all that the MRT uses. + ASSERT(hSourceProcessHandle == GetCurrentProcess()); + ASSERT(hTargetProcessHandle == GetCurrentProcess()); + ASSERT(hSourceHandle == GetCurrentThread()); + *lpTargetHandle = new (nothrow) ThreadUnixHandle(pthread_self()); + + return lpTargetHandle != nullptr; +} + +extern "C" UInt32_BOOL InitializeCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + return pthread_mutex_init(&lpCriticalSection->mutex, NULL) == 0; +} + +extern "C" UInt32_BOOL InitializeCriticalSectionEx(CRITICAL_SECTION * lpCriticalSection, UInt32 arg2, UInt32 arg3) +{ + return InitializeCriticalSection(lpCriticalSection); +} + + +extern "C" void DeleteCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + pthread_mutex_destroy(&lpCriticalSection->mutex); +} + +extern "C" void EnterCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + pthread_mutex_lock(&lpCriticalSection->mutex);; +} + +extern "C" void LeaveCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + pthread_mutex_unlock(&lpCriticalSection->mutex); +} + +extern "C" UInt32_BOOL IsDebuggerPresent() +{ +#ifdef HOST_WASM + // For now always true since the browser will handle it in case of WASM. 
+ return UInt32_TRUE; +#else + // UNIXTODO: Implement this function + return UInt32_FALSE; +#endif +} + +extern "C" void TerminateProcess(HANDLE arg1, UInt32 arg2) +{ + // TODO: change it to TerminateCurrentProcess + // Then if we modified the signature of the DuplicateHandle too, we can + // get rid of the PalGetCurrentProcess. + PORTABILITY_ASSERT("UNIXTODO: Implement this function"); +} + +extern "C" UInt32_BOOL SetEvent(HANDLE event) +{ + EventUnixHandle* unixHandle = (EventUnixHandle*)event; + unixHandle->GetObject()->Set(); + + return UInt32_TRUE; +} + +extern "C" UInt32_BOOL ResetEvent(HANDLE event) +{ + EventUnixHandle* unixHandle = (EventUnixHandle*)event; + unixHandle->GetObject()->Reset(); + + return UInt32_TRUE; +} + +extern "C" UInt32 GetEnvironmentVariableA(const char * name, char * buffer, UInt32 size) +{ + // Using std::getenv instead of getenv since it is guaranteed to be thread safe w.r.t. other + // std::getenv calls in C++11 + const char* value = std::getenv(name); + if (value == NULL) + { + return 0; + } + + size_t valueLen = strlen(value); + + if (valueLen < size) + { + strcpy(buffer, value); + return valueLen; + } + + // return required size including the null character or 0 if the size doesn't fit into UInt32 + return (valueLen < UINT32_MAX) ? (valueLen + 1) : 0; +} + +extern "C" UInt16 RtlCaptureStackBackTrace(UInt32 arg1, UInt32 arg2, void* arg3, UInt32* arg4) +{ + // UNIXTODO: Implement this function + return 0; +} + +typedef UInt32 (__stdcall *HijackCallback)(HANDLE hThread, _In_ PAL_LIMITED_CONTEXT* pThreadContext, _In_opt_ void* pCallbackContext); + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ HijackCallback callback, _In_opt_ void* pCallbackContext) +{ + // UNIXTODO: Implement PalHijack + return E_FAIL; +} + +extern "C" UInt32 WaitForSingleObjectEx(HANDLE handle, UInt32 milliseconds, UInt32_BOOL alertable) +{ + // The handle can only represent an event here + // TODO: encapsulate this stuff + UnixHandleBase* handleBase = (UnixHandleBase*)handle; + ASSERT(handleBase->GetType() == UnixHandleType::Event); + EventUnixHandle* unixHandle = (EventUnixHandle*)handleBase; + + return unixHandle->GetObject()->Wait(milliseconds); +} + +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) +{ + // Only a single handle wait for event is supported + ASSERT(handleCount == 1); + + return WaitForSingleObjectEx(pHandles[0], timeout, alertable); +} + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if !__has_builtin(_mm_pause) +extern "C" void _mm_pause() +// Defined for implementing PalYieldProcessor in PalRedhawk.h +{ +#if defined(HOST_AMD64) || defined(HOST_X86) + __asm__ volatile ("pause"); +#endif +} +#endif + +extern "C" Int32 _stricmp(const char *string1, const char *string2) +{ + return strcasecmp(string1, string2); +} + +REDHAWK_PALEXPORT Int32 PalGetProcessCpuCount() +{ + return g_cNumProcs; +} + +//Reads the entire contents of the file into the specified buffer, buff +//returns the number of bytes read if the file is successfully read +//returns 0 if the file is not found, size is greater than maxBytesToRead or the file couldn't be opened or read +REDHAWK_PALEXPORT UInt32 PalReadFileContents(_In_z_ const TCHAR* fileName, _Out_writes_all_(maxBytesToRead) char* buff, _In_ UInt32 maxBytesToRead) +{ + int fd = open(fileName, O_RDONLY); + if (fd < 0) + { + return 0; + } + + + UInt32 bytesRead = 0; + struct stat 
fileStats; + if ((fstat(fd, &fileStats) == 0) && (fileStats.st_size <= maxBytesToRead)) + { + bytesRead = read(fd, buff, fileStats.st_size); + } + + close(fd); + + return bytesRead; +} + +__thread void* pStackHighOut = NULL; +__thread void* pStackLowOut = NULL; + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. +REDHAWK_PALEXPORT bool PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) +{ + if (pStackHighOut == NULL) + { +#ifdef __APPLE__ + // This is a Mac specific method + pStackHighOut = pthread_get_stackaddr_np(pthread_self()); + pStackLowOut = ((uint8_t *)pStackHighOut - pthread_get_stacksize_np(pthread_self())); +#else // __APPLE__ + pthread_attr_t attr; + size_t stackSize; + int status; + + pthread_t thread = pthread_self(); + + status = pthread_attr_init(&attr); + ASSERT_MSG(status == 0, "pthread_attr_init call failed"); + +#if HAVE_PTHREAD_ATTR_GET_NP + status = pthread_attr_get_np(thread, &attr); +#elif HAVE_PTHREAD_GETATTR_NP + status = pthread_getattr_np(thread, &attr); +#else +#error Dont know how to get thread attributes on this platform! +#endif + ASSERT_MSG(status == 0, "pthread_getattr_np call failed"); + + status = pthread_attr_getstack(&attr, &pStackLowOut, &stackSize); + ASSERT_MSG(status == 0, "pthread_attr_getstack call failed"); + + status = pthread_attr_destroy(&attr); + ASSERT_MSG(status == 0, "pthread_attr_destroy call failed"); + + pStackHighOut = (uint8_t*)pStackLowOut + stackSize; +#endif // __APPLE__ + } + + *ppStackLowOut = pStackLowOut; + *ppStackHighOut = pStackHighOut; + + return true; +} + +// retrieves the full path to the specified module, if moduleBase is NULL retreieves the full path to the +// executable module of the current process. 
+// +// Return value: number of characters in name string +// +REDHAWK_PALEXPORT Int32 PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) +{ +#if defined(HOST_WASM) + // Emscripten's implementation of dladdr corrupts memory and doesn't have the real name, so make up a name instead + const TCHAR* wasmModuleName = "WebAssemblyModule"; + *pModuleNameOut = wasmModuleName; + return strlen(wasmModuleName); +#else // HOST_WASM + Dl_info dl; + if (dladdr(moduleBase, &dl) == 0) + { + *pModuleNameOut = NULL; + return 0; + } + + *pModuleNameOut = dl.dli_fname; + return strlen(dl.dli_fname); +#endif // defined(HOST_WASM) +} + +GCSystemInfo g_RhSystemInfo; + +// Initialize the g_SystemInfo +bool InitializeSystemInfo() +{ + long pagesize = getpagesize(); + g_RhSystemInfo.dwPageSize = pagesize; + g_RhSystemInfo.dwAllocationGranularity = pagesize; + + int nrcpus = 0; + +#if HAVE_SYSCONF + nrcpus = sysconf(SYSCONF_GET_NUMPROCS); + if (nrcpus < 1) + { + ASSERT_UNCONDITIONALLY("sysconf failed for " SYSCONF_GET_NUMPROCS_NAME "\n"); + return false; + } +#elif HAVE_SYSCTL + int mib[2]; + + size_t sz = sizeof(nrcpus); + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + int rc = sysctl(mib, 2, &nrcpus, &sz, NULL, 0); + if (rc != 0) + { + ASSERT_UNCONDITIONALLY("sysctl failed for HW_NCPU\n"); + return false; + } +#endif // HAVE_SYSCONF + + g_RhSystemInfo.dwNumberOfProcessors = nrcpus; + + return true; +} + +// This function initializes data structures needed for the FlushProcessWriteBuffers +// Return: +// true if it succeeded, false otherwise +bool InitializeFlushProcessWriteBuffers() +{ + // Verify that the s_helperPage is really aligned to the g_SystemInfo.dwPageSize + ASSERT((((size_t)g_helperPage) & (OS_PAGE_SIZE - 1)) == 0); + + // Locking the page ensures that it stays in memory during the two mprotect + // calls in the FlushProcessWriteBuffers below. If the page was unmapped between + // those calls, they would not have the expected effect of generating IPI. + int status = mlock(g_helperPage, OS_PAGE_SIZE); + + if (status != 0) + { + return false; + } + + status = pthread_mutex_init(&g_flushProcessWriteBuffersMutex, NULL); + if (status != 0) + { + munlock(g_helperPage, OS_PAGE_SIZE); + } + + return status == 0; +} + +extern "C" void FlushProcessWriteBuffers() +{ + int status = pthread_mutex_lock(&g_flushProcessWriteBuffersMutex); + FATAL_ASSERT(status == 0, "Failed to lock the flushProcessWriteBuffersMutex lock"); + + // Changing a helper memory page protection from read / write to no access + // causes the OS to issue IPI to flush TLBs on all processors. This also + // results in flushing the processor buffers. + status = mprotect(g_helperPage, OS_PAGE_SIZE, PROT_READ | PROT_WRITE); + FATAL_ASSERT(status == 0, "Failed to change helper page protection to read / write"); + + // Ensure that the page is dirty before we change the protection so that + // we prevent the OS from skipping the global TLB flush. 
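+ // The atomic increment below both writes to the helper page (dirtying it) and
+ // acts as a full memory barrier on the flushing thread.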
+ __sync_add_and_fetch((size_t*)g_helperPage, 1); + + status = mprotect(g_helperPage, OS_PAGE_SIZE, PROT_NONE); + FATAL_ASSERT(status == 0, "Failed to change helper page protection to no access"); + + status = pthread_mutex_unlock(&g_flushProcessWriteBuffersMutex); + FATAL_ASSERT(status == 0, "Failed to unlock the flushProcessWriteBuffersMutex lock"); +} + +static const int64_t SECS_BETWEEN_1601_AND_1970_EPOCHS = 11644473600LL; +static const int64_t SECS_TO_100NS = 10000000; /* 10^7 */ + +extern "C" void GetSystemTimeAsFileTime(FILETIME *lpSystemTimeAsFileTime) +{ + struct timeval time = { 0 }; + gettimeofday(&time, NULL); + + int64_t result = ((int64_t)time.tv_sec + SECS_BETWEEN_1601_AND_1970_EPOCHS) * SECS_TO_100NS + + (time.tv_usec * 10); + + lpSystemTimeAsFileTime->dwLowDateTime = (uint32_t)result; + lpSystemTimeAsFileTime->dwHighDateTime = (uint32_t)(result >> 32); +} + +extern "C" UInt32_BOOL QueryPerformanceCounter(LARGE_INTEGER *lpPerformanceCount) +{ + // TODO: More efficient, platform-specific implementation + struct timeval tv; + if (gettimeofday(&tv, NULL) == -1) + { + ASSERT_UNCONDITIONALLY("gettimeofday() failed"); + return UInt32_FALSE; + } + lpPerformanceCount->QuadPart = + (int64_t) tv.tv_sec * (int64_t) tccSecondsToMicroSeconds + (int64_t) tv.tv_usec; + return UInt32_TRUE; +} + +extern "C" UInt32_BOOL QueryPerformanceFrequency(LARGE_INTEGER *lpFrequency) +{ + lpFrequency->QuadPart = (int64_t) tccSecondsToMicroSeconds; + return UInt32_TRUE; +} + +extern "C" UInt64 PalGetCurrentThreadIdForLogging() +{ +#if defined(__linux__) + return (uint64_t)syscall(SYS_gettid); +#elif defined(__APPLE__) + uint64_t tid; + pthread_threadid_np(pthread_self(), &tid); + return (uint64_t)tid; +#elif HAVE_PTHREAD_GETTHREADID_NP + return (uint64_t)pthread_getthreadid_np(); +#elif HAVE_LWP_SELF + return (uint64_t)_lwp_self(); +#else + // Fallback in case we don't know how to get integer thread id on the current platform + return (uint64_t)pthread_self(); +#endif +} + +#if defined(HOST_X86) || defined(HOST_AMD64) +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI getcpuid(uint32_t arg, unsigned char result[16]) +{ + DWORD eax; +#if defined(HOST_X86) + __asm(" xor %%ecx, %%ecx\n" \ + " cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax) /*output in eax*/\ + : "a"(arg), [result]"r"(result) /*inputs - arg in eax, result in any register*/\ + : "ebx", "ecx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_X86) +#if defined(HOST_AMD64) + __asm(" xor %%ecx, %%ecx\n" \ + " cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax) /*output in eax*/\ + : "a"(arg), [result]"r"(result) /*inputs - arg in eax, result in any register*/\ + : "rbx", "ecx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_AMD64) + return eax; +} + +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI getextcpuid(uint32_t arg1, uint32_t arg2, unsigned char result[16]) +{ + DWORD eax; +#if defined(HOST_X86) + DWORD ecx; + __asm(" cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax), "=c"(ecx) /*output in eax, ecx is rewritten*/\ + : "c"(arg1), "a"(arg2), [result]"r"(result) /*inputs - arg1 in ecx, arg2 in eax, result in any register*/\ + : 
"ebx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_X86) +#if defined(HOST_AMD64) + __asm(" cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(arg1), "a"(arg2), [result]"r"(result) /*inputs - arg1 in ecx, arg2 in eax, result in any register*/\ + : "rbx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_AMD64) + return eax; +} + +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport() +{ + DWORD eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + // check OS has enabled both XMM and YMM state support + return ((eax & 0x06) == 0x06) ? 1 : 0; +} +#endif // defined(HOST_X86) || defined(HOST_AMD64) diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.cpp new file mode 100644 index 0000000000000..458214bbe56f7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.cpp @@ -0,0 +1,636 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "regdisplay.h" +#include "config.h" + +#include + +#if HAVE_UCONTEXT_T +#include +#endif // HAVE_UCONTEXT_T + +#include "UnixContext.h" +#include "UnwindHelpers.h" + +// WebAssembly has a slightly different version of LibUnwind that doesn't define unw_get_save_loc +#if defined(HOST_WASM) +enum unw_save_loc_type_t +{ + UNW_SLT_NONE, /* register is not saved ("not an l-value") */ + UNW_SLT_MEMORY, /* register has been saved in memory */ + UNW_SLT_REG /* register has been saved in (another) register */ +}; +typedef enum unw_save_loc_type_t unw_save_loc_type_t; + +struct unw_save_loc_t +{ + unw_save_loc_type_t type; + union + { + unw_word_t addr; /* valid if type==UNW_SLT_MEMORY */ + unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */ + } + u; +}; +typedef struct unw_save_loc_t unw_save_loc_t; + +int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) +{ + return -1; +} +#endif // _WASM + +#ifdef __APPLE__ + +#define MCREG_Rip(mc) ((mc)->__ss.__rip) +#define MCREG_Rsp(mc) ((mc)->__ss.__rsp) +#define MCREG_Rax(mc) ((mc)->__ss.__rax) +#define MCREG_Rbx(mc) ((mc)->__ss.__rbx) +#define MCREG_Rcx(mc) ((mc)->__ss.__rcx) +#define MCREG_Rdx(mc) ((mc)->__ss.__rdx) +#define MCREG_Rsi(mc) ((mc)->__ss.__rsi) +#define MCREG_Rdi(mc) ((mc)->__ss.__rdi) +#define MCREG_Rbp(mc) ((mc)->__ss.__rbp) +#define MCREG_R8(mc) ((mc)->__ss.__r8) +#define MCREG_R9(mc) ((mc)->__ss.__r9) +#define MCREG_R10(mc) ((mc)->__ss.__r10) +#define MCREG_R11(mc) ((mc)->__ss.__r11) +#define MCREG_R12(mc) ((mc)->__ss.__r12) +#define MCREG_R13(mc) ((mc)->__ss.__r13) +#define MCREG_R14(mc) ((mc)->__ss.__r14) +#define MCREG_R15(mc) ((mc)->__ss.__r15) + +#else + +#if HAVE___GREGSET_T + +#ifdef HOST_64BIT +#define MCREG_Rip(mc) ((mc).__gregs[_REG_RIP]) +#define MCREG_Rsp(mc) ((mc).__gregs[_REG_RSP]) +#define MCREG_Rax(mc) ((mc).__gregs[_REG_RAX]) +#define MCREG_Rbx(mc) ((mc).__gregs[_REG_RBX]) +#define MCREG_Rcx(mc) ((mc).__gregs[_REG_RCX]) +#define MCREG_Rdx(mc) ((mc).__gregs[_REG_RDX]) +#define MCREG_Rsi(mc) ((mc).__gregs[_REG_RSI]) +#define MCREG_Rdi(mc) 
((mc).__gregs[_REG_RDI]) +#define MCREG_Rbp(mc) ((mc).__gregs[_REG_RBP]) +#define MCREG_R8(mc) ((mc).__gregs[_REG_R8]) +#define MCREG_R9(mc) ((mc).__gregs[_REG_R9]) +#define MCREG_R10(mc) ((mc).__gregs[_REG_R10]) +#define MCREG_R11(mc) ((mc).__gregs[_REG_R11]) +#define MCREG_R12(mc) ((mc).__gregs[_REG_R12]) +#define MCREG_R13(mc) ((mc).__gregs[_REG_R13]) +#define MCREG_R14(mc) ((mc).__gregs[_REG_R14]) +#define MCREG_R15(mc) ((mc).__gregs[_REG_R15]) + +#else // HOST_64BIT + +#define MCREG_Eip(mc) ((mc).__gregs[_REG_EIP]) +#define MCREG_Esp(mc) ((mc).__gregs[_REG_ESP]) +#define MCREG_Eax(mc) ((mc).__gregs[_REG_EAX]) +#define MCREG_Ebx(mc) ((mc).__gregs[_REG_EBX]) +#define MCREG_Ecx(mc) ((mc).__gregs[_REG_ECX]) +#define MCREG_Edx(mc) ((mc).__gregs[_REG_EDX]) +#define MCREG_Esi(mc) ((mc).__gregs[_REG_ESI]) +#define MCREG_Edi(mc) ((mc).__gregs[_REG_EDI]) +#define MCREG_Ebp(mc) ((mc).__gregs[_REG_EBP]) + +#endif // HOST_64BIT + +#elif HAVE_GREGSET_T + +#ifdef HOST_64BIT +#define MCREG_Rip(mc) ((mc).gregs[REG_RIP]) +#define MCREG_Rsp(mc) ((mc).gregs[REG_RSP]) +#define MCREG_Rax(mc) ((mc).gregs[REG_RAX]) +#define MCREG_Rbx(mc) ((mc).gregs[REG_RBX]) +#define MCREG_Rcx(mc) ((mc).gregs[REG_RCX]) +#define MCREG_Rdx(mc) ((mc).gregs[REG_RDX]) +#define MCREG_Rsi(mc) ((mc).gregs[REG_RSI]) +#define MCREG_Rdi(mc) ((mc).gregs[REG_RDI]) +#define MCREG_Rbp(mc) ((mc).gregs[REG_RBP]) +#define MCREG_R8(mc) ((mc).gregs[REG_R8]) +#define MCREG_R9(mc) ((mc).gregs[REG_R9]) +#define MCREG_R10(mc) ((mc).gregs[REG_R10]) +#define MCREG_R11(mc) ((mc).gregs[REG_R11]) +#define MCREG_R12(mc) ((mc).gregs[REG_R12]) +#define MCREG_R13(mc) ((mc).gregs[REG_R13]) +#define MCREG_R14(mc) ((mc).gregs[REG_R14]) +#define MCREG_R15(mc) ((mc).gregs[REG_R15]) + +#else // HOST_64BIT + +#define MCREG_Eip(mc) ((mc).gregs[REG_EIP]) +#define MCREG_Esp(mc) ((mc).gregs[REG_ESP]) +#define MCREG_Eax(mc) ((mc).gregs[REG_EAX]) +#define MCREG_Ebx(mc) ((mc).gregs[REG_EBX]) +#define MCREG_Ecx(mc) ((mc).gregs[REG_ECX]) +#define MCREG_Edx(mc) ((mc).gregs[REG_EDX]) +#define MCREG_Esi(mc) ((mc).gregs[REG_ESI]) +#define MCREG_Edi(mc) ((mc).gregs[REG_EDI]) +#define MCREG_Ebp(mc) ((mc).gregs[REG_EBP]) + +#endif // HOST_64BIT + +#else // HAVE_GREGSET_T + +#ifdef HOST_64BIT + +#if defined(HOST_ARM64) + +#define MCREG_Pc(mc) ((mc).pc) +#define MCREG_Sp(mc) ((mc).sp) +#define MCREG_Lr(mc) ((mc).regs[30]) +#define MCREG_X0(mc) ((mc).regs[0]) +#define MCREG_X1(mc) ((mc).regs[1]) +#define MCREG_X19(mc) ((mc).regs[19]) +#define MCREG_X20(mc) ((mc).regs[20]) +#define MCREG_X21(mc) ((mc).regs[21]) +#define MCREG_X22(mc) ((mc).regs[22]) +#define MCREG_X23(mc) ((mc).regs[23]) +#define MCREG_X24(mc) ((mc).regs[24]) +#define MCREG_X25(mc) ((mc).regs[25]) +#define MCREG_X26(mc) ((mc).regs[26]) +#define MCREG_X27(mc) ((mc).regs[27]) +#define MCREG_X28(mc) ((mc).regs[28]) +#define MCREG_Fp(mc) ((mc).regs[29]) + +#else + +// For FreeBSD, as found in x86/ucontext.h +#define MCREG_Rip(mc) ((mc).mc_rip) +#define MCREG_Rsp(mc) ((mc).mc_rsp) +#define MCREG_Rax(mc) ((mc).mc_rax) +#define MCREG_Rbx(mc) ((mc).mc_rbx) +#define MCREG_Rcx(mc) ((mc).mc_rcx) +#define MCREG_Rdx(mc) ((mc).mc_rdx) +#define MCREG_Rsi(mc) ((mc).mc_rsi) +#define MCREG_Rdi(mc) ((mc).mc_rdi) +#define MCREG_Rbp(mc) ((mc).mc_rbp) +#define MCREG_R8(mc) ((mc).mc_r8) +#define MCREG_R9(mc) ((mc).mc_r9) +#define MCREG_R10(mc) ((mc).mc_r10) +#define MCREG_R11(mc) ((mc).mc_r11) +#define MCREG_R12(mc) ((mc).mc_r12) +#define MCREG_R13(mc) ((mc).mc_r13) +#define MCREG_R14(mc) ((mc).mc_r14) +#define MCREG_R15(mc) ((mc).mc_r15) 
+ +#endif + +#else // HOST_64BIT + +#if defined(HOST_ARM) + +#define MCREG_Pc(mc) ((mc).arm_pc) +#define MCREG_Sp(mc) ((mc).arm_sp) +#define MCREG_Lr(mc) ((mc).arm_lr) +#define MCREG_R0(mc) ((mc).arm_r0) +#define MCREG_R1(mc) ((mc).arm_r1) +#define MCREG_R4(mc) ((mc).arm_r4) +#define MCREG_R5(mc) ((mc).arm_r5) +#define MCREG_R6(mc) ((mc).arm_r6) +#define MCREG_R7(mc) ((mc).arm_r7) +#define MCREG_R8(mc) ((mc).arm_r8) +#define MCREG_R9(mc) ((mc).arm_r9) +#define MCREG_R10(mc) ((mc).arm_r10) +#define MCREG_R11(mc) ((mc).arm_fp) + +#elif defined(HOST_X86) + +#define MCREG_Eip(mc) ((mc).mc_eip) +#define MCREG_Esp(mc) ((mc).mc_esp) +#define MCREG_Eax(mc) ((mc).mc_eax) +#define MCREG_Ebx(mc) ((mc).mc_ebx) +#define MCREG_Ecx(mc) ((mc).mc_ecx) +#define MCREG_Edx(mc) ((mc).mc_edx) +#define MCREG_Esi(mc) ((mc).mc_esi) +#define MCREG_Edi(mc) ((mc).mc_edi) +#define MCREG_Ebp(mc) ((mc).mc_ebp) + +#else +#error "Unsupported arch" +#endif + +#endif // HOST_64BIT + +#endif // HAVE_GREGSET_T + +#endif // __APPLE__ + +// Update unw_cursor_t from REGDISPLAY. +// NOTE: We don't set the IP here since the current use cases for this function +// don't require it. +static void RegDisplayToUnwindCursor(REGDISPLAY* regDisplay, unw_cursor_t *cursor) +{ +#define ASSIGN_REG(regName1, regName2) \ + unw_set_reg(cursor, regName1, regDisplay->regName2, 0); + +#define ASSIGN_REG_PTR(regName1, regName2) \ + if (regDisplay->p##regName2 != NULL) \ + unw_set_reg(cursor, regName1, *(regDisplay->p##regName2), 0); + +#if defined(HOST_AMD64) + ASSIGN_REG(UNW_REG_SP, SP) + ASSIGN_REG_PTR(UNW_X86_64_RBP, Rbp) + ASSIGN_REG_PTR(UNW_X86_64_RBX, Rbx) + ASSIGN_REG_PTR(UNW_X86_64_R12, R12) + ASSIGN_REG_PTR(UNW_X86_64_R13, R13) + ASSIGN_REG_PTR(UNW_X86_64_R14, R14) + ASSIGN_REG_PTR(UNW_X86_64_R15, R15) +#elif HOST_ARM + ASSIGN_REG(UNW_ARM_SP, SP) + ASSIGN_REG_PTR(UNW_ARM_R4, R4) + ASSIGN_REG_PTR(UNW_ARM_R5, R5) + ASSIGN_REG_PTR(UNW_ARM_R6, R6) + ASSIGN_REG_PTR(UNW_ARM_R7, R7) + ASSIGN_REG_PTR(UNW_ARM_R8, R8) + ASSIGN_REG_PTR(UNW_ARM_R9, R9) + ASSIGN_REG_PTR(UNW_ARM_R10, R10) + ASSIGN_REG_PTR(UNW_ARM_R11, R11) + ASSIGN_REG_PTR(UNW_ARM_R14, LR) +#elif HOST_ARM64 + ASSIGN_REG(UNW_ARM64_SP, SP) + ASSIGN_REG_PTR(UNW_ARM64_FP, FP) + ASSIGN_REG_PTR(UNW_ARM64_X19, X19) + ASSIGN_REG_PTR(UNW_ARM64_X20, X20) + ASSIGN_REG_PTR(UNW_ARM64_X21, X21) + ASSIGN_REG_PTR(UNW_ARM64_X22, X22) + ASSIGN_REG_PTR(UNW_ARM64_X23, X23) + ASSIGN_REG_PTR(UNW_ARM64_X24, X24) + ASSIGN_REG_PTR(UNW_ARM64_X25, X25) + ASSIGN_REG_PTR(UNW_ARM64_X26, X26) + ASSIGN_REG_PTR(UNW_ARM64_X27, X27) + ASSIGN_REG_PTR(UNW_ARM64_X28, X28) +#elif defined(HOST_X86) + ASSIGN_REG(UNW_REG_SP, SP) + ASSIGN_REG_PTR(UNW_X86_EBP, Rbp) + ASSIGN_REG_PTR(UNW_X86_EBX, Rbx) +#endif + +#undef ASSIGN_REG +#undef ASSIGN_REG_PTR +} + +// Returns the unw_proc_info_t for a given IP. +bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) +{ + int st; + + unw_context_t unwContext; + unw_cursor_t cursor; + + st = unw_getcontext(&unwContext); + if (st < 0) + { + return false; + } + +#ifdef HOST_AMD64 + // We manually index into the unw_context_t's internals for now because there's + // no better way to modify it. This will go away in the future when we locate the + // LSDA and other information without initializing an unwind cursor. 
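+ // (Layout note: for x86_64, unw_getcontext saves the 16 general purpose registers
+ // followed by the instruction pointer, so data[16] is the IP slot being patched
+ // below; the ARM/ARM64 branches patch the analogous PC slot for their layouts.)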
+ unwContext.data[16] = ip; +#elif HOST_ARM + ((uint32_t*)(unwContext.data))[15] = ip; +#elif HOST_ARM64 + ((uint32_t*)(unwContext.data))[32] = ip; +#elif HOST_WASM + ASSERT(false); +#elif HOST_X86 + ASSERT(false); +#else + #error "GetUnwindProcInfo is not supported on this arch yet." +#endif + + st = unw_init_local(&cursor, &unwContext); + if (st < 0) + { + return false; + } + + st = unw_get_proc_info(&cursor, procInfo); + if (st < 0) + { + return false; + } + + return true; +} + +// Initialize unw_cursor_t and unw_context_t from REGDISPLAY +bool InitializeUnwindContextAndCursor(REGDISPLAY* regDisplay, unw_cursor_t* cursor, unw_context_t* unwContext) +{ + int st; + + st = unw_getcontext(unwContext); + if (st < 0) + { + return false; + } + + // Set the IP here instead of after unwinder initialization. unw_init_local + // will do some initialization of internal structures based on the IP value. + // We manually index into the unw_context_t's internals for now because there's + // no better way to modify it. This whole function will go away in the future + // when we are able to read unwind info without initializing an unwind cursor. +#ifdef HOST_AMD64 + unwContext->data[16] = regDisplay->IP; +#elif HOST_ARM + ((uint32_t*)(unwContext->data))[15] = regDisplay->IP; +#elif HOST_ARM64 + ((uint32_t*)(unwContext->data))[32] = regDisplay->IP; +#elif HOST_X86 + ASSERT(false); +#else + #error "InitializeUnwindContextAndCursor is not supported on this arch yet." +#endif + + st = unw_init_local(cursor, unwContext); + if (st < 0) + { + return false; + } + + // Set the unwind context to the specified Windows context. + RegDisplayToUnwindCursor(regDisplay, cursor); + + return true; +} + +// Update context pointer for a register from the unw_cursor_t. +static void GetContextPointer(unw_cursor_t *cursor, unw_context_t *unwContext, int reg, PTR_UIntNative *contextPointer) +{ + unw_save_loc_t saveLoc; + unw_get_save_loc(cursor, reg, &saveLoc); + if (saveLoc.type == UNW_SLT_MEMORY) + { + PTR_UIntNative pLoc = (PTR_UIntNative)saveLoc.u.addr; + // Filter out fake save locations that point to unwContext + if (unwContext == NULL || (pLoc < (PTR_UIntNative)unwContext) || ((PTR_UIntNative)(unwContext + 1) <= pLoc)) + *contextPointer = (PTR_UIntNative)saveLoc.u.addr; + } +} + +#if defined(HOST_AMD64) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_X86_64_RBP, Rbp) \ + GET_CONTEXT_POINTER(UNW_X86_64_RBX, Rbx) \ + GET_CONTEXT_POINTER(UNW_X86_64_R12, R12) \ + GET_CONTEXT_POINTER(UNW_X86_64_R13, R13) \ + GET_CONTEXT_POINTER(UNW_X86_64_R14, R14) \ + GET_CONTEXT_POINTER(UNW_X86_64_R15, R15) +#elif defined(HOST_ARM) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_ARM_R4, R4) \ + GET_CONTEXT_POINTER(UNW_ARM_R5, R5) \ + GET_CONTEXT_POINTER(UNW_ARM_R6, R6) \ + GET_CONTEXT_POINTER(UNW_ARM_R7, R7) \ + GET_CONTEXT_POINTER(UNW_ARM_R8, R8) \ + GET_CONTEXT_POINTER(UNW_ARM_R9, R9) \ + GET_CONTEXT_POINTER(UNW_ARM_R10, R10) \ + GET_CONTEXT_POINTER(UNW_ARM_R11, R11) +#elif defined(HOST_ARM64) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_ARM64_X19, X19) \ + GET_CONTEXT_POINTER(UNW_ARM64_X20, X20) \ + GET_CONTEXT_POINTER(UNW_ARM64_X21, X21) \ + GET_CONTEXT_POINTER(UNW_ARM64_X22, X22) \ + GET_CONTEXT_POINTER(UNW_ARM64_X23, X23) \ + GET_CONTEXT_POINTER(UNW_ARM64_X24, X24) \ + GET_CONTEXT_POINTER(UNW_ARM64_X25, X25) \ + GET_CONTEXT_POINTER(UNW_ARM64_X26, X26) \ + GET_CONTEXT_POINTER(UNW_ARM64_X27, X27) \ + GET_CONTEXT_POINTER(UNW_ARM64_X28, X28) \ + GET_CONTEXT_POINTER(UNW_ARM64_FP, FP) +#elif 
defined(HOST_X86) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_X86_EBP, Rbp) \ + GET_CONTEXT_POINTER(UNW_X86_EBX, Rbx) +#elif defined (HOST_WASM) +// No registers +#define GET_CONTEXT_POINTERS +#else +#error unsupported architecture +#endif + +// Update REGDISPLAY from the unw_cursor_t and unw_context_t +void UnwindCursorToRegDisplay(unw_cursor_t *cursor, unw_context_t *unwContext, REGDISPLAY *regDisplay) +{ +#define GET_CONTEXT_POINTER(unwReg, rdReg) GetContextPointer(cursor, unwContext, unwReg, ®Display->p##rdReg); + GET_CONTEXT_POINTERS +#undef GET_CONTEXT_POINTER + + unw_get_reg(cursor, UNW_REG_IP, (unw_word_t *) ®Display->IP); + unw_get_reg(cursor, UNW_REG_SP, (unw_word_t *) ®Display->SP); + +#if defined(HOST_AMD64) + regDisplay->pIP = PTR_PCODE(regDisplay->SP - sizeof(TADDR)); +#endif + +#if defined(HOST_ARM) || defined(HOST_ARM64) + regDisplay->IP |= 1; +#endif +} + +#if defined(HOST_AMD64) +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Rip, IP) \ + ASSIGN_REG(Rsp, Rsp) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(Rbx, Rbx) \ + ASSIGN_REG(Rbp, Rbp) \ + ASSIGN_REG(R12, R12) \ + ASSIGN_REG(R13, R13) \ + ASSIGN_REG(R14, R14) \ + ASSIGN_REG(R15, R15) + +#define ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg) \ + MCREG_Rdi(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_Rsi(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_X86) +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Eip, IP) \ + ASSIGN_REG(Esp, Rsp) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(Ebx, Rbx) \ + ASSIGN_REG(Ebp, Rbp) + +#define ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg) \ + MCREG_Ecx(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_Edx(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_ARM) + +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Pc, IP) \ + ASSIGN_REG(Sp, SP) \ + ASSIGN_REG(Lr, LR) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(R4, R4) \ + ASSIGN_REG(R5, R5) \ + ASSIGN_REG(R6, R6) \ + ASSIGN_REG(R7, R7) \ + ASSIGN_REG(R8, R8) \ + ASSIGN_REG(R9, R9) \ + ASSIGN_REG(R10, R10) \ + ASSIGN_REG(R11, R11) + +#define ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg) \ + MCREG_R0(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_R1(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_ARM64) + +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Pc, IP) \ + ASSIGN_REG(Sp, SP) \ + ASSIGN_REG(Fp, FP) \ + ASSIGN_REG(Lr, LR) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(X19, X19) \ + ASSIGN_REG(X20, X20) \ + ASSIGN_REG(X21, X21) \ + ASSIGN_REG(X22, X22) \ + ASSIGN_REG(X23, X23) \ + ASSIGN_REG(X24, X24) \ + ASSIGN_REG(X25, X25) \ + ASSIGN_REG(X26, X26) \ + ASSIGN_REG(X27, X27) \ + ASSIGN_REG(X28, X28) + +#define ASSIGN_TWO_ARGUMENT_REGS \ + MCREG_X0(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_X1(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_WASM) + // TODO: determine how unwinding will work on WebAssembly +#define ASSIGN_CONTROL_REGS +#define ASSIGN_INTEGER_REGS +#define ASSIGN_TWO_ARGUMENT_REGS +#else +#error unsupported architecture +#endif + +// Convert Unix native context to PAL_LIMITED_CONTEXT +void NativeContextToPalContext(const void* context, PAL_LIMITED_CONTEXT* palContext) +{ + ucontext_t *nativeContext = (ucontext_t*)context; +#define ASSIGN_REG(regNative, regPal) palContext->regPal = MCREG_##regNative(nativeContext->uc_mcontext); + ASSIGN_CONTROL_REGS + ASSIGN_INTEGER_REGS +#undef ASSIGN_REG +} + +// Redirect Unix native context to the PAL_LIMITED_CONTEXT and also set the first two argument registers +void RedirectNativeContext(void* context, const PAL_LIMITED_CONTEXT* palContext, 
UIntNative arg0Reg, UIntNative arg1Reg) +{ + ucontext_t *nativeContext = (ucontext_t*)context; + +#define ASSIGN_REG(regNative, regPal) MCREG_##regNative(nativeContext->uc_mcontext) = palContext->regPal; + ASSIGN_CONTROL_REGS +#undef ASSIGN_REG + ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg); +} + +#ifdef HOST_AMD64 +// Get value of a register from the native context +// Parameters: +// void* context - context containing the registers +// uint32_t index - index of the register +// Rax = 0, Rcx = 1, Rdx = 2, Rbx = 3 +// Rsp = 4, Rbp = 5, Rsi = 6, Rdi = 7 +// R8 = 8, R9 = 9, R10 = 10, R11 = 11 +// R12 = 12, R13 = 13, R14 = 14, R15 = 15 +uint64_t GetRegisterValueByIndex(void* context, uint32_t index) +{ + ucontext_t *nativeContext = (ucontext_t*)context; + switch (index) + { + case 0: + return MCREG_Rax(nativeContext->uc_mcontext); + case 1: + return MCREG_Rcx(nativeContext->uc_mcontext); + case 2: + return MCREG_Rdx(nativeContext->uc_mcontext); + case 3: + return MCREG_Rbx(nativeContext->uc_mcontext); + case 4: + return MCREG_Rsp(nativeContext->uc_mcontext); + case 5: + return MCREG_Rbp(nativeContext->uc_mcontext); + case 6: + return MCREG_Rsi(nativeContext->uc_mcontext); + case 7: + return MCREG_Rdi(nativeContext->uc_mcontext); + case 8: + return MCREG_R8(nativeContext->uc_mcontext); + case 9: + return MCREG_R9(nativeContext->uc_mcontext); + case 10: + return MCREG_R10(nativeContext->uc_mcontext); + case 11: + return MCREG_R11(nativeContext->uc_mcontext); + case 12: + return MCREG_R12(nativeContext->uc_mcontext); + case 13: + return MCREG_R13(nativeContext->uc_mcontext); + case 14: + return MCREG_R14(nativeContext->uc_mcontext); + case 15: + return MCREG_R15(nativeContext->uc_mcontext); + } + + ASSERT(false); + return 0; +} + +// Get value of the program counter from the native context +uint64_t GetPC(void* context) +{ + ucontext_t *nativeContext = (ucontext_t*)context; + return MCREG_Rip(nativeContext->uc_mcontext); +} + +#endif // HOST_AMD64 + +// Find LSDA and start address for a function at address controlPC +bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* lsda) +{ + unw_proc_info_t procInfo; + + if (!GetUnwindProcInfo((PCODE)controlPC, &procInfo)) + { + return false; + } + + assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); + +#if defined(HOST_ARM) || defined(HOST_ARM64) + // libunwind fills by reference not by value for ARM + *lsda = *((UIntNative *)procInfo.lsda); +#else + *lsda = procInfo.lsda; +#endif + *startAddress = procInfo.start_ip; + + return true; +} + +// Virtually unwind stack to the caller of the context specified by the REGDISPLAY +bool VirtualUnwind(REGDISPLAY* pRegisterSet) +{ + return UnwindHelpers::StepFrame(pRegisterSet); +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.h new file mode 100644 index 0000000000000..3218c49928d16 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.h @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __UNIX_CONTEXT_H__ +#define __UNIX_CONTEXT_H__ + +// Convert Unix native context to PAL_LIMITED_CONTEXT +void NativeContextToPalContext(const void* context, PAL_LIMITED_CONTEXT* palContext); +// Redirect Unix native context to the PAL_LIMITED_CONTEXT and also set the first two argument registers +void RedirectNativeContext(void* context, const PAL_LIMITED_CONTEXT* palContext, UIntNative arg0Reg, UIntNative arg1Reg); + +// Find LSDA and start address for a function at address controlPC +bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* lsda); +// Virtually unwind stack to the caller of the context specified by the REGDISPLAY +bool VirtualUnwind(REGDISPLAY* pRegisterSet); + +#ifdef HOST_AMD64 +// Get value of a register from the native context. The index is the processor specific +// register index stored in machine instructions. +uint64_t GetRegisterValueByIndex(void* context, uint32_t index); +// Get value of the program counter from the native context +uint64_t GetPC(void* context); +#endif // HOST_AMD64 + +#endif // __UNIX_CONTEXT_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixHandle.h b/src/coreclr/src/nativeaot/Runtime/unix/UnixHandle.h new file mode 100644 index 0000000000000..ac9712a145633 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixHandle.h @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __UNIX_HANDLE_H__ +#define __UNIX_HANDLE_H__ + +enum class UnixHandleType +{ + Thread, + Event +}; + +// TODO: add validity check for usage / closing? +class UnixHandleBase +{ + UnixHandleType m_type; +protected: + UnixHandleBase(UnixHandleType type) + : m_type(type) + { + } + +public: + + virtual ~UnixHandleBase() + { + } + + virtual bool Destroy() + { + return true; + } + + UnixHandleType GetType() + { + return m_type; + } +}; + +template +class UnixHandle : UnixHandleBase +{ +protected: + T m_object; +public: + + UnixHandle(T object) + : UnixHandleBase(HT), + m_object(object) + { + } + + T* GetObject() + { + return &m_object; + } +}; + +#endif // __UNIX_HANDLE_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp new file mode 100644 index 0000000000000..e03a85e320588 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -0,0 +1,476 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "regdisplay.h" +#include "ICodeManager.h" +#include "UnixNativeCodeManager.h" +#include "varint.h" +#include "holder.h" + +#include "CommonMacros.inl" + +#define GCINFODECODER_NO_EE +#include "coreclr/gcinfodecoder.cpp" + +#include "UnixContext.h" + +#define UBF_FUNC_KIND_MASK 0x03 +#define UBF_FUNC_KIND_ROOT 0x00 +#define UBF_FUNC_KIND_HANDLER 0x01 +#define UBF_FUNC_KIND_FILTER 0x02 + +#define UBF_FUNC_HAS_EHINFO 0x04 +#define UBF_FUNC_REVERSE_PINVOKE 0x08 +#define UBF_FUNC_HAS_ASSOCIATED_DATA 0x10 + +struct UnixNativeMethodInfo +{ + PTR_VOID pMethodStartAddress; + PTR_UInt8 pMainLSDA; + PTR_UInt8 pLSDA; + bool executionAborted; +}; + +// Ensure that UnixNativeMethodInfo fits into the space reserved by MethodInfo +static_assert(sizeof(UnixNativeMethodInfo) <= sizeof(MethodInfo), "UnixNativeMethodInfo too big"); + +UnixNativeCodeManager::UnixNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions) + : m_moduleBase(moduleBase), + m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) +{ +} + +UnixNativeCodeManager::~UnixNativeCodeManager() +{ +} + +bool UnixNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut) +{ + // Stackwalker may call this with ControlPC that does not belong to this code manager + if (dac_cast(ControlPC) < dac_cast(m_pvManagedCodeStartRange) || + dac_cast(m_pvManagedCodeStartRange) + m_cbManagedCodeRange <= dac_cast(ControlPC)) + { + return false; + } + + UnixNativeMethodInfo * pMethodInfo = (UnixNativeMethodInfo *)pMethodInfoOut; + UIntNative startAddress; + UIntNative lsda; + + if (!FindProcInfo((UIntNative)ControlPC, &startAddress, &lsda)) + { + return false; + } + + PTR_UInt8 p = dac_cast(lsda); + + pMethodInfo->pLSDA = p; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + { + // Funclets just refer to the main function's blob + pMethodInfo->pMainLSDA = p + *dac_cast(p); + p += sizeof(int32_t); + + pMethodInfo->pMethodStartAddress = dac_cast(startAddress - *dac_cast(p)); + } + else + { + pMethodInfo->pMainLSDA = dac_cast(lsda); + pMethodInfo->pMethodStartAddress = dac_cast(startAddress); + } + + pMethodInfo->executionAborted = false; + + return true; +} + +bool UnixNativeCodeManager::IsFunclet(MethodInfo * pMethodInfo) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + uint8_t unwindBlockFlags = *(pNativeMethodInfo->pLSDA); + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT; +} + +bool UnixNativeCodeManager::IsFilter(MethodInfo * pMethodInfo) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + uint8_t unwindBlockFlags = *(pNativeMethodInfo->pLSDA); + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) == UBF_FUNC_KIND_FILTER; +} + +PTR_VOID UnixNativeCodeManager::GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + // Return frame pointer for methods with EH and funclets + uint8_t unwindBlockFlags = *(pNativeMethodInfo->pLSDA); + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0 || (unwindBlockFlags & UBF_FUNC_KIND_MASK) != 
UBF_FUNC_KIND_ROOT) + { + return (PTR_VOID)pRegisterSet->GetFP(); + } + + return NULL; +} + +void UnixNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + UInt32 codeOffset = (UInt32)(PINSTRToPCODE(dac_cast(safePointAddress)) - PINSTRToPCODE(dac_cast(pNativeMethodInfo->pMethodStartAddress))); + + GcInfoDecoder decoder( + GCInfoToken(p), + GcInfoDecoderFlags(DECODE_GC_LIFETIMES | DECODE_SECURITY_OBJECT | DECODE_VARARG), + codeOffset - 1 // TODO: Is this adjustment correct? + ); + + ICodeManagerFlags flags = (ICodeManagerFlags)0; + if (pNativeMethodInfo->executionAborted) + flags = ICodeManagerFlags::ExecutionAborted; + if (IsFilter(pMethodInfo)) + flags = (ICodeManagerFlags)(flags | ICodeManagerFlags::NoReportUntracked); + + if (!decoder.EnumerateLiveSlots( + pRegisterSet, + false /* reportScratchSlots */, + flags, + hCallback->pCallback, + hCallback + )) + { + assert(false); + } +} + +UIntNative UnixNativeCodeManager::GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet) +{ + // Return value + UIntNative upperBound; + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + // Reverse PInvoke transition should be on the main function body only + assert(pNativeMethodInfo->pMainLSDA == pNativeMethodInfo->pLSDA); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + assert(slot != NO_REVERSE_PINVOKE_FRAME); + + TADDR basePointer = NULL; + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + + // Reverse PInvoke case. The embedded reverse PInvoke frame is guaranteed to reside above + // all outgoing arguments. + upperBound = (UIntNative)dac_cast(basePointer + slot); + } + else + { + // The passed in pRegisterSet should be left intact + REGDISPLAY localRegisterSet = *pRegisterSet; + + bool result = VirtualUnwind(&localRegisterSet); + assert(result); + + // All common ABIs have outgoing arguments under caller SP (minus slot reserved for return address). + // There are ABI-specific optimizations that could applied here, but they are not worth the complexity + // given that this path is used rarely. 
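+ // localRegisterSet now describes the caller, so its SP bounds this method's entire
+ // frame, including the outgoing argument area at its bottom, from above; one
+ // pointer-sized slot is subtracted for the return address that sits between the
+ // two frames on x86/amd64.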
+ upperBound = dac_cast(localRegisterSet.GetSP() - sizeof(TADDR)); + } + + return upperBound; +} + +bool UnixNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame) // out +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + // Reverse PInvoke transition should be on the main function body only + assert(pNativeMethodInfo->pMainLSDA == pNativeMethodInfo->pLSDA); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + assert(slot != NO_REVERSE_PINVOKE_FRAME); + + TADDR basePointer = NULL; + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + *ppPreviousTransitionFrame = *(void**)(basePointer + slot); + return true; + } + + *ppPreviousTransitionFrame = NULL; + + if (!VirtualUnwind(pRegisterSet)) + { + return false; + } + + return true; +} + +bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind) // out +{ + // @TODO: CORERT: GetReturnAddressHijackInfo + return false; +} + +void UnixNativeCodeManager::UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo) +{ + // @TODO: CORERT: UnsynchronizedHijackMethodLoops +} + +PTR_VOID UnixNativeCodeManager::RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC) +{ + // GCInfo decoder needs to know whether execution of the method is aborted + // while querying for gc-info. But ICodeManager::EnumGCRef() doesn't receive any + // flags from mrt. Call to this method is used as a cue to mark the method info + // as execution aborted. Note - if pMethodInfo was cached, this scheme would not work. + // + // If the method has EH, then JIT will make sure the method is fully interruptible + // and we will have GC-info available at the faulting address as well. 
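+ // (EnumGcRefs above checks executionAborted and forwards it to the GC info decoder
+ // as ICodeManagerFlags::ExecutionAborted, which is why flipping the flag here is
+ // sufficient.)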
+ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + pNativeMethodInfo->executionAborted = true; + + return controlPC; +} + +struct UnixEHEnumState +{ + PTR_UInt8 pMethodStartAddress; + PTR_UInt8 pEHInfo; + UInt32 uClause; + UInt32 nClauses; +}; + +// Ensure that UnixEHEnumState fits into the space reserved by EHEnumState +static_assert(sizeof(UnixEHEnumState) <= sizeof(EHEnumState), "UnixEHEnumState too big"); + +bool UnixNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumStateOut) +{ + assert(pMethodInfo != NULL); + assert(pMethodStartAddress != NULL); + assert(pEHEnumStateOut != NULL); + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + // return if there is no EH info associated with this method + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) == 0) + { + return false; + } + + UnixEHEnumState * pEnumState = (UnixEHEnumState *)pEHEnumStateOut; + + *pMethodStartAddress = pNativeMethodInfo->pMethodStartAddress; + + pEnumState->pMethodStartAddress = dac_cast(pNativeMethodInfo->pMethodStartAddress); + pEnumState->pEHInfo = dac_cast(p + *dac_cast(p)); + pEnumState->uClause = 0; + pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + return true; +} + +bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClauseOut) +{ + assert(pEHEnumState != NULL); + assert(pEHClauseOut != NULL); + + UnixEHEnumState * pEnumState = (UnixEHEnumState *)pEHEnumState; + if (pEnumState->uClause >= pEnumState->nClauses) + { + return false; + } + + pEnumState->uClause++; + + pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + UInt32 tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3); + pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2); + + // For each clause, we have up to 4 integers: + // 1) try start offset + // 2) (try length << 2) | clauseKind + // 3) if (typed || fault || filter) { handler start offset } + // 4a) if (typed) { type RVA } + // 4b) if (filter) { filter start offset } + // + // The first two integers have already been decoded + + switch (pEHClauseOut->m_clauseKind) + { + case EH_CLAUSE_TYPED: + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + + // Read target type + { + // @TODO: CORERT: Compress EHInfo using type table index scheme + // https://github.com/dotnet/corert/issues/972 + Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo)++; + pEHClauseOut->m_pTargetType = dac_cast(pEnumState->pEHInfo + typeRelAddr); + } + break; + case EH_CLAUSE_FAULT: + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + case EH_CLAUSE_FILTER: + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_filterAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + default: + UNREACHABLE_MSG("unexpected EHClauseKind"); + } + + 
return true; +} + +PTR_VOID UnixNativeCodeManager::GetOsModuleHandle() +{ + return (PTR_VOID)m_moduleBase; +} + +PTR_VOID UnixNativeCodeManager::GetMethodStartAddress(MethodInfo * pMethodInfo) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + return pNativeMethodInfo->pMethodStartAddress; +} + +void * UnixNativeCodeManager::GetClasslibFunction(ClasslibFunctionId functionId) +{ + uint32_t id = (uint32_t)functionId; + + if (id >= m_nClasslibFunctions) + { + return nullptr; + } + + return m_pClasslibFunctions[id]; +} + +PTR_VOID UnixNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) +{ + UnixNativeMethodInfo methodInfo; + if (!FindMethodInfo(ControlPC, (MethodInfo*)&methodInfo)) + return NULL; + + PTR_UInt8 p = methodInfo.pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) + return NULL; + + return dac_cast(p + *dac_cast(p)); +} + +extern "C" bool __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange); +extern "C" void __stdcall UnregisterCodeManager(ICodeManager * pCodeManager); +extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange); + +extern "C" +bool RhRegisterOSModule(void * pModule, + void * pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + void * pvUnboxingStubsStartRange, UInt32 cbUnboxingStubsRange, + void ** pClasslibFunctions, UInt32 nClasslibFunctions) +{ + NewHolder pUnixNativeCodeManager = new (nothrow) UnixNativeCodeManager((TADDR)pModule, + pvManagedCodeStartRange, cbManagedCodeRange, + pClasslibFunctions, nClasslibFunctions); + + if (pUnixNativeCodeManager == nullptr) + return false; + + if (!RegisterCodeManager(pUnixNativeCodeManager, pvManagedCodeStartRange, cbManagedCodeRange)) + return false; + + if (!RegisterUnboxingStubs(pvUnboxingStubsStartRange, cbUnboxingStubsRange)) + { + UnregisterCodeManager(pUnixNativeCodeManager); + return false; + } + + pUnixNativeCodeManager.SuppressRelease(); + + return true; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.h new file mode 100644 index 0000000000000..45ebeac60a243 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -0,0 +1,69 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#pragma once + +class UnixNativeCodeManager : public ICodeManager +{ + TADDR m_moduleBase; + + PTR_VOID m_pvManagedCodeStartRange; + UInt32 m_cbManagedCodeRange; + + PTR_PTR_VOID m_pClasslibFunctions; + UInt32 m_nClasslibFunctions; + +public: + UnixNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions); + + virtual ~UnixNativeCodeManager(); + + // + // Code manager methods + // + + bool FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut); + + bool IsFunclet(MethodInfo * pMethodInfo); + + bool IsFilter(MethodInfo * pMethodInfo); + + PTR_VOID GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + void EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback); + + bool UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame); // out + + UIntNative GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind); // out + + void UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo); + + PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC); + + bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState); + + bool EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClause); + + PTR_VOID GetMethodStartAddress(MethodInfo * pMethodInfo); + + void * GetClasslibFunction(ClasslibFunctionId functionId); + + PTR_VOID GetAssociatedData(PTR_VOID ControlPC); + + PTR_VOID GetOsModuleHandle(); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.cpp new file mode 100644 index 0000000000000..ced22cc272cd6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -0,0 +1,773 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "daccess.h" +#include "rhassert.h" + +#define UNW_STEP_SUCCESS 1 +#define UNW_STEP_END 0 + +#ifdef __APPLE__ +#include +#endif + +#include +#include "UnwindHelpers.h" + +// libunwind headers +#include +#include +#include +#include +#if defined(TARGET_ARM) +#include +#endif +#include + + +#if defined(TARGET_AMD64) +using libunwind::Registers_x86_64; +#elif defined(TARGET_ARM) +using libunwind::Registers_arm; +#elif defined(TARGET_ARM64) +using libunwind::Registers_arm64; +#elif defined(TARGET_X86) +using libunwind::Registers_x86; +#else +#error "Unwinding is not implemented for this architecture yet." 
+#endif +using libunwind::LocalAddressSpace; +using libunwind::EHHeaderParser; +#if _LIBUNWIND_SUPPORT_DWARF_UNWIND +using libunwind::DwarfInstructions; +#endif +using libunwind::UnwindInfoSections; + +LocalAddressSpace _addressSpace; + +#ifdef TARGET_AMD64 + +// Shim that implements methods required by libunwind over REGDISPLAY +struct Registers_REGDISPLAY : REGDISPLAY +{ + static int getArch() { return libunwind::REGISTERS_X86_64; } + + inline uint64_t getRegister(int regNum) const + { + switch (regNum) + { + case UNW_REG_IP: + return IP; + case UNW_REG_SP: + return SP; + case UNW_X86_64_RAX: + return *pRax; + case UNW_X86_64_RDX: + return *pRdx; + case UNW_X86_64_RCX: + return *pRcx; + case UNW_X86_64_RBX: + return *pRbx; + case UNW_X86_64_RSI: + return *pRsi; + case UNW_X86_64_RDI: + return *pRdi; + case UNW_X86_64_RBP: + return *pRbp; + case UNW_X86_64_RSP: + return SP; + case UNW_X86_64_R8: + return *pR8; + case UNW_X86_64_R9: + return *pR9; + case UNW_X86_64_R10: + return *pR10; + case UNW_X86_64_R11: + return *pR11; + case UNW_X86_64_R12: + return *pR12; + case UNW_X86_64_R13: + return *pR13; + case UNW_X86_64_R14: + return *pR14; + case UNW_X86_64_R15: + return *pR15; + } + + // Unsupported register requested + abort(); + } + + inline void setRegister(int regNum, uint64_t value, uint64_t location) + { + switch (regNum) + { + case UNW_REG_IP: + IP = value; + pIP = (PTR_PCODE)location; + return; + case UNW_REG_SP: + SP = value; + return; + case UNW_X86_64_RAX: + pRax = (PTR_UIntNative)location; + return; + case UNW_X86_64_RDX: + pRdx = (PTR_UIntNative)location; + return; + case UNW_X86_64_RCX: + pRcx = (PTR_UIntNative)location; + return; + case UNW_X86_64_RBX: + pRbx = (PTR_UIntNative)location; + return; + case UNW_X86_64_RSI: + pRsi = (PTR_UIntNative)location; + return; + case UNW_X86_64_RDI: + pRdi = (PTR_UIntNative)location; + return; + case UNW_X86_64_RBP: + pRbp = (PTR_UIntNative)location; + return; + case UNW_X86_64_RSP: + SP = value; + return; + case UNW_X86_64_R8: + pR8 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R9: + pR9 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R10: + pR10 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R11: + pR11 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R12: + pR12 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R13: + pR13 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R14: + pR14 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R15: + pR15 = (PTR_UIntNative)location; + return; + } + + // Unsupported x86_64 register + abort(); + } + + // N/A for x86_64 + inline bool validFloatRegister(int) { return false; } + inline bool validVectorRegister(int) { return false; } + + inline static int lastDwarfRegNum() { return 16; } + + inline bool validRegister(int regNum) const + { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 15) + return false; + return true; + } + + // N/A for x86_64 + inline double getFloatRegister(int) const { abort(); } + inline void setFloatRegister(int, double) { abort(); } + inline double getVectorRegister(int) const { abort(); } + inline void setVectorRegister(int, ...) 
{ abort(); } + + uint64_t getSP() const { return SP; } + void setSP(uint64_t value, uint64_t location) { SP = value; } + + uint64_t getIP() const { return IP; } + + void setIP(uint64_t value, uint64_t location) + { + IP = value; + pIP = (PTR_PCODE)location; + } + + uint64_t getRBP() const { return *pRbp; } + void setRBP(uint64_t value, uint64_t location) { pRbp = (PTR_UIntNative)location; } + uint64_t getRBX() const { return *pRbx; } + void setRBX(uint64_t value, uint64_t location) { pRbx = (PTR_UIntNative)location; } + uint64_t getR12() const { return *pR12; } + void setR12(uint64_t value, uint64_t location) { pR12 = (PTR_UIntNative)location; } + uint64_t getR13() const { return *pR13; } + void setR13(uint64_t value, uint64_t location) { pR13 = (PTR_UIntNative)location; } + uint64_t getR14() const { return *pR14; } + void setR14(uint64_t value, uint64_t location) { pR14 = (PTR_UIntNative)location; } + uint64_t getR15() const { return *pR15; } + void setR15(uint64_t value, uint64_t location) { pR15 = (PTR_UIntNative)location; } +}; + +#endif // TARGET_AMD64 +#if defined(TARGET_X86) +struct Registers_REGDISPLAY : REGDISPLAY +{ + static int getArch() { return libunwind::REGISTERS_X86; } + + inline uint64_t getRegister(int regNum) const + { + switch (regNum) + { + case UNW_REG_IP: + return IP; + case UNW_REG_SP: + return SP; + case UNW_X86_EAX: + return *pRax; + case UNW_X86_EDX: + return *pRdx; + case UNW_X86_ECX: + return *pRcx; + case UNW_X86_EBX: + return *pRbx; + case UNW_X86_ESI: + return *pRsi; + case UNW_X86_EDI: + return *pRdi; + case UNW_X86_EBP: + return *pRbp; + case UNW_X86_ESP: + return SP; + } + + // Unsupported register requested + abort(); + } + + inline void setRegister(int regNum, uint64_t value, uint64_t location) + { + switch (regNum) + { + case UNW_REG_IP: + IP = value; + pIP = (PTR_PCODE)location; + return; + case UNW_REG_SP: + SP = value; + return; + case UNW_X86_EAX: + pRax = (PTR_UIntNative)location; + return; + case UNW_X86_EDX: + pRdx = (PTR_UIntNative)location; + return; + case UNW_X86_ECX: + pRcx = (PTR_UIntNative)location; + return; + case UNW_X86_EBX: + pRbx = (PTR_UIntNative)location; + return; + case UNW_X86_ESI: + pRsi = (PTR_UIntNative)location; + return; + case UNW_X86_EDI: + pRdi = (PTR_UIntNative)location; + return; + case UNW_X86_EBP: + pRbp = (PTR_UIntNative)location; + return; + case UNW_X86_ESP: + SP = value; + return; + } + + // Unsupported x86_64 register + abort(); + } + + // N/A for x86 + inline bool validFloatRegister(int) { return false; } + inline bool validVectorRegister(int) { return false; } + + inline static int lastDwarfRegNum() { return 16; } + + inline bool validRegister(int regNum) const + { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 15) + return false; + return true; + } + + // N/A for x86 + inline double getFloatRegister(int) const { abort(); } + inline void setFloatRegister(int, double) { abort(); } + inline double getVectorRegister(int) const { abort(); } + inline void setVectorRegister(int, ...) 
{ abort(); } + + void setSP(uint64_t value, uint64_t location) { SP = value; } + + uint64_t getIP() const { return IP; } + + void setIP(uint64_t value, uint64_t location) + { + IP = value; + pIP = (PTR_PCODE)location; + } + + uint64_t getEBP() const { return *pRbp; } + void setEBP(uint64_t value, uint64_t location) { pRbp = (PTR_UIntNative)location; } + uint64_t getEBX() const { return *pRbx; } + void setEBX(uint64_t value, uint64_t location) { pRbx = (PTR_UIntNative)location; } +}; + +#endif // TARGET_X86 +#if defined(TARGET_ARM) + +class Registers_arm_rt: public libunwind::Registers_arm { +public: + Registers_arm_rt() { abort(); }; + Registers_arm_rt(void *registers) { regs = (REGDISPLAY *)registers; }; + uint32_t getRegister(int num); + void setRegister(int num, uint32_t value, uint32_t location); + uint32_t getRegisterLocation(int regNum) const { abort();} + unw_fpreg_t getFloatRegister(int num) { abort();} + void setFloatRegister(int num, unw_fpreg_t value) {abort();} + bool validVectorRegister(int num) const { abort();} + uint32_t getVectorRegister(int num) const {abort();}; + void setVectorRegister(int num, uint32_t value) {abort();}; + void jumpto() { abort();}; + uint32_t getSP() const { return regs->SP;} + void setSP(uint32_t value, uint32_t location) { regs->SP = value;} + uint32_t getIP() const { return regs->IP;} + void setIP(uint32_t value, uint32_t location) + { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } + void saveVFPAsX() {abort();}; +private: + REGDISPLAY *regs; +}; + +inline uint32_t Registers_arm_rt::getRegister(int regNum) { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) + return regs->SP; + + if (regNum == UNW_ARM_LR) + return *regs->pLR; + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) + return regs->IP; + + switch (regNum) + { + case (UNW_ARM_R0): + return *regs->pR0; + case (UNW_ARM_R1): + return *regs->pR1; + case (UNW_ARM_R2): + return *regs->pR2; + case (UNW_ARM_R3): + return *regs->pR3; + case (UNW_ARM_R4): + return *regs->pR4; + case (UNW_ARM_R5): + return *regs->pR5; + case (UNW_ARM_R6): + return *regs->pR6; + case (UNW_ARM_R7): + return *regs->pR7; + case (UNW_ARM_R8): + return *regs->pR8; + case (UNW_ARM_R9): + return *regs->pR9; + case (UNW_ARM_R10): + return *regs->pR10; + case (UNW_ARM_R11): + return *regs->pR11; + case (UNW_ARM_R12): + return *regs->pR12; + } + + PORTABILITY_ASSERT("unsupported arm register"); +} + +void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) +{ + + if (num == UNW_REG_SP || num == UNW_ARM_SP) { + regs->SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM_LR) { + regs->pLR = (PTR_UIntNative)location; + return; + } + + if (num == UNW_REG_IP || num == UNW_ARM_IP) { + regs->IP = value; + /* the location could be NULL, we could try to recover + pointer to value in stack from pLR */ + if ((!location) && (regs->pLR) && (*regs->pLR == value)) + regs->pIP = regs->pLR; + else + regs->pIP = (PTR_UIntNative)location; + return; + } + + switch (num) + { + case (UNW_ARM_R0): + regs->pR0 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R1): + regs->pR1 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R2): + regs->pR2 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R3): + regs->pR3 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R4): + regs->pR4 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R5): + regs->pR5 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R6): + regs->pR6 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R7): + regs->pR7 = 
(PTR_UIntNative)location; + break; + case (UNW_ARM_R8): + regs->pR8 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R9): + regs->pR9 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R10): + regs->pR10 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R11): + regs->pR11 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R12): + regs->pR12 = (PTR_UIntNative)location; + break; + default: + PORTABILITY_ASSERT("unsupported arm register"); + } +} + +#endif // TARGET_ARM + +#if defined(TARGET_ARM64) + +class Registers_arm64_rt: public libunwind::Registers_arm64 { +public: + Registers_arm64_rt() { abort(); }; + Registers_arm64_rt(const void *registers); + + bool validRegister(int num) {abort();}; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + bool validFloatRegister(int num) {abort();}; + double getFloatRegister(int num) {abort();} + void setFloatRegister(int num, double value) {abort();} + bool validVectorRegister(int num) const {abort();} + libunwind::v128 getVectorRegister(int num) const {abort();}; + void setVectorRegister(int num, libunwind::v128 value) {abort();}; + void jumpto() { abort();}; + + uint64_t getSP() const { return regs->SP;} + void setSP(uint64_t value, uint64_t location) { regs->SP = value;} + uint64_t getIP() const { return regs->IP;} + void setIP(uint64_t value, uint64_t location) + { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } + void saveVFPAsX() {abort();}; +private: + REGDISPLAY *regs; +}; + +inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { + regs = (REGDISPLAY *)registers; +} + +inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) + return regs->SP; + + if (regNum == UNW_ARM64_LR) + return *regs->pLR; + + if (regNum == UNW_REG_IP) + return regs->IP; + + switch (regNum) + { + case (UNW_ARM64_X0): + return *regs->pX0; + case (UNW_ARM64_X1): + return *regs->pX1; + case (UNW_ARM64_X2): + return *regs->pX2; + case (UNW_ARM64_X3): + return *regs->pX3; + case (UNW_ARM64_X4): + return *regs->pX4; + case (UNW_ARM64_X5): + return *regs->pX5; + case (UNW_ARM64_X6): + return *regs->pX6; + case (UNW_ARM64_X7): + return *regs->pX7; + case (UNW_ARM64_X8): + return *regs->pX8; + case (UNW_ARM64_X9): + return *regs->pX9; + case (UNW_ARM64_X10): + return *regs->pX10; + case (UNW_ARM64_X11): + return *regs->pX11; + case (UNW_ARM64_X12): + return *regs->pX12; + case (UNW_ARM64_X13): + return *regs->pX13; + case (UNW_ARM64_X14): + return *regs->pX14; + case (UNW_ARM64_X15): + return *regs->pX15; + case (UNW_ARM64_X16): + return *regs->pX16; + case (UNW_ARM64_X17): + return *regs->pX17; + case (UNW_ARM64_X18): + return *regs->pX18; + case (UNW_ARM64_X19): + return *regs->pX19; + case (UNW_ARM64_X20): + return *regs->pX20; + case (UNW_ARM64_X21): + return *regs->pX21; + case (UNW_ARM64_X22): + return *regs->pX22; + case (UNW_ARM64_X23): + return *regs->pX23; + case (UNW_ARM64_X24): + return *regs->pX24; + case (UNW_ARM64_X25): + return *regs->pX25; + case (UNW_ARM64_X26): + return *regs->pX26; + case (UNW_ARM64_X27): + return *regs->pX27; + case (UNW_ARM64_X28): + return *regs->pX28; + } + + PORTABILITY_ASSERT("unsupported arm64 register"); +} + +void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) +{ + + if (num == UNW_REG_SP || num == UNW_ARM64_SP) { + regs->SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM64_LR) { + regs->pLR = (PTR_UIntNative)location; + return; + } + + if 
(num == UNW_REG_IP) { + regs->IP = value; + /* the location could be NULL, we could try to recover + pointer to value in stack from pLR */ + if ((!location) && (regs->pLR) && (*regs->pLR == value)) + regs->pIP = regs->pLR; + else + regs->pIP = (PTR_UIntNative)location; + return; + } + + switch (num) + { + case (UNW_ARM64_X0): + regs->pX0 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X1): + regs->pX1 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X2): + regs->pX2 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X3): + regs->pX3 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X4): + regs->pX4 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X5): + regs->pX5 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X6): + regs->pX6 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X7): + regs->pX7 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X8): + regs->pX8 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X9): + regs->pX9 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X10): + regs->pX10 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X11): + regs->pX11 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X12): + regs->pX12 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X13): + regs->pX13 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X14): + regs->pX14 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X15): + regs->pX15 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X16): + regs->pX16 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X17): + regs->pX17 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X18): + regs->pX18 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X19): + regs->pX19 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X20): + regs->pX20 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X21): + regs->pX21 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X22): + regs->pX22 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X23): + regs->pX23 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X24): + regs->pX24 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X25): + regs->pX25 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X26): + regs->pX26 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X27): + regs->pX27 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X28): + regs->pX28 = (PTR_UIntNative)location; + break; + default: + PORTABILITY_ASSERT("unsupported arm64 register"); + } +} + +#endif // TARGET_ARM64 + +bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs) +{ +#if defined(TARGET_AMD64) + libunwind::UnwindCursor uc(_addressSpace); +#elif defined(TARGET_ARM) + libunwind::UnwindCursor uc(_addressSpace, regs); +#elif defined(TARGET_ARM64) + libunwind::UnwindCursor uc(_addressSpace, regs); +#elif defined(HOST_X86) + libunwind::UnwindCursor uc(_addressSpace, regs); +#else + #error "Unwinding is not implemented for this architecture yet." 
+#endif + +#if _LIBUNWIND_SUPPORT_DWARF_UNWIND + bool retVal = uc.getInfoFromDwarfSection(pc, uwInfoSections, 0 /* fdeSectionOffsetHint */); + if (!retVal) + { + return false; + } + + unw_proc_info_t procInfo; + uc.getInfo(&procInfo); + +#if defined(TARGET_ARM64) + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs); +#elif defined(TARGET_ARM) + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs); +#else + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_REGDISPLAY*)regs); +#endif + + if (stepRet != UNW_STEP_SUCCESS) + { + return false; + } + + regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR)); +#elif defined(_LIBUNWIND_ARM_EHABI) + uc.setInfoBasedOnIPRegister(true); + int stepRet = uc.step(); + if ((stepRet != UNW_STEP_SUCCESS) && (stepRet != UNW_STEP_END)) + { + return false; + } +#endif + + return true; +} + +bool UnwindHelpers::StepFrame(REGDISPLAY *regs) +{ + UnwindInfoSections uwInfoSections; +#if _LIBUNWIND_SUPPORT_DWARF_UNWIND + uintptr_t pc = regs->GetIP(); + if (!_addressSpace.findUnwindSections(pc, uwInfoSections)) + { + return false; + } + return DoTheStep(pc, uwInfoSections, regs); +#elif defined(_LIBUNWIND_ARM_EHABI) + // unwind section is located later for ARM + // pc will be taked from regs parameter + return DoTheStep(0, uwInfoSections, regs); +#else + PORTABILITY_ASSERT("StepFrame"); +#endif +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.h b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.h new file mode 100644 index 0000000000000..86dadb71f4b82 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.h @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +// This class is used to encapsulate the internals of our unwinding implementation +// and any custom versions of libunwind structures that we use for performance +// reasons. 
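+// Typical use (sketch): the stack walker fills a REGDISPLAY with the current frame's
+// IP/SP and callee-saved register locations, then calls StepFrame repeatedly; each
+// successful step rewrites the REGDISPLAY to describe the caller's frame, and a false
+// return means the frame could not be unwound (e.g. no unwind info for the current IP).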
+class UnwindHelpers +{ +public: + static bool StepFrame(REGDISPLAY *regs); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/unix/config.h.in b/src/coreclr/src/nativeaot/Runtime/unix/config.h.in new file mode 100644 index 0000000000000..ca5f5aa7145c2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/config.h.in @@ -0,0 +1,34 @@ +#ifndef _PAL_CONFIG_H_INCLUDED +#define _PAL_CONFIG_H_INCLUDED 1 + +#cmakedefine01 HAVE_SYS_VMPARAM_H +#cmakedefine01 HAVE_MACH_VM_TYPES_H +#cmakedefine01 HAVE_MACH_VM_PARAM_H + +#cmakedefine01 HAVE_PTHREAD_ATTR_GET_NP +#cmakedefine01 HAVE_PTHREAD_GETATTR_NP +#cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK +#cmakedefine01 HAVE_PTHREAD_GETTHREADID_NP + +#cmakedefine01 HAVE_CLOCK_NANOSLEEP +#cmakedefine01 HAVE_SYSCTL +#cmakedefine01 HAVE_SYSCONF + +#cmakedefine01 HAVE_GREGSET_T +#cmakedefine01 HAVE___GREGSET_T + +#cmakedefine01 HAVE_SIGINFO_T +#cmakedefine01 HAVE_UCONTEXT_T + +#cmakedefine01 HAVE__SC_PHYS_PAGES +#cmakedefine01 HAVE__SC_AVPHYS_PAGES + +#cmakedefine01 HAVE_LWP_SELF +#cmakedefine01 HAVE_SCHED_GETCPU +#cmakedefine01 HAVE_CLOCK_MONOTONIC +#cmakedefine01 HAVE_CLOCK_MONOTONIC_COARSE +#cmakedefine01 HAVE_MACH_ABSOLUTE_TIME + +#cmakedefine01 HAVE_THREAD_LOCAL + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/unix/configure.cmake b/src/coreclr/src/nativeaot/Runtime/unix/configure.cmake new file mode 100644 index 0000000000000..a8fd29d182cda --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/configure.cmake @@ -0,0 +1,117 @@ +include(CheckCXXSourceCompiles) +include(CheckCXXSourceRuns) +include(CheckCXXSymbolExists) +include(CheckFunctionExists) +include(CheckIncludeFiles) +include(CheckStructHasMember) +include(CheckTypeSize) +include(CheckLibraryExists) + +if(CMAKE_SYSTEM_NAME STREQUAL FreeBSD) + set(CMAKE_REQUIRED_INCLUDES /usr/local/include) +elseif(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) + set(CMAKE_REQUIRED_DEFINITIONS "-D_BSD_SOURCE -D_SVID_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200809L") +endif() + +list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_FILE_OFFSET_BITS=64) + +check_include_files(sys/vmparam.h HAVE_SYS_VMPARAM_H) +check_include_files(mach/vm_types.h HAVE_MACH_VM_TYPES_H) +check_include_files(mach/vm_param.h HAVE_MACH_VM_PARAM_H) + +check_library_exists(pthread pthread_attr_get_np "" HAVE_PTHREAD_ATTR_GET_NP) +check_library_exists(pthread pthread_getattr_np "" HAVE_PTHREAD_GETATTR_NP) +check_library_exists(pthread pthread_condattr_setclock "" HAVE_PTHREAD_CONDATTR_SETCLOCK) +check_library_exists(pthread pthread_getthreadid_np "" HAVE_PTHREAD_GETTHREADID_NP) + +check_function_exists(clock_nanosleep HAVE_CLOCK_NANOSLEEP) +check_function_exists(sysctl HAVE_SYSCTL) +check_function_exists(sysconf HAVE_SYSCONF) + +check_struct_has_member ("ucontext_t" uc_mcontext.gregs[0] ucontext.h HAVE_GREGSET_T) +check_struct_has_member ("ucontext_t" uc_mcontext.__gregs[0] ucontext.h HAVE___GREGSET_T) + +set(CMAKE_EXTRA_INCLUDE_FILES) +set(CMAKE_EXTRA_INCLUDE_FILES signal.h) +check_type_size(siginfo_t SIGINFO_T) +set(CMAKE_EXTRA_INCLUDE_FILES) +set(CMAKE_EXTRA_INCLUDE_FILES ucontext.h) +check_type_size(ucontext_t UCONTEXT_T) + +check_cxx_symbol_exists(_SC_PHYS_PAGES unistd.h HAVE__SC_PHYS_PAGES) +check_cxx_symbol_exists(_SC_AVPHYS_PAGES unistd.h HAVE__SC_AVPHYS_PAGES) + +check_cxx_source_compiles(" +#include + +int main(int argc, char **argv) +{ + return (int)_lwp_self(); +}" HAVE_LWP_SELF) + +set(CMAKE_REQUIRED_LIBRARIES pthread) +check_cxx_source_runs(" +#include +#include + +int main(void) +{ + if (sched_getcpu() >= 0) + { + exit(0); + } + 
exit(1); +}" HAVE_SCHED_GETCPU) +set(CMAKE_REQUIRED_LIBRARIES) + +check_cxx_source_runs(" +#include +#include +#include + +int main() +{ + int ret; + struct timespec ts; + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + + exit(ret); +}" HAVE_CLOCK_MONOTONIC) + +check_cxx_source_runs(" +#include +#include +#include + +int main() +{ + int ret; + struct timespec ts; + ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + + exit(ret); +}" HAVE_CLOCK_MONOTONIC_COARSE) + +check_cxx_source_runs(" +#include +#include + +int main() +{ + int ret; + mach_timebase_info_data_t timebaseInfo; + ret = mach_timebase_info(&timebaseInfo); + mach_absolute_time(); + exit(ret); +}" HAVE_MACH_ABSOLUTE_TIME) + +check_cxx_source_compiles(" +thread_local int x; + +int main(int argc, char **argv) +{ + x = 1; + return 0; +}" HAVE_THREAD_LOCAL) + +configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) diff --git a/src/coreclr/src/nativeaot/Runtime/unix/no_sal2.h b/src/coreclr/src/nativeaot/Runtime/unix/no_sal2.h new file mode 100644 index 0000000000000..9c4dff666be1c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/no_sal2.h @@ -0,0 +1,533 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + + +/*** +* no_sal2.h - renders the SAL annotations for documenting APIs harmless. +* + +* +*Purpose: +* sal.h provides a set of SAL2 annotations to describe how a function uses its +* parameters - the assumptions it makes about them, and the guarantees it makes +* upon finishing. This file redefines all those annotation macros to be harmless. +* It is designed for use in down-level build environments where the tooling may +* be unhappy with the standard SAL2 macro definitions. 
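+*
+*    For example (an illustrative sketch, not part of the original header): a
+*    hypothetical declaration written against the SAL2 macros, such as
+*
+*        _Check_return_ _Success_(return == 0)
+*        int ReadValue(_In_z_ const char* pszName, _Out_ int* pValue);
+*
+*    still compiles as the plain prototype
+*
+*        int ReadValue(const char* pszName, int* pValue);
+*
+*    because every annotation used above is expanded to nothing by this header.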
+* +* [Public] +* + +* +****/ + +#ifndef _NO_SAL_2_H_ +#define _NO_SAL_2_H_ + +#undef _When_ +#define _When_(c,a) +#undef _At_ +#define _At_(t,a) +#undef _At_buffer_ +#define _At_buffer_(t,i,c,a) +#undef _Group_ +#define _Group_(a) +#undef _Pre_ +#define _Pre_ +#undef _Post_ +#define _Post_ +#undef _Deref_ +#define _Deref_ +#undef _Null_ +#define _Null_ +#undef _Notnull_ +#define _Notnull_ +#undef _Maybenull_ +#define _Maybenull_ +#undef _Const_ +#define _Const_ +#undef _Check_return_ +#define _Check_return_ +#undef _Must_inspect_result_ +#define _Must_inspect_result_ +#undef _Pre_satisfies_ +#define _Pre_satisfies_(e) +#undef _Post_satisfies_ +#define _Post_satisfies_(e) +#undef _Writable_elements_ +#define _Writable_elements_(s) +#undef _Writable_bytes_ +#define _Writable_bytes_(s) +#undef _Readable_elements_ +#define _Readable_elements_(s) +#undef _Readable_bytes_ +#define _Readable_bytes_(s) +#undef _Null_terminated_ +#define _Null_terminated_ +#undef _NullNull_terminated_ +#define _NullNull_terminated_ +#undef _Valid_ +#define _Valid_ +#undef _Notvalid_ +#define _Notvalid_ +#undef _Success_ +#define _Success_(c) +#undef _Return_type_success_ +#define _Return_type_success_(c) +#undef _On_failure_ +#define _On_failure_(a) +#undef _Always_ +#define _Always_(a) +#undef _Use_decl_annotations_ +#define _Use_decl_annotations_ +#undef _Pre_defensive_ +#define _Pre_defensive_ +#undef _Post_defensive_ +#define _Post_defensive_ +#undef _Pre_unknown_ +#define _Pre_unknown_ +#undef _Acquires_lock_ +#define _Acquires_lock_(e) +#undef _Releases_lock_ +#define _Releases_lock_(e) +#undef _Requires_lock_held_ +#define _Requires_lock_held_(e) +#undef _Requires_lock_not_held_ +#define _Requires_lock_not_held_(e) +#undef _Requires_no_locks_held_ +#define _Requires_no_locks_held_ +#undef _Guarded_by_ +#define _Guarded_by_(e) +#undef _Write_guarded_by_ +#define _Write_guarded_by_(e) +#undef _Interlocked_ +#define _Interlocked_ +#undef _Post_same_lock_ +#define _Post_same_lock_(e1,e2) +#undef _Benign_race_begin_ +#define _Benign_race_begin_ +#undef _Benign_race_end_ +#define _Benign_race_end_ +#undef _No_competing_thread_ +#define _No_competing_thread_ +#undef _No_competing_thread_begin_ +#define _No_competing_thread_begin_ +#undef _No_competing_thread_end_ +#define _No_competing_thread_end_ +#undef _Acquires_shared_lock_ +#define _Acquires_shared_lock_(e) +#undef _Releases_shared_lock_ +#define _Releases_shared_lock_(e) +#undef _Requires_shared_lock_held_ +#define _Requires_shared_lock_held_(e) +#undef _Acquires_exclusive_lock_ +#define _Acquires_exclusive_lock_(e) +#undef _Releases_exclusive_lock_ +#define _Releases_exclusive_lock_(e) +#undef _Requires_exclusive_lock_held_ +#define _Requires_exclusive_lock_held_(e) +#undef _Has_lock_kind_ +#define _Has_lock_kind_(n) +#undef _Create_lock_level_ +#define _Create_lock_level_(n) +#undef _Has_lock_level_ +#define _Has_lock_level_(n) +#undef _Lock_level_order_ +#define _Lock_level_order_(n1,n2) +#undef _Analysis_assume_lock_acquired_ +#define _Analysis_assume_lock_acquired_(e) +#undef _Analysis_assume_lock_released_ +#define _Analysis_assume_lock_released_(e) +#undef _Analysis_assume_lock_held_ +#define _Analysis_assume_lock_held_(e) +#undef _Analysis_assume_lock_not_held_ +#define _Analysis_assume_lock_not_held_(e) +#undef _Analysis_assume_same_lock_ +#define _Analysis_assume_same_lock_(e) +#undef _In_ +#define _In_ +#undef _Out_ +#define _Out_ +#undef _Inout_ +#define _Inout_ +#undef _In_z_ +#define _In_z_ +#undef _Inout_z_ +#define _Inout_z_ +#undef 
_In_reads_ +#define _In_reads_(s) +#undef _In_reads_bytes_ +#define _In_reads_bytes_(s) +#undef _In_reads_z_ +#define _In_reads_z_(s) +#undef _In_reads_or_z_ +#define _In_reads_or_z_(s) +#undef _Out_writes_ +#define _Out_writes_(s) +#undef _Out_writes_bytes_ +#define _Out_writes_bytes_(s) +#undef _Out_writes_z_ +#define _Out_writes_z_(s) +#undef _Inout_updates_ +#define _Inout_updates_(s) +#undef _Inout_updates_bytes_ +#define _Inout_updates_bytes_(s) +#undef _Inout_updates_z_ +#define _Inout_updates_z_(s) +#undef _Out_writes_to_ +#define _Out_writes_to_(s,c) +#undef _Out_writes_bytes_to_ +#define _Out_writes_bytes_to_(s,c) +#undef _Out_writes_all_ +#define _Out_writes_all_(s) +#undef _Out_writes_bytes_all_ +#define _Out_writes_bytes_all_(s) +#undef _Inout_updates_to_ +#define _Inout_updates_to_(s,c) +#undef _Inout_updates_bytes_to_ +#define _Inout_updates_bytes_to_(s,c) +#undef _Inout_updates_all_ +#define _Inout_updates_all_(s) +#undef _Inout_updates_bytes_all_ +#define _Inout_updates_bytes_all_(s) +#undef _In_reads_to_ptr_ +#define _In_reads_to_ptr_(p) +#undef _In_reads_to_ptr_z_ +#define _In_reads_to_ptr_z_(p) +#undef _Out_writes_to_ptr_ +#define _Out_writes_to_ptr_(p) +#undef _Out_writes_to_ptr_z_ +#define _Out_writes_to_ptr_z_(p) +#undef _In_opt_ +#define _In_opt_ +#undef _Out_opt_ +#define _Out_opt_ +#undef _Inout_opt_ +#define _Inout_opt_ +#undef _In_opt_z_ +#define _In_opt_z_ +#undef _Inout_opt_z_ +#define _Inout_opt_z_ +#undef _In_reads_opt_ +#define _In_reads_opt_(s) +#undef _In_reads_bytes_opt_ +#define _In_reads_bytes_opt_(s) +#undef _Out_writes_opt_ +#define _Out_writes_opt_(s) +#undef _Out_writes_bytes_opt_ +#define _Out_writes_bytes_opt_(s) +#undef _Out_writes_opt_z_ +#define _Out_writes_opt_z_(s) +#undef _Inout_updates_opt_ +#define _Inout_updates_opt_(s) +#undef _Inout_updates_bytes_opt_ +#define _Inout_updates_bytes_opt_(s) +#undef _Inout_updates_opt_z_ +#define _Inout_updates_opt_z_(s) +#undef _Out_writes_to_opt_ +#define _Out_writes_to_opt_(s,c) +#undef _Out_writes_bytes_to_opt_ +#define _Out_writes_bytes_to_opt_(s,c) +#undef _Out_writes_all_opt_ +#define _Out_writes_all_opt_(s) +#undef _Out_writes_bytes_all_opt_ +#define _Out_writes_bytes_all_opt_(s) +#undef _Inout_updates_to_opt_ +#define _Inout_updates_to_opt_(s,c) +#undef _Inout_updates_bytes_to_opt_ +#define _Inout_updates_bytes_to_opt_(s,c) +#undef _Inout_updates_all_opt_ +#define _Inout_updates_all_opt_(s) +#undef _Inout_updates_bytes_all_opt_ +#define _Inout_updates_bytes_all_opt_(s) +#undef _In_reads_to_ptr_opt_ +#define _In_reads_to_ptr_opt_(p) +#undef _In_reads_to_ptr_opt_z_ +#define _In_reads_to_ptr_opt_z_(p) +#undef _Out_writes_to_ptr_opt_ +#define _Out_writes_to_ptr_opt_(p) +#undef _Out_writes_to_ptr_opt_z_ +#define _Out_writes_to_ptr_opt_z_(p) +#undef _Outptr_ +#define _Outptr_ +#undef _Outptr_opt_ +#define _Outptr_opt_ +#undef _Outptr_result_maybenull_ +#define _Outptr_result_maybenull_ +#undef _Outptr_opt_result_maybenull_ +#define _Outptr_opt_result_maybenull_ +#undef _Outptr_z_ +#define _Outptr_z_ +#undef _Outptr_opt_z_ +#define _Outptr_opt_z_ +#undef _Outptr_result_maybenull_z_ +#define _Outptr_result_maybenull_z_ +#undef _Outptr_opt_result_maybenull_z_ +#define _Outptr_opt_result_maybenull_z_ +#undef _COM_Outptr_ +#define _COM_Outptr_ +#undef _COM_Outptr_opt_ +#define _COM_Outptr_opt_ +#undef _COM_Outptr_result_maybenull_ +#define _COM_Outptr_result_maybenull_ +#undef _COM_Outptr_opt_result_maybenull_ +#define _COM_Outptr_opt_result_maybenull_ +#undef _Outptr_result_buffer_ +#define 
_Outptr_result_buffer_(s) +#undef _Outptr_result_bytebuffer_ +#define _Outptr_result_bytebuffer_(s) +#undef _Outptr_opt_result_buffer_ +#define _Outptr_opt_result_buffer_(s) +#undef _Outptr_opt_result_bytebuffer_ +#define _Outptr_opt_result_bytebuffer_(s) +#undef _Outptr_result_buffer_to_ +#define _Outptr_result_buffer_to_(s,c) +#undef _Outptr_result_bytebuffer_to_ +#define _Outptr_result_bytebuffer_to_(s,c) +#undef _Outptr_opt_result_buffer_to_ +#define _Outptr_opt_result_buffer_to_(s,c) +#undef _Outptr_opt_result_bytebuffer_to_ +#define _Outptr_opt_result_bytebuffer_to_(s,c) +#undef _Ret_ +#define _Ret_ +#undef _Ret_valid_ +#define _Ret_valid_ +#undef _Ret_z_ +#define _Ret_z_ +#undef _Ret_writes_ +#define _Ret_writes_(s) +#undef _Ret_writes_bytes_ +#define _Ret_writes_bytes_(s) +#undef _Ret_writes_z_ +#define _Ret_writes_z_(s) +#undef _Ret_writes_to_ +#define _Ret_writes_to_(s,c) +#undef _Ret_writes_bytes_to_ +#define _Ret_writes_bytes_to_(s,c) +#undef _Ret_writes_maybenull_ +#define _Ret_writes_maybenull_(s) +#undef _Ret_writes_bytes_maybenull_ +#define _Ret_writes_bytes_maybenull_(s) +#undef _Ret_writes_to_maybenull_ +#define _Ret_writes_to_maybenull_(s,c) +#undef _Ret_writes_bytes_to_maybenull_ +#define _Ret_writes_bytes_to_maybenull_(s,c) +#undef _Ret_writes_maybenull_z_ +#define _Ret_writes_maybenull_z_(s) +#undef _Ret_maybenull_ +#define _Ret_maybenull_ +#undef _Ret_maybenull_z_ +#define _Ret_maybenull_z_ +#undef _Field_size_ +#define _Field_size_(s) +#undef _Field_size_opt_ +#define _Field_size_opt_(s) +#undef _Field_size_bytes_ +#define _Field_size_bytes_(s) +#undef _Field_size_bytes_opt_ +#define _Field_size_bytes_opt_(s) +#undef _Field_size_part_ +#define _Field_size_part_(s,c) +#undef _Field_size_part_opt_ +#define _Field_size_part_opt_(s,c) +#undef _Field_size_bytes_part_ +#define _Field_size_bytes_part_(s,c) +#undef _Field_size_bytes_part_opt_ +#define _Field_size_bytes_part_opt_(s,c) +#undef _Field_size_full_ +#define _Field_size_full_(s) +#undef _Field_size_full_opt_ +#define _Field_size_full_opt_(s) +#undef _Field_size_bytes_full_ +#define _Field_size_bytes_full_(s) +#undef _Field_size_bytes_full_opt_ +#define _Field_size_bytes_full_opt_(s) +#undef _Printf_format_string_ +#define _Printf_format_string_ +#undef _Scanf_format_string_ +#define _Scanf_format_string_ +#undef _Scanf_s_format_string_ +#define _Scanf_s_format_string_ +#undef _Printf_format_string_params_ +#define _Printf_format_string_params_(x) +#undef _Scanf_format_string_params_ +#define _Scanf_format_string_params_(x) +#undef _Scanf_s_format_string_params_ +#define _Scanf_s_format_string_params_(x) +#undef _In_range_ +#define _In_range_(l,h) +#undef _Out_range_ +#define _Out_range_(l,h) +#undef _Ret_range_ +#define _Ret_range_(l,h) +#undef _Deref_in_range_ +#define _Deref_in_range_(l,h) +#undef _Deref_out_range_ +#define _Deref_out_range_(l,h) +#undef _Deref_inout_range_ +#define _Deref_inout_range_(l,h) +#undef _Field_range_ +#define _Field_range_(l,h) +#undef _Pre_equal_to_ +#define _Pre_equal_to_(e) +#undef _Post_equal_to_ +#define _Post_equal_to_(e) +#undef _Struct_size_bytes_ +#define _Struct_size_bytes_(s) +#undef _Analysis_assume_ +#define _Analysis_assume_ +#undef _Analysis_mode_ +#define _Analysis_mode_(m) +#undef _Analysis_noreturn_ +#define _Analysis_noreturn_ +#undef _Raises_SEH_exception_ +#define _Raises_SEH_exception_ +#undef _Maybe_raises_SEH_exception_ +#define _Maybe_raises_SEH_exception_ +#undef _Function_class_ +#define _Function_class_(n) +#undef _Literal_ +#define _Literal_ +#undef 
_Notliteral_ +#define _Notliteral_ +#undef _Enum_is_bitflag_ +#define _Enum_is_bitflag_ +#undef _Strict_type_match_ +#define _Strict_type_match_ +#undef _Points_to_data_ +#define _Points_to_data_ +#undef _Interlocked_operand_ +#define _Interlocked_operand_ +#undef _IRQL_raises_ +#define _IRQL_raises_(i) +#undef _IRQL_requires_ +#define _IRQL_requires_(i) +#undef _IRQL_requires_max_ +#define _IRQL_requires_max_(i) +#undef _IRQL_requires_min_ +#define _IRQL_requires_min_(i) +#undef _IRQL_saves_ +#define _IRQL_saves_ +#undef _IRQL_saves_global_ +#define _IRQL_saves_global_(k,s) +#undef _IRQL_restores_ +#define _IRQL_restores_ +#undef _IRQL_restores_global_ +#define _IRQL_restores_global_(k,s) +#undef _IRQL_always_function_min_ +#define _IRQL_always_function_min_(i) +#undef _IRQL_always_function_max_ +#define _IRQL_always_function_max_(i) +#undef _IRQL_requires_same_ +#define _IRQL_requires_same_ +#undef _IRQL_uses_cancel_ +#define _IRQL_uses_cancel_ +#undef _IRQL_is_cancel_ +#define _IRQL_is_cancel_ +#undef _Kernel_float_saved_ +#define _Kernel_float_saved_ +#undef _Kernel_float_restored_ +#define _Kernel_float_restored_ +#undef _Kernel_float_used_ +#define _Kernel_float_used_ +#undef _Kernel_acquires_resource_ +#define _Kernel_acquires_resource_(k) +#undef _Kernel_releases_resource_ +#define _Kernel_releases_resource_(k) +#undef _Kernel_requires_resource_held_ +#define _Kernel_requires_resource_held_(k) +#undef _Kernel_requires_resource_not_held_ +#define _Kernel_requires_resource_not_held_(k) +#undef _Kernel_clear_do_init_ +#define _Kernel_clear_do_init_(yn) +#undef _Kernel_IoGetDmaAdapter_ +#define _Kernel_IoGetDmaAdapter_ +#undef _Outref_ +#define _Outref_ +#undef _Outref_result_maybenull_ +#define _Outref_result_maybenull_ +#undef _Outref_result_buffer_ +#define _Outref_result_buffer_(s) +#undef _Outref_result_bytebuffer_ +#define _Outref_result_bytebuffer_(s) +#undef _Outref_result_buffer_to_ +#define _Outref_result_buffer_to_(s,c) +#undef _Outref_result_bytebuffer_to_ +#define _Outref_result_bytebuffer_to_(s,c) +#undef _Outref_result_buffer_all_ +#define _Outref_result_buffer_all_(s) +#undef _Outref_result_bytebuffer_all_ +#define _Outref_result_bytebuffer_all_(s) +#undef _Outref_result_buffer_maybenull_ +#define _Outref_result_buffer_maybenull_(s) +#undef _Outref_result_bytebuffer_maybenull_ +#define _Outref_result_bytebuffer_maybenull_(s) +#undef _Outref_result_buffer_to_maybenull_ +#define _Outref_result_buffer_to_maybenull_(s,c) +#undef _Outref_result_bytebuffer_to_maybenull_ +#define _Outref_result_bytebuffer_to_maybenull_(s,c) +#undef _Outref_result_buffer_all_maybenull_ +#define _Outref_result_buffer_all_maybenull_(s) +#undef _Outref_result_bytebuffer_all_maybenull_ +#define _Outref_result_bytebuffer_all_maybenull_(s) +#undef _In_defensive_ +#define _In_defensive_(a) +#undef _Out_defensive_ +#define _Out_defensive_(a) +#undef _Inout_defensive_ +#define _Inout_defensive_(a) +#undef _Outptr_result_nullonfailure_ +#define _Outptr_result_nullonfailure_ +#undef _Outptr_opt_result_nullonfailure_ +#define _Outptr_opt_result_nullonfailure_ +#undef _Outref_result_nullonfailure_ +#define _Outref_result_nullonfailure_ +#undef _Result_nullonfailure_ +#define _Result_nullonfailure_ +#undef _Result_zeroonfailure_ +#define _Result_zeroonfailure_ +#undef _Acquires_nonreentrant_lock_ +#define _Acquires_nonreentrant_lock_(e) +#undef _Releases_nonreentrant_lock_ +#define _Releases_nonreentrant_lock_(e) +#undef _Reserved_ +#define _Reserved_ _Pre_equal_to_(0) _Pre_ _Null_ +#undef _Pre_z_ +#define 
_Pre_z_ _Pre_ _Null_terminated_ +#undef _Post_z_ +#define _Post_z_ _Post_ _Null_terminated_ +#undef _Prepost_z_ +#define _Prepost_z_ _Pre_z_ _Post_z_ +#undef _Pre_null_ +#define _Pre_null_ _Pre_ _Null_ +#undef _Pre_maybenull_ +#define _Pre_maybenull_ _Pre_ _Maybenull_ +#undef _Pre_notnull_ +#define _Pre_notnull_ _Pre_ _Notnull_ +#undef _Pre_valid_ +#define _Pre_valid_ _Pre_notnull_ _Pre_ _Valid_ +#undef _Pre_opt_valid_ +#define _Pre_opt_valid_ _Pre_maybenull_ _Pre_ _Valid_ +#undef _Post_valid_ +#define _Post_valid_ _Post_ _Valid_ +#undef _Post_invalid_ +#define _Post_invalid_ _Post_ _Deref_ _Notvalid_ +#undef _Post_ptr_invalid_ +#define _Post_ptr_invalid_ _Post_ _Notvalid_ +#undef _Pre_readable_size_ +#define _Pre_readable_size_(s) _Pre_ _Readable_elements_(s) _Pre_ _Valid_ +#undef _Pre_writable_size_ +#define _Pre_writable_size_(s) _Pre_ _Writable_elements_(s) +#undef _Pre_readable_byte_size_ +#define _Pre_readable_byte_size_(s) _Pre_ _Readable_bytes_(s) _Pre_ _Valid_ +#undef _Pre_writable_byte_size_ +#define _Pre_writable_byte_size_(s) _Pre_ _Writable_bytes_(s) +#undef _Post_readable_size_ +#define _Post_readable_size_(s) _Post_ _Readable_elements_(s) _Post_ _Valid_ +#undef _Post_writable_size_ +#define _Post_writable_size_(s) _Post_ _Writable_elements_(s) +#undef _Post_readable_byte_size_ +#define _Post_readable_byte_size_(s) _Post_ _Readable_bytes_(s) _Post_ _Valid_ +#undef _Post_writable_byte_size_ +#define _Post_writable_byte_size_(s) _Post_ _Writable_bytes_(s) + +#endif /* _NO_SAL_2_H_ */ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/poppack.h b/src/coreclr/src/nativeaot/Runtime/unix/poppack.h new file mode 100644 index 0000000000000..b5c2319b54171 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/poppack.h @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +// +// =========================================================================== +// File: poppack.h +// +// =========================================================================== +/* +Abstract: + + This file turns packing of structures off. (That is, it enables + automatic alignment of structure fields.) An include file is needed + because various compilers do this in different ways. + + poppack.h is the complement to pshpack?.h. An inclusion of poppack.h + MUST ALWAYS be preceded by an inclusion of one of pshpack?.h, in one-to-one + correspondence. + + For Microsoft compatible compilers, this file uses the pop option + to the pack pragma so that it can restore the previous saved by the + pshpack?.h include file. + +*/ + +#if ! (defined(lint) || defined(RC_INVOKED)) +#if ( _MSC_VER >= 800 && !defined(_M_I86)) || defined(_PUSHPOP_SUPPORTED) +#pragma warning(disable:4103) +#if !(defined( MIDL_PASS )) || defined( __midl ) +#pragma pack(pop) +#else +#pragma pack() +#endif +#else +#pragma pack() +#endif +#endif // ! (defined(lint) || defined(RC_INVOKED)) + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/pshpack1.h b/src/coreclr/src/nativeaot/Runtime/unix/pshpack1.h new file mode 100644 index 0000000000000..5f1e59e285db8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/pshpack1.h @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
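+//
+// Typical usage of the pshpack*.h / poppack.h pair (an illustrative sketch,
+// not part of the original headers; the structure shown is hypothetical):
+//
+//     #include "pshpack1.h"
+//     struct PacketHeader
+//     {
+//         uint8_t  kind;
+//         uint32_t length;   // packed at offset 1; sizeof(PacketHeader) == 5
+//     };
+//     #include "poppack.h"   // restores the previous packing
+//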
+// + +// +// =========================================================================== +// File: pshpack1.h +// +// =========================================================================== + +/*++ + +Abstract: + + This file turns 1 byte packing of structures on. (That is, it disables + automatic alignment of structure fields.) An include file is needed + because various compilers do this in different ways. For Microsoft + compatible compilers, this files uses the push option to the pack pragma + so that the poppack.h include file can restore the previous packing + reliably. + + The file poppack.h is the complement to this file. + +--*/ + +#if ! (defined(lint) || defined(RC_INVOKED)) +#if ( _MSC_VER >= 800 && !defined(_M_I86)) || defined(_PUSHPOP_SUPPORTED) +#pragma warning(disable:4103) +#if !(defined( MIDL_PASS )) || defined( __midl ) +#pragma pack(push,1) +#else +#pragma pack(1) +#endif +#else +#pragma pack(1) +#endif +#endif // ! (defined(lint) || defined(RC_INVOKED)) + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/pshpack4.h b/src/coreclr/src/nativeaot/Runtime/unix/pshpack4.h new file mode 100644 index 0000000000000..7d4de930c2dff --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/pshpack4.h @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +// +// =========================================================================== +// File: pshpack4.h +// +// =========================================================================== + +/*++ + +Abstract: + + This file turns 4 byte packing of structures on. (That is, it disables + automatic alignment of structure fields.) An include file is needed + because various compilers do this in different ways. For Microsoft + compatible compilers, this files uses the push option to the pack pragma + so that the poppack.h include file can restore the previous packing + reliably. + + The file poppack.h is the complement to this file. + +--*/ + +#if ! (defined(lint) || defined(RC_INVOKED)) +#if ( _MSC_VER >= 800 && !defined(_M_I86)) || defined(_PUSHPOP_SUPPORTED) +#pragma warning(disable:4103) +#if !(defined( MIDL_PASS )) || defined( __midl ) +#pragma pack(push,4) +#else +#pragma pack(4) +#endif +#else +#pragma pack(4) +#endif +#endif // ! (defined(lint) || defined(RC_INVOKED)) + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/sal.h b/src/coreclr/src/nativeaot/Runtime/unix/sal.h new file mode 100644 index 0000000000000..e651378912826 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/sal.h @@ -0,0 +1,2957 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*** +*sal.h - markers for documenting the semantics of APIs +* + +* +*Purpose: +* sal.h provides a set of annotations to describe how a function uses its +* parameters - the assumptions it makes about them, and the guarantees it makes +* upon finishing. +****/ +#pragma once + +/*========================================================================== + + The comments in this file are intended to give basic understanding of + the usage of SAL, the Microsoft Source Code Annotation Language. 
+ For more details, please see http://go.microsoft.com/fwlink/?LinkID=242134 + + The macros are defined in 3 layers, plus the structural set: + + _In_/_Out_/_Ret_ Layer: + ---------------------- + This layer provides the highest abstraction and its macros should be used + in most cases. These macros typically start with: + _In_ : input parameter to a function, unmodified by called function + _Out_ : output parameter, written to by called function, pointed-to + location not expected to be initialized prior to call + _Outptr_ : like _Out_ when returned variable is a pointer type + (so param is pointer-to-pointer type). Called function + provides/allocated space. + _Outref_ : like _Outptr_, except param is reference-to-pointer type. + _Inout_ : inout parameter, read from and potentially modified by + called function. + _Ret_ : for return values + _Field_ : class/struct field invariants + For common usage, this class of SAL provides the most concise annotations. + Note that _In_/_Out_/_Inout_/_Outptr_ annotations are designed to be used + with a parameter target. Using them with _At_ to specify non-parameter + targets may yield unexpected results. + + This layer also includes a number of other properties that can be specified + to extend the ability of code analysis, most notably: + -- Designating parameters as format strings for printf/scanf/scanf_s + -- Requesting stricter type checking for C enum parameters + + _Pre_/_Post_ Layer: + ------------------ + The macros of this layer only should be used when there is no suitable macro + in the _In_/_Out_ layer. Its macros start with _Pre_ or _Post_. + This layer provides the most flexibility for annotations. + + Implementation Abstraction Layer: + -------------------------------- + Macros from this layer should never be used directly. The layer only exists + to hide the implementation of the annotation macros. + + Structural Layer: + ---------------- + These annotations, like _At_ and _When_, are used with annotations from + any of the other layers as modifiers, indicating exactly when and where + the annotations apply. + + + Common syntactic conventions: + ---------------------------- + + Usage: + ----- + _In_, _Out_, _Inout_, _Pre_, _Post_, are for formal parameters. + _Ret_, _Deref_ret_ must be used for return values. + + Nullness: + -------- + If the parameter can be NULL as a precondition to the function, the + annotation contains _opt. If the macro does not contain '_opt' the + parameter cannot be NULL. + + If an out/inout parameter returns a null pointer as a postcondition, this is + indicated by _Ret_maybenull_ or _result_maybenull_. If the macro is not + of this form, then the result will not be NULL as a postcondition. + _Outptr_ - output value is not NULL + _Outptr_result_maybenull_ - output value might be NULL + + String Type: + ----------- + _z: NullTerminated string + for _In_ parameters the buffer must have the specified stringtype before the call + for _Out_ parameters the buffer must have the specified stringtype after the call + for _Inout_ parameters both conditions apply + + Extent Syntax: + ------------- + Buffer sizes are expressed as element counts, unless the macro explicitly + contains _byte_ or _bytes_. Some annotations specify two buffer sizes, in + which case the second is used to indicate how much of the buffer is valid + as a postcondition. 
This table outlines the precondition buffer allocation + size, precondition number of valid elements, postcondition allocation size, + and postcondition number of valid elements for representative buffer size + annotations: + Pre | Pre | Post | Post + alloc | valid | alloc | valid + Annotation elems | elems | elems | elems + ---------- ------------------------------------ + _In_reads_(s) s | s | s | s + _Inout_updates_(s) s | s | s | s + _Inout_updates_to_(s,c) s | s | s | c + _Out_writes_(s) s | 0 | s | s + _Out_writes_to_(s,c) s | 0 | s | c + _Outptr_result_buffer_(s) ? | ? | s | s + _Outptr_result_buffer_to_(s,c) ? | ? | s | c + + For the _Outptr_ annotations, the buffer in question is at one level of + dereference. The called function is responsible for supplying the buffer. + + Success and failure: + ------------------- + The SAL concept of success allows functions to define expressions that can + be tested by the caller, which if it evaluates to non-zero, indicates the + function succeeded, which means that its postconditions are guaranteed to + hold. Otherwise, if the expression evaluates to zero, the function is + considered to have failed, and the postconditions are not guaranteed. + + The success criteria can be specified with the _Success_(expr) annotation: + _Success_(return != FALSE) BOOL + PathCanonicalizeA(_Out_writes_(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath) : + pszBuf is only guaranteed to be NULL-terminated when TRUE is returned, + and FALSE indiates failure. In common practice, callers check for zero + vs. non-zero returns, so it is preferable to express the success + criteria in terms of zero/non-zero, not checked for exactly TRUE. + + Functions can specify that some postconditions will still hold, even when + the function fails, using _On_failure_(anno-list), or postconditions that + hold regardless of success or failure using _Always_(anno-list). + + The annotation _Return_type_success_(expr) may be used with a typedef to + give a default _Success_ criteria to all functions returning that type. + This is the case for common Windows API status types, including + HRESULT and NTSTATUS. This may be overridden on a per-function basis by + specifying a _Success_ annotation locally. + +============================================================================*/ + +#define __ATTR_SAL + +#ifndef _SAL_VERSION /*IFSTRIP=IGN*/ +#define _SAL_VERSION 20 +#endif + +#ifdef _PREFAST_ // [ + +// choose attribute or __declspec implementation +#ifndef _USE_DECLSPECS_FOR_SAL // [ +#define _USE_DECLSPECS_FOR_SAL 1 +#endif // ] + +#if _USE_DECLSPECS_FOR_SAL // [ +#undef _USE_ATTRIBUTES_FOR_SAL +#define _USE_ATTRIBUTES_FOR_SAL 0 +#elif !defined(_USE_ATTRIBUTES_FOR_SAL) // ][ +#if _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // [ +#define _USE_ATTRIBUTES_FOR_SAL 1 +#else // ][ +#define _USE_ATTRIBUTES_FOR_SAL 0 +#endif // ] +#endif // ] + + +#if !_USE_DECLSPECS_FOR_SAL // [ +#if !_USE_ATTRIBUTES_FOR_SAL // [ +#if _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // [ +#undef _USE_ATTRIBUTES_FOR_SAL +#define _USE_ATTRIBUTES_FOR_SAL 1 +#else // ][ +#undef _USE_DECLSPECS_FOR_SAL +#define _USE_DECLSPECS_FOR_SAL 1 +#endif // ] +#endif // ] +#endif // ] + +#else + +// Disable expansion of SAL macros in non-Prefast mode to +// improve compiler throughput. 
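+// As an illustrative sketch (not part of the original header), a hypothetical
+// function annotated with the macros summarized above might be declared as:
+//
+//     _Success_(return == 0)
+//     int FormatName(_In_z_ const wchar_t* pszFirst,
+//                    _Out_writes_to_(cchBuf, *pcchWritten) wchar_t* pszBuf,
+//                    _In_ size_t cchBuf,
+//                    _Out_ size_t* pcchWritten);
+//
+// When _PREFAST_ is defined the annotations expand to attributes or __declspec
+// markers that the analyzer checks; otherwise the block below disables their
+// expansion entirely.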
+#ifndef _USE_DECLSPECS_FOR_SAL // [ +#define _USE_DECLSPECS_FOR_SAL 0 +#endif // ] +#ifndef _USE_ATTRIBUTES_FOR_SAL // [ +#define _USE_ATTRIBUTES_FOR_SAL 0 +#endif // ] + +#endif // ] + +// safeguard for MIDL and RC builds +#if _USE_DECLSPECS_FOR_SAL && ( defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) || !defined(_PREFAST_) ) /*IFSTRIP=IGN*/ // [ +#undef _USE_DECLSPECS_FOR_SAL +#define _USE_DECLSPECS_FOR_SAL 0 +#endif // ] +#if _USE_ATTRIBUTES_FOR_SAL && ( !defined(_MSC_EXTENSIONS) || defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) ) /*IFSTRIP=IGN*/ // [ +#undef _USE_ATTRIBUTES_FOR_SAL +#define _USE_ATTRIBUTES_FOR_SAL 0 +#endif // ] + +#if _USE_DECLSPECS_FOR_SAL || _USE_ATTRIBUTES_FOR_SAL + +// Special enum type for Y/N/M +enum __SAL_YesNo {_SAL_notpresent, _SAL_no, _SAL_maybe, _SAL_yes, _SAL_default}; + +#endif + +#if defined(BUILD_WINDOWS) && !_USE_ATTRIBUTES_FOR_SAL /*IFSTRIP=IGN*/ +#define _SAL1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1") _GrouP_(annotes _SAL_nop_impl_) +#define _SAL1_1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.1") _GrouP_(annotes _SAL_nop_impl_) +#define _SAL1_2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.2") _GrouP_(annotes _SAL_nop_impl_) +#define _SAL2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "2") _GrouP_(annotes _SAL_nop_impl_) +#else +#define _SAL1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1") _Group_(annotes _SAL_nop_impl_) +#define _SAL1_1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.1") _Group_(annotes _SAL_nop_impl_) +#define _SAL1_2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.2") _Group_(annotes _SAL_nop_impl_) +#define _SAL2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "2") _Group_(annotes _SAL_nop_impl_) +#endif + +//============================================================================ +// Structural SAL: +// These annotations modify the use of other annotations. They may +// express the annotation target (i.e. what parameter/field the annotation +// applies to) or the condition under which the annotation is applicable. +//============================================================================ + +// _At_(target, annos) specifies that the annotations listed in 'annos' is to +// be applied to 'target' rather than to the identifier which is the current +// lexical target. +#define _At_(target, annos) _At_impl_(target, annos _SAL_nop_impl_) + +// _At_buffer_(target, iter, bound, annos) is similar to _At_, except that +// target names a buffer, and each annotation in annos is applied to each +// element of target up to bound, with the variable named in iter usable +// by the annotations to refer to relevant offsets within target. +#define _At_buffer_(target, iter, bound, annos) _At_buffer_impl_(target, iter, bound, annos _SAL_nop_impl_) + +// _When_(expr, annos) specifies that the annotations listed in 'annos' only +// apply when 'expr' evaluates to non-zero. 
+#define _When_(expr, annos) _When_impl_(expr, annos _SAL_nop_impl_) +#define _Group_(annos) _Group_impl_(annos _SAL_nop_impl_) +#define _GrouP_(annos) _GrouP_impl_(annos _SAL_nop_impl_) + +// indicates whether normal post conditions apply to a function +#define _Success_(expr) _SAL2_Source_(_Success_, (expr), _Success_impl_(expr)) + +// indicates whether post conditions apply to a function returning +// the type that this annotation is applied to +#define _Return_type_success_(expr) _SAL2_Source_(_Return_type_success_, (expr), _Success_impl_(expr)) + +// Establish postconditions that apply only if the function does not succeed +#define _On_failure_(annos) _On_failure_impl_(annos _SAL_nop_impl_) + +// Establish postconditions that apply in both success and failure cases. +// Only applicable with functions that have _Success_ or _Return_type_succss_. +#define _Always_(annos) _Always_impl_(annos _SAL_nop_impl_) + +// Usable on a function defintion. Asserts that a function declaration is +// in scope, and its annotations are to be used. There are no other annotations +// allowed on the function definition. +#define _Use_decl_annotations_ _Use_decl_anno_impl_ + +// _Notref_ may precede a _Deref_ or "real" annotation, and removes one +// level of dereference if the parameter is a C++ reference (&). If the +// net deref on a "real" annotation is negative, it is simply discarded. +#define _Notref_ _Notref_impl_ + +// Annotations for defensive programming styles. +#define _Pre_defensive_ _SA_annotes0(SAL_pre_defensive) +#define _Post_defensive_ _SA_annotes0(SAL_post_defensive) + +#define _In_defensive_(annotes) _Pre_defensive_ _Group_(annotes) +#define _Out_defensive_(annotes) _Post_defensive_ _Group_(annotes) +#define _Inout_defensive_(annotes) _Pre_defensive_ _Post_defensive_ _Group_(annotes) + +//============================================================================ +// _In_\_Out_ Layer: +//============================================================================ + +// Reserved pointer parameters, must always be NULL. +#define _Reserved_ _SAL2_Source_(_Reserved_, (), _Pre1_impl_(__null_impl)) + +// _Const_ allows specification that any namable memory location is considered +// readonly for a given call. +#define _Const_ _SAL2_Source_(_Const_, (), _Pre1_impl_(__readaccess_impl_notref)) + + +// Input parameters -------------------------- + +// _In_ - Annotations for parameters where data is passed into the function, but not modified. +// _In_ by itself can be used with non-pointer types (although it is redundant). + +// e.g. void SetPoint( _In_ const POINT* pPT ); +#define _In_ _SAL2_Source_(_In_, (), _Pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_ _Deref_pre1_impl_(__readaccess_impl_notref)) +#define _In_opt_ _SAL2_Source_(_In_opt_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_ _Deref_pre_readonly_) + +// nullterminated 'in' parameters. +// e.g. 
void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); +#define _In_z_ _SAL2_Source_(_In_z_, (), _In_ _Pre1_impl_(__zterm_impl)) +#define _In_opt_z_ _SAL2_Source_(_In_opt_z_, (), _In_opt_ _Pre1_impl_(__zterm_impl)) + + +// 'input' buffers with given size + +#define _In_reads_(size) _SAL2_Source_(_In_reads_, (size), _Pre_count_(size) _Deref_pre_readonly_) +#define _In_reads_opt_(size) _SAL2_Source_(_In_reads_opt_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_) +#define _In_reads_bytes_(size) _SAL2_Source_(_In_reads_bytes_, (size), _Pre_bytecount_(size) _Deref_pre_readonly_) +#define _In_reads_bytes_opt_(size) _SAL2_Source_(_In_reads_bytes_opt_, (size), _Pre_opt_bytecount_(size) _Deref_pre_readonly_) +#define _In_reads_z_(size) _SAL2_Source_(_In_reads_z_, (size), _In_reads_(size) _Pre_z_) +#define _In_reads_opt_z_(size) _SAL2_Source_(_In_reads_opt_z_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_ _Pre_opt_z_) +#define _In_reads_or_z_(size) _SAL2_Source_(_In_reads_or_z_, (size), _In_ _When_(_String_length_(_Curr_) < (size), _Pre_z_) _When_(_String_length_(_Curr_) >= (size), _Pre1_impl_(__count_impl(size)))) +#define _In_reads_or_z_opt_(size) _SAL2_Source_(_In_reads_or_z_opt_, (size), _In_opt_ _When_(_String_length_(_Curr_) < (size), _Pre_z_) _When_(_String_length_(_Curr_) >= (size), _Pre1_impl_(__count_impl(size)))) + + +// 'input' buffers valid to the given end pointer + +#define _In_reads_to_ptr_(ptr) _SAL2_Source_(_In_reads_to_ptr_, (ptr), _Pre_ptrdiff_count_(ptr) _Deref_pre_readonly_) +#define _In_reads_to_ptr_opt_(ptr) _SAL2_Source_(_In_reads_to_ptr_opt_, (ptr), _Pre_opt_ptrdiff_count_(ptr) _Deref_pre_readonly_) +#define _In_reads_to_ptr_z_(ptr) _SAL2_Source_(_In_reads_to_ptr_z_, (ptr), _In_reads_to_ptr_(ptr) _Pre_z_) +#define _In_reads_to_ptr_opt_z_(ptr) _SAL2_Source_(_In_reads_to_ptr_opt_z_, (ptr), _Pre_opt_ptrdiff_count_(ptr) _Deref_pre_readonly_ _Pre_opt_z_) + + + +// Output parameters -------------------------- + +// _Out_ - Annotations for pointer or reference parameters where data passed back to the caller. +// These are mostly used where the pointer/reference is to a non-pointer type. +// _Outptr_/_Outref) (see below) are typically used to return pointers via parameters. + +// e.g. 
void GetPoint( _Out_ POINT* pPT ); +#define _Out_ _SAL2_Source_(_Out_, (), _Out_impl_) +#define _Out_opt_ _SAL2_Source_(_Out_opt_, (), _Out_opt_impl_) + +#define _Out_writes_(size) _SAL2_Source_(_Out_writes_, (size), _Pre_cap_(size) _Post_valid_impl_) +#define _Out_writes_opt_(size) _SAL2_Source_(_Out_writes_opt_, (size), _Pre_opt_cap_(size) _Post_valid_impl_) +#define _Out_writes_bytes_(size) _SAL2_Source_(_Out_writes_bytes_, (size), _Pre_bytecap_(size) _Post_valid_impl_) +#define _Out_writes_bytes_opt_(size) _SAL2_Source_(_Out_writes_bytes_opt_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_) +#define _Out_writes_z_(size) _SAL2_Source_(_Out_writes_z_, (size), _Pre_cap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_writes_opt_z_(size) _SAL2_Source_(_Out_writes_opt_z_, (size), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_z_) + +#define _Out_writes_to_(size,count) _SAL2_Source_(_Out_writes_to_, (size,count), _Pre_cap_(size) _Post_valid_impl_ _Post_count_(count)) +#define _Out_writes_to_opt_(size,count) _SAL2_Source_(_Out_writes_to_opt_, (size,count), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_count_(count)) +#define _Out_writes_all_(size) _SAL2_Source_(_Out_writes_all_, (size), _Out_writes_to_(_Old_(size), _Old_(size))) +#define _Out_writes_all_opt_(size) _SAL2_Source_(_Out_writes_all_opt_, (size), _Out_writes_to_opt_(_Old_(size), _Old_(size))) + +#define _Out_writes_bytes_to_(size,count) _SAL2_Source_(_Out_writes_bytes_to_, (size,count), _Pre_bytecap_(size) _Post_valid_impl_ _Post_bytecount_(count)) +#define _Out_writes_bytes_to_opt_(size,count) _SAL2_Source_(_Out_writes_bytes_to_opt_, (size,count), _Pre_opt_bytecap_(size) _Post_valid_impl_ _Post_bytecount_(count)) +#define _Out_writes_bytes_all_(size) _SAL2_Source_(_Out_writes_bytes_all_, (size), _Out_writes_bytes_to_(_Old_(size), _Old_(size))) +#define _Out_writes_bytes_all_opt_(size) _SAL2_Source_(_Out_writes_bytes_all_opt_, (size), _Out_writes_bytes_to_opt_(_Old_(size), _Old_(size))) + +#define _Out_writes_to_ptr_(ptr) _SAL2_Source_(_Out_writes_to_ptr_, (ptr), _Pre_ptrdiff_cap_(ptr) _Post_valid_impl_) +#define _Out_writes_to_ptr_opt_(ptr) _SAL2_Source_(_Out_writes_to_ptr_opt_, (ptr), _Pre_opt_ptrdiff_cap_(ptr) _Post_valid_impl_) +#define _Out_writes_to_ptr_z_(ptr) _SAL2_Source_(_Out_writes_to_ptr_z_, (ptr), _Pre_ptrdiff_cap_(ptr) _Post_valid_impl_ Post_z_) +#define _Out_writes_to_ptr_opt_z_(ptr) _SAL2_Source_(_Out_writes_to_ptr_opt_z_, (ptr), _Pre_opt_ptrdiff_cap_(ptr) _Post_valid_impl_ Post_z_) + + +// Inout parameters ---------------------------- + +// _Inout_ - Annotations for pointer or reference parameters where data is passed in and +// potentially modified. 
+// void ModifyPoint( _Inout_ POINT* pPT ); +// void ModifyPointByRef( _Inout_ POINT& pPT ); + +#define _Inout_ _SAL2_Source_(_Inout_, (), _Prepost_valid_) +#define _Inout_opt_ _SAL2_Source_(_Inout_opt_, (), _Prepost_opt_valid_) + +// For modifying string buffers +// void toupper( _Inout_z_ char* sz ); +#define _Inout_z_ _SAL2_Source_(_Inout_z_, (), _Prepost_z_) +#define _Inout_opt_z_ _SAL2_Source_(_Inout_opt_z_, (), _Prepost_opt_z_) + +// For modifying buffers with explicit element size +#define _Inout_updates_(size) _SAL2_Source_(_Inout_updates_, (size), _Pre_cap_(size) _Pre_valid_impl_ _Post_valid_impl_) +#define _Inout_updates_opt_(size) _SAL2_Source_(_Inout_updates_opt_, (size), _Pre_opt_cap_(size) _Pre_valid_impl_ _Post_valid_impl_) +#define _Inout_updates_z_(size) _SAL2_Source_(_Inout_updates_z_, (size), _Pre_cap_(size) _Pre_valid_impl_ _Post_valid_impl_ _Pre1_impl_(__zterm_impl) _Post1_impl_(__zterm_impl)) +#define _Inout_updates_opt_z_(size) _SAL2_Source_(_Inout_updates_opt_z_, (size), _Pre_opt_cap_(size) _Pre_valid_impl_ _Post_valid_impl_ _Pre1_impl_(__zterm_impl) _Post1_impl_(__zterm_impl)) + +#define _Inout_updates_to_(size,count) _SAL2_Source_(_Inout_updates_to_, (size,count), _Out_writes_to_(size,count) _Pre_valid_impl_ _Pre1_impl_(__count_impl(count))) +#define _Inout_updates_to_opt_(size,count) _SAL2_Source_(_Inout_updates_to_opt_, (size,count), _Out_writes_to_opt_(size,count) _Pre_valid_impl_ _Pre1_impl_(__count_impl(count))) + +#define _Inout_updates_all_(size) _SAL2_Source_(_Inout_updates_all_, (size), _Inout_updates_to_(_Old_(size), _Old_(size))) +#define _Inout_updates_all_opt_(size) _SAL2_Source_(_Inout_updates_all_opt_, (size), _Inout_updates_to_opt_(_Old_(size), _Old_(size))) + +// For modifying buffers with explicit byte size +#define _Inout_updates_bytes_(size) _SAL2_Source_(_Inout_updates_bytes_, (size), _Pre_bytecap_(size) _Pre_valid_impl_ _Post_valid_impl_) +#define _Inout_updates_bytes_opt_(size) _SAL2_Source_(_Inout_updates_bytes_opt_, (size), _Pre_opt_bytecap_(size) _Pre_valid_impl_ _Post_valid_impl_) + +#define _Inout_updates_bytes_to_(size,count) _SAL2_Source_(_Inout_updates_bytes_to_, (size,count), _Out_writes_bytes_to_(size,count) _Pre_valid_impl_ _Pre1_impl_(__bytecount_impl(count))) +#define _Inout_updates_bytes_to_opt_(size,count) _SAL2_Source_(_Inout_updates_bytes_to_opt_, (size,count), _Out_writes_bytes_to_opt_(size,count) _Pre_valid_impl_ _Pre1_impl_(__bytecount_impl(count))) + +#define _Inout_updates_bytes_all_(size) _SAL2_Source_(_Inout_updates_bytes_all_, (size), _Inout_updates_bytes_to_(_Old_(size), _Old_(size))) +#define _Inout_updates_bytes_all_opt_(size) _SAL2_Source_(_Inout_updates_bytes_all_opt_, (size), _Inout_updates_bytes_to_opt_(_Old_(size), _Old_(size))) + + +// Pointer to pointer parameters ------------------------- + +// _Outptr_ - Annotations for output params returning pointers +// These describe parameters where the called function provides the buffer: +// HRESULT SHStrDupW(_In_ LPCWSTR psz, _Outptr_ LPWSTR *ppwsz); +// The caller passes the address of an LPWSTR variable as ppwsz, and SHStrDupW allocates +// and initializes memory and returns the pointer to the new LPWSTR in *ppwsz. +// +// _Outptr_opt_ - describes parameters that are allowed to be NULL. +// _Outptr_*_result_maybenull_ - describes parameters where the called function might return NULL to the caller. 
+// +// Example: +// void MyFunc(_Outptr_opt_ int **ppData1, _Outptr_result_maybenull_ int **ppData2); +// Callers: +// MyFunc(NULL, NULL); // error: parameter 2, ppData2, should not be NULL +// MyFunc(&pData1, &pData2); // ok: both non-NULL +// if (*pData1 == *pData2) ... // error: pData2 might be NULL after call + +#define _Outptr_ _SAL2_Source_(_Outptr_, (), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(1))) +#define _Outptr_result_maybenull_ _SAL2_Source_(_Outptr_result_maybenull_, (), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(1))) +#define _Outptr_opt_ _SAL2_Source_(_Outptr_opt_, (), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(1))) +#define _Outptr_opt_result_maybenull_ _SAL2_Source_(_Outptr_opt_result_maybenull_, (), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(1))) + +// Annotations for _Outptr_ parameters returning pointers to null terminated strings. + +#define _Outptr_result_z_ _SAL2_Source_(_Outptr_result_z_, (), _Out_impl_ _Deref_post_z_) +#define _Outptr_opt_result_z_ _SAL2_Source_(_Outptr_opt_result_z_, (), _Out_opt_impl_ _Deref_post_z_) +#define _Outptr_result_maybenull_z_ _SAL2_Source_(_Outptr_result_maybenull_z_, (), _Out_impl_ _Deref_post_opt_z_) +#define _Outptr_opt_result_maybenull_z_ _SAL2_Source_(_Outptr_opt_result_maybenull_z_, (), _Out_opt_impl_ _Deref_post_opt_z_) + +// Annotations for _Outptr_ parameters where the output pointer is set to NULL if the function fails. + +#define _Outptr_result_nullonfailure_ _SAL2_Source_(_Outptr_result_nullonfailure_, (), _Outptr_ _On_failure_(_Deref_post_null_)) +#define _Outptr_opt_result_nullonfailure_ _SAL2_Source_(_Outptr_opt_result_nullonfailure_, (), _Outptr_opt_ _On_failure_(_Deref_post_null_)) + +// Annotations for _Outptr_ parameters which return a pointer to a ref-counted COM object, +// following the COM convention of setting the output to NULL on failure. +// The current implementation is identical to _Outptr_result_nullonfailure_. +// For pointers to types that are not COM objects, _Outptr_result_nullonfailure_ is preferred. 
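+// e.g. (an illustrative sketch, not part of the original header) a hypothetical
+// COM-style factory following the convention described above:
+//
+//     _Check_return_
+//     HRESULT CreateWidget(_In_ REFIID riid, _COM_Outptr_ void** ppvWidget);
+//
+// On failure the callee must set *ppvWidget to NULL, which is exactly the
+// _On_failure_(_Deref_post_null_) postcondition carried by _COM_Outptr_.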
+ +#define _COM_Outptr_ _SAL2_Source_(_COM_Outptr_, (), _Outptr_ _On_failure_(_Deref_post_null_)) +#define _COM_Outptr_result_maybenull_ _SAL2_Source_(_COM_Outptr_result_maybenull_, (), _Outptr_result_maybenull_ _On_failure_(_Deref_post_null_)) +#define _COM_Outptr_opt_ _SAL2_Source_(_COM_Outptr_opt_, (), _Outptr_opt_ _On_failure_(_Deref_post_null_)) +#define _COM_Outptr_opt_result_maybenull_ _SAL2_Source_(_COM_Outptr_opt_result_maybenull_, (), _Outptr_opt_result_maybenull_ _On_failure_(_Deref_post_null_)) + +// Annotations for _Outptr_ parameters returning a pointer to buffer with a specified number of elements/bytes + +#define _Outptr_result_buffer_(size) _SAL2_Source_(_Outptr_result_buffer_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __cap_impl(size))) +#define _Outptr_opt_result_buffer_(size) _SAL2_Source_(_Outptr_opt_result_buffer_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __cap_impl(size))) +#define _Outptr_result_buffer_to_(size, count) _SAL2_Source_(_Outptr_result_buffer_to_, (size, count), _Out_impl_ _Deref_post3_impl_(__notnull_impl_notref, __cap_impl(size), __count_impl(count))) +#define _Outptr_opt_result_buffer_to_(size, count) _SAL2_Source_(_Outptr_opt_result_buffer_to_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__notnull_impl_notref, __cap_impl(size), __count_impl(count))) + +#define _Outptr_result_buffer_all_(size) _SAL2_Source_(_Outptr_result_buffer_all_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(size))) +#define _Outptr_opt_result_buffer_all_(size) _SAL2_Source_(_Outptr_opt_result_buffer_all_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(size))) + +#define _Outptr_result_buffer_maybenull_(size) _SAL2_Source_(_Outptr_result_buffer_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __cap_impl(size))) +#define _Outptr_opt_result_buffer_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_buffer_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __cap_impl(size))) +#define _Outptr_result_buffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_result_buffer_to_maybenull_, (size, count), _Out_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __cap_impl(size), __count_impl(count))) +#define _Outptr_opt_result_buffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_opt_result_buffer_to_maybenull_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __cap_impl(size), __count_impl(count))) + +#define _Outptr_result_buffer_all_maybenull_(size) _SAL2_Source_(_Outptr_result_buffer_all_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(size))) +#define _Outptr_opt_result_buffer_all_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_buffer_all_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(size))) + +#define _Outptr_result_bytebuffer_(size) _SAL2_Source_(_Outptr_result_bytebuffer_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecap_impl(size))) +#define _Outptr_opt_result_bytebuffer_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecap_impl(size))) +#define _Outptr_result_bytebuffer_to_(size, count) _SAL2_Source_(_Outptr_result_bytebuffer_to_, (size, count), _Out_impl_ _Deref_post3_impl_(__notnull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) +#define _Outptr_opt_result_bytebuffer_to_(size, count) 
_SAL2_Source_(_Outptr_opt_result_bytebuffer_to_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__notnull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) + +#define _Outptr_result_bytebuffer_all_(size) _SAL2_Source_(_Outptr_result_bytebuffer_all_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecount_impl(size))) +#define _Outptr_opt_result_bytebuffer_all_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_all_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecount_impl(size))) + +#define _Outptr_result_bytebuffer_maybenull_(size) _SAL2_Source_(_Outptr_result_bytebuffer_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecap_impl(size))) +#define _Outptr_opt_result_bytebuffer_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecap_impl(size))) +#define _Outptr_result_bytebuffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_result_bytebuffer_to_maybenull_, (size, count), _Out_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) +#define _Outptr_opt_result_bytebuffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_opt_result_bytebuffer_to_maybenull_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) + +#define _Outptr_result_bytebuffer_all_maybenull_(size) _SAL2_Source_(_Outptr_result_bytebuffer_all_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecount_impl(size))) +#define _Outptr_opt_result_bytebuffer_all_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_all_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecount_impl(size))) + +// Annotations for output reference to pointer parameters. 
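+// e.g. (an illustrative sketch, not part of the original header) a hypothetical
+// accessor using the reference-to-pointer forms defined just below:
+//
+//     void GetScratchBuffer(_Outref_result_buffer_(cb) unsigned char*& pBuffer,
+//                           _Out_ size_t& cb);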
+ +#define _Outref_ _SAL2_Source_(_Outref_, (), _Out_impl_ _Post_notnull_) +#define _Outref_result_maybenull_ _SAL2_Source_(_Outref_result_maybenull_, (), _Pre2_impl_(__notnull_impl_notref, __cap_c_one_notref_impl) _Post_maybenull_ _Post_valid_impl_) + +#define _Outref_result_buffer_(size) _SAL2_Source_(_Outref_result_buffer_, (size), _Outref_ _Post1_impl_(__cap_impl(size))) +#define _Outref_result_bytebuffer_(size) _SAL2_Source_(_Outref_result_bytebuffer_, (size), _Outref_ _Post1_impl_(__bytecap_impl(size))) +#define _Outref_result_buffer_to_(size, count) _SAL2_Source_(_Outref_result_buffer_to_, (size, count), _Outref_result_buffer_(size) _Post1_impl_(__count_impl(count))) +#define _Outref_result_bytebuffer_to_(size, count) _SAL2_Source_(_Outref_result_bytebuffer_to_, (size, count), _Outref_result_bytebuffer_(size) _Post1_impl_(__bytecount_impl(count))) +#define _Outref_result_buffer_all_(size) _SAL2_Source_(_Outref_result_buffer_all_, (size), _Outref_result_buffer_to_(size, _Old_(size))) +#define _Outref_result_bytebuffer_all_(size) _SAL2_Source_(_Outref_result_bytebuffer_all_, (size), _Outref_result_bytebuffer_to_(size, _Old_(size))) + +#define _Outref_result_buffer_maybenull_(size) _SAL2_Source_(_Outref_result_buffer_maybenull_, (size), _Outref_result_maybenull_ _Post1_impl_(__cap_impl(size))) +#define _Outref_result_bytebuffer_maybenull_(size) _SAL2_Source_(_Outref_result_bytebuffer_maybenull_, (size), _Outref_result_maybenull_ _Post1_impl_(__bytecap_impl(size))) +#define _Outref_result_buffer_to_maybenull_(size, count) _SAL2_Source_(_Outref_result_buffer_to_maybenull_, (size, count), _Outref_result_buffer_maybenull_(size) _Post1_impl_(__count_impl(count))) +#define _Outref_result_bytebuffer_to_maybenull_(size, count) _SAL2_Source_(_Outref_result_bytebuffer_to_maybenull_, (size, count), _Outref_result_bytebuffer_maybenull_(size) _Post1_impl_(__bytecount_impl(count))) +#define _Outref_result_buffer_all_maybenull_(size) _SAL2_Source_(_Outref_result_buffer_all_maybenull_, (size), _Outref_result_buffer_to_maybenull_(size, _Old_(size))) +#define _Outref_result_bytebuffer_all_maybenull_(size) _SAL2_Source_(_Outref_result_bytebuffer_all_maybenull_, (size), _Outref_result_bytebuffer_to_maybenull_(size, _Old_(size))) + +// Annotations for output reference to pointer parameters that guarantee +// that the pointer is set to NULL on failure. +#define _Outref_result_nullonfailure_ _SAL2_Source_(_Outref_result_nullonfailure_, (), _Outref_ _On_failure_(_Post_null_)) + +// Generic annotations to set output value of a by-pointer or by-reference parameter to null/zero on failure. +#define _Result_nullonfailure_ _SAL2_Source_(_Result_nullonfailure_, (), _On_failure_(_Notref_impl_ _Deref_impl_ _Post_null_)) +#define _Result_zeroonfailure_ _SAL2_Source_(_Result_zeroonfailure_, (), _On_failure_(_Notref_impl_ _Deref_impl_ _Out_range_(==, 0))) + + +// return values ------------------------------- + +// +// _Ret_ annotations +// +// describing conditions that hold for return values after the call + +// e.g. 
_Ret_z_ CString::operator const WCHAR*() const throw(); +#define _Ret_z_ _SAL2_Source_(_Ret_z_, (), _Ret2_impl_(__notnull_impl, __zterm_impl) _Ret_valid_impl_) +#define _Ret_maybenull_z_ _SAL2_Source_(_Ret_maybenull_z_, (), _Ret2_impl_(__maybenull_impl,__zterm_impl) _Ret_valid_impl_) + +// used with allocated but not yet initialized objects +#define _Ret_notnull_ _SAL2_Source_(_Ret_notnull_, (), _Ret1_impl_(__notnull_impl)) +#define _Ret_maybenull_ _SAL2_Source_(_Ret_maybenull_, (), _Ret1_impl_(__maybenull_impl)) +#define _Ret_null_ _SAL2_Source_(_Ret_null_, (), _Ret1_impl_(__null_impl)) + +// used with allocated and initialized objects +// returns single valid object +#define _Ret_valid_ _SAL2_Source_(_Ret_valid_, (), _Ret1_impl_(__notnull_impl_notref) _Ret_valid_impl_) + +// returns pointer to initialized buffer of specified size +#define _Ret_writes_(size) _SAL2_Source_(_Ret_writes_, (size), _Ret2_impl_(__notnull_impl, __count_impl(size)) _Ret_valid_impl_) +#define _Ret_writes_z_(size) _SAL2_Source_(_Ret_writes_z_, (size), _Ret3_impl_(__notnull_impl, __count_impl(size), __zterm_impl) _Ret_valid_impl_) +#define _Ret_writes_bytes_(size) _SAL2_Source_(_Ret_writes_bytes_, (size), _Ret2_impl_(__notnull_impl, __bytecount_impl(size)) _Ret_valid_impl_) +#define _Ret_writes_maybenull_(size) _SAL2_Source_(_Ret_writes_maybenull_, (size), _Ret2_impl_(__maybenull_impl,__count_impl(size)) _Ret_valid_impl_) +#define _Ret_writes_maybenull_z_(size) _SAL2_Source_(_Ret_writes_maybenull_z_, (size), _Ret3_impl_(__maybenull_impl,__count_impl(size),__zterm_impl) _Ret_valid_impl_) +#define _Ret_writes_bytes_maybenull_(size) _SAL2_Source_(_Ret_writes_bytes_maybenull_, (size), _Ret2_impl_(__maybenull_impl,__bytecount_impl(size)) _Ret_valid_impl_) + +// returns pointer to partially initialized buffer, with total size 'size' and initialized size 'count' +#define _Ret_writes_to_(size,count) _SAL2_Source_(_Ret_writes_to_, (size,count), _Ret3_impl_(__notnull_impl, __cap_impl(size), __count_impl(count)) _Ret_valid_impl_) +#define _Ret_writes_bytes_to_(size,count) _SAL2_Source_(_Ret_writes_bytes_to_, (size,count), _Ret3_impl_(__notnull_impl, __bytecap_impl(size), __bytecount_impl(count)) _Ret_valid_impl_) +#define _Ret_writes_to_maybenull_(size,count) _SAL2_Source_(_Ret_writes_to_maybenull_, (size,count), _Ret3_impl_(__maybenull_impl, __cap_impl(size), __count_impl(count)) _Ret_valid_impl_) +#define _Ret_writes_bytes_to_maybenull_(size,count) _SAL2_Source_(_Ret_writes_bytes_to_maybenull_, (size,count), _Ret3_impl_(__maybenull_impl, __bytecap_impl(size), __bytecount_impl(count)) _Ret_valid_impl_) + + +// Annotations for strict type checking +#define _Points_to_data_ _SAL2_Source_(_Points_to_data_, (), _Pre_ _Points_to_data_impl_) +#define _Literal_ _SAL2_Source_(_Literal_, (), _Pre_ _Literal_impl_) +#define _Notliteral_ _SAL2_Source_(_Notliteral_, (), _Pre_ _Notliteral_impl_) + +// Check the return value of a function e.g. _Check_return_ ErrorCode Foo(); +#define _Check_return_ _SAL2_Source_(_Check_return_, (), _Check_return_impl_) +#define _Must_inspect_result_ _SAL2_Source_(_Must_inspect_result_, (), _Must_inspect_impl_ _Check_return_impl_) + +// e.g. MyPrintF( _Printf_format_string_ const WCHAR* wzFormat, ... 
); +#define _Printf_format_string_ _SAL2_Source_(_Printf_format_string_, (), _Printf_format_string_impl_) +#define _Scanf_format_string_ _SAL2_Source_(_Scanf_format_string_, (), _Scanf_format_string_impl_) +#define _Scanf_s_format_string_ _SAL2_Source_(_Scanf_s_format_string_, (), _Scanf_s_format_string_impl_) + +#define _Format_string_impl_(kind,where) _SA_annotes2(SAL_IsFormatString2, kind, where) +#define _Printf_format_string_params_(x) _SAL2_Source_(_Printf_format_string_params_, (x), _Format_string_impl_("printf", x)) +#define _Scanf_format_string_params_(x) _SAL2_Source_(_Scanf_format_string_params_, (x), _Format_string_impl_("scanf", x)) +#define _Scanf_s_format_string_params_(x) _SAL2_Source_(_Scanf_s_format_string_params_, (x), _Format_string_impl_("scanf_s", x)) + +// annotations to express value of integral or pointer parameter +#define _In_range_(lb,ub) _SAL2_Source_(_In_range_, (lb,ub), _In_range_impl_(lb,ub)) +#define _Out_range_(lb,ub) _SAL2_Source_(_Out_range_, (lb,ub), _Out_range_impl_(lb,ub)) +#define _Ret_range_(lb,ub) _SAL2_Source_(_Ret_range_, (lb,ub), _Ret_range_impl_(lb,ub)) +#define _Deref_in_range_(lb,ub) _SAL2_Source_(_Deref_in_range_, (lb,ub), _Deref_in_range_impl_(lb,ub)) +#define _Deref_out_range_(lb,ub) _SAL2_Source_(_Deref_out_range_, (lb,ub), _Deref_out_range_impl_(lb,ub)) +#define _Deref_ret_range_(lb,ub) _SAL2_Source_(_Deref_ret_range_, (lb,ub), _Deref_ret_range_impl_(lb,ub)) +#define _Pre_equal_to_(expr) _SAL2_Source_(_Pre_equal_to_, (expr), _In_range_(==, expr)) +#define _Post_equal_to_(expr) _SAL2_Source_(_Post_equal_to_, (expr), _Out_range_(==, expr)) + +// annotation to express that a value (usually a field of a mutable class) +// is not changed by a function call +#define _Unchanged_(e) _SAL2_Source_(_Unchanged_, (e), _At_(e, _Post_equal_to_(_Old_(e)) _Const_)) + +// Annotations to allow expressing generalized pre and post conditions. +// 'cond' may be any valid SAL expression that is considered to be true as a precondition +// or postcondition (respectively). 
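+// A minimal usage sketch (GrowBuffer is a hypothetical function, shown only to illustrate the form): +// e.g. void GrowBuffer( size_t cbOld, _Pre_satisfies_(cbNew >= cbOld) size_t cbNew ); +// the condition is assumed to hold on entry; use _Post_satisfies_ for a condition that holds on return.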
+#define _Pre_satisfies_(cond) _SAL2_Source_(_Pre_satisfies_, (cond), _Pre_satisfies_impl_(cond)) +#define _Post_satisfies_(cond) _SAL2_Source_(_Post_satisfies_, (cond), _Post_satisfies_impl_(cond)) + +// Annotations to express struct, class and field invariants +#define _Struct_size_bytes_(size) _SAL2_Source_(_Struct_size_bytes_, (size), _Writable_bytes_(size)) + +#define _Field_size_(size) _SAL2_Source_(_Field_size_, (size), _Notnull_ _Writable_elements_(size)) +#define _Field_size_opt_(size) _SAL2_Source_(_Field_size_opt_, (size), _Maybenull_ _Writable_elements_(size)) +#define _Field_size_part_(size, count) _SAL2_Source_(_Field_size_part_, (size, count), _Notnull_ _Writable_elements_(size) _Readable_elements_(count)) +#define _Field_size_part_opt_(size, count) _SAL2_Source_(_Field_size_part_opt_, (size, count), _Maybenull_ _Writable_elements_(size) _Readable_elements_(count)) +#define _Field_size_full_(size) _SAL2_Source_(_Field_size_full_, (size), _Field_size_part_(size, size)) +#define _Field_size_full_opt_(size) _SAL2_Source_(_Field_size_full_opt_, (size), _Field_size_part_opt_(size, size)) + +#define _Field_size_bytes_(size) _SAL2_Source_(_Field_size_bytes_, (size), _Notnull_ _Writable_bytes_(size)) +#define _Field_size_bytes_opt_(size) _SAL2_Source_(_Field_size_bytes_opt_, (size), _Maybenull_ _Writable_bytes_(size)) +#define _Field_size_bytes_part_(size, count) _SAL2_Source_(_Field_size_bytes_part_, (size, count), _Notnull_ _Writable_bytes_(size) _Readable_bytes_(count)) +#define _Field_size_bytes_part_opt_(size, count) _SAL2_Source_(_Field_size_bytes_part_opt_, (size, count), _Maybenull_ _Writable_bytes_(size) _Readable_bytes_(count)) +#define _Field_size_bytes_full_(size) _SAL2_Source_(_Field_size_bytes_full_, (size), _Field_size_bytes_part_(size, size)) +#define _Field_size_bytes_full_opt_(size) _SAL2_Source_(_Field_size_bytes_full_opt_, (size), _Field_size_bytes_part_opt_(size, size)) + +#define _Field_z_ _SAL2_Source_(_Field_z_, (), _Null_terminated_) + +#define _Field_range_(min,max) _SAL2_Source_(_Field_range_, (min,max), _Field_range_impl_(min,max)) + +//============================================================================ +// _Pre_\_Post_ Layer: +//============================================================================ + +// +// Raw Pre/Post for declaring custom pre/post conditions +// + +#define _Pre_ _Pre_impl_ +#define _Post_ _Post_impl_ + +// +// Validity property +// + +#define _Valid_ _Valid_impl_ +#define _Notvalid_ _Notvalid_impl_ +#define _Maybevalid_ _Maybevalid_impl_ + +// +// Buffer size properties +// + +// Expressing buffer sizes without specifying pre or post condition +#define _Readable_bytes_(size) _SAL2_Source_(_Readable_bytes_, (size), _Readable_bytes_impl_(size)) +#define _Readable_elements_(size) _SAL2_Source_(_Readable_elements_, (size), _Readable_elements_impl_(size)) +#define _Writable_bytes_(size) _SAL2_Source_(_Writable_bytes_, (size), _Writable_bytes_impl_(size)) +#define _Writable_elements_(size) _SAL2_Source_(_Writable_elements_, (size), _Writable_elements_impl_(size)) + +#define _Null_terminated_ _SAL2_Source_(_Null_terminated_, (), _Null_terminated_impl_) +#define _NullNull_terminated_ _SAL2_Source_(_NullNull_terminated_, (), _NullNull_terminated_impl_) + +// Expressing buffer size as pre or post condition +#define _Pre_readable_size_(size) _SAL2_Source_(_Pre_readable_size_, (size), _Pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Pre_writable_size_(size) _SAL2_Source_(_Pre_writable_size_, (size), 
_Pre1_impl_(__cap_impl(size))) +#define _Pre_readable_byte_size_(size) _SAL2_Source_(_Pre_readable_byte_size_, (size), _Pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) +#define _Pre_writable_byte_size_(size) _SAL2_Source_(_Pre_writable_byte_size_, (size), _Pre1_impl_(__bytecap_impl(size))) + +#define _Post_readable_size_(size) _SAL2_Source_(_Post_readable_size_, (size), _Post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Post_writable_size_(size) _SAL2_Source_(_Post_writable_size_, (size), _Post1_impl_(__cap_impl(size))) +#define _Post_readable_byte_size_(size) _SAL2_Source_(_Post_readable_byte_size_, (size), _Post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) +#define _Post_writable_byte_size_(size) _SAL2_Source_(_Post_writable_byte_size_, (size), _Post1_impl_(__bytecap_impl(size))) + +// +// Pointer null-ness properties +// +#define _Null_ _Null_impl_ +#define _Notnull_ _Notnull_impl_ +#define _Maybenull_ _Maybenull_impl_ + +// +// _Pre_ annotations --- +// +// describing conditions that must be met before the call of the function + +// e.g. int strlen( _Pre_z_ const char* sz ); +// buffer is a zero terminated string +#define _Pre_z_ _SAL2_Source_(_Pre_z_, (), _Pre1_impl_(__zterm_impl) _Pre_valid_impl_) + +// valid size unknown or indicated by type (e.g.:LPSTR) +#define _Pre_valid_ _SAL2_Source_(_Pre_valid_, (), _Pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_) +#define _Pre_opt_valid_ _SAL2_Source_(_Pre_opt_valid_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_) + +#define _Pre_invalid_ _SAL2_Source_(_Pre_invalid_, (), _Deref_pre1_impl_(__notvalid_impl)) + +// Overrides recursive valid when some field is not yet initialized when using _Inout_ +#define _Pre_unknown_ _SAL2_Source_(_Pre_unknown_, (), _Pre1_impl_(__maybevalid_impl)) + +// used with allocated but not yet initialized objects +#define _Pre_notnull_ _SAL2_Source_(_Pre_notnull_, (), _Pre1_impl_(__notnull_impl_notref)) +#define _Pre_maybenull_ _SAL2_Source_(_Pre_maybenull_, (), _Pre1_impl_(__maybenull_impl_notref)) +#define _Pre_null_ _SAL2_Source_(_Pre_null_, (), _Pre1_impl_(__null_impl_notref)) + +// +// _Post_ annotations --- +// +// describing conditions that hold after the function call + +// void CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cchTo) _Post_z_ char* szTo, size_t cchTo ); +// buffer will be a zero-terminated string after the call +#define _Post_z_ _SAL2_Source_(_Post_z_, (), _Post1_impl_(__zterm_impl) _Post_valid_impl_) + +// e.g. HRESULT InitStruct( _Post_valid_ Struct* pobj ); +#define _Post_valid_ _SAL2_Source_(_Post_valid_, (), _Post_valid_impl_) +#define _Post_invalid_ _SAL2_Source_(_Post_invalid_, (), _Deref_post1_impl_(__notvalid_impl)) + +// e.g. void free( _Post_ptr_invalid_ void* pv ); +#define _Post_ptr_invalid_ _SAL2_Source_(_Post_ptr_invalid_, (), _Post1_impl_(__notvalid_impl)) + +// e.g. void ThrowExceptionIfNull( _Post_notnull_ const void* pv ); +#define _Post_notnull_ _SAL2_Source_(_Post_notnull_, (), _Post1_impl_(__notnull_impl)) + +// e.g. 
HRESULT GetObject(_Outptr_ _On_failure_(_At_(*p, _Post_null_)) T **p); +#define _Post_null_ _SAL2_Source_(_Post_null_, (), _Post1_impl_(__null_impl)) + +#define _Post_maybenull_ _SAL2_Source_(_Post_maybenull_, (), _Post1_impl_(__maybenull_impl)) + +#define _Prepost_z_ _SAL2_Source_(_Prepost_z_, (), _Pre_z_ _Post_z_) + + +// #pragma region Input Buffer SAL 1 compatibility macros + +/*========================================================================== + + This section contains definitions for macros defined for VS2010 and earlier. + Usage of these macros is still supported, but the SAL 2 macros defined above + are recommended instead. This comment block is retained to assist in + understanding SAL that still uses the older syntax. + + The macros are defined in 3 layers: + + _In_\_Out_ Layer: + ---------------- + This layer provides the highest abstraction and its macros should be used + in most cases. Its macros start with _In_, _Out_ or _Inout_. For the + typical case they provide the most concise annotations. + + _Pre_\_Post_ Layer: + ------------------ + The macros of this layer only should be used when there is no suitable macro + in the _In_\_Out_ layer. Its macros start with _Pre_, _Post_, _Ret_, + _Deref_pre_ _Deref_post_ and _Deref_ret_. This layer provides the most + flexibility for annotations. + + Implementation Abstraction Layer: + -------------------------------- + Macros from this layer should never be used directly. The layer only exists + to hide the implementation of the annotation macros. + + + Annotation Syntax: + |--------------|----------|----------------|-----------------------------| + | Usage | Nullness | ZeroTerminated | Extent | + |--------------|----------|----------------|-----------------------------| + | _In_ | <> | <> | <> | + | _Out_ | opt_ | z_ | [byte]cap_[c_|x_]( size ) | + | _Inout_ | | | [byte]count_[c_|x_]( size ) | + | _Deref_out_ | | | ptrdiff_cap_( ptr ) | + |--------------| | | ptrdiff_count_( ptr ) | + | _Ret_ | | | | + | _Deref_ret_ | | | | + |--------------| | | | + | _Pre_ | | | | + | _Post_ | | | | + | _Deref_pre_ | | | | + | _Deref_post_ | | | | + |--------------|----------|----------------|-----------------------------| + + Usage: + ----- + _In_, _Out_, _Inout_, _Pre_, _Post_, _Deref_pre_, _Deref_post_ are for + formal parameters. + _Ret_, _Deref_ret_ must be used for return values. + + Nullness: + -------- + If the pointer can be NULL the annotation contains _opt. If the macro + does not contain '_opt' the pointer may not be NULL. + + String Type: + ----------- + _z: NullTerminated string + for _In_ parameters the buffer must have the specified stringtype before the call + for _Out_ parameters the buffer must have the specified stringtype after the call + for _Inout_ parameters both conditions apply + + Extent Syntax: + |------|---------------|---------------| + | Unit | Writ\Readable | Argument Type | + |------|---------------|---------------| + | <> | cap_ | <> | + | byte | count_ | c_ | + | | | x_ | + |------|---------------|---------------| + + 'cap' (capacity) describes the writable size of the buffer and is typically used + with _Out_. The default unit is elements. Use 'bytecap' if the size is given in bytes + 'count' describes the readable size of the buffer and is typically used with _In_. + The default unit is elements. Use 'bytecount' if the size is given in bytes. + + Argument syntax for cap_, bytecap_, count_, bytecount_: + (|return)[+n] e.g. 
cch, return, cb+2 + + If the buffer size is a constant expression use the c_ postfix. + E.g. cap_c_(20), count_c_(MAX_PATH), bytecount_c_(16) + + If the buffer size is given by a limiting pointer use the ptrdiff_ versions + of the macros. + + If the buffer size is neither a parameter nor a constant expression use the x_ + postfix. e.g. bytecount_x_(num*size) x_ annotations accept any arbitrary string. + No analysis can be done for x_ annotations but they at least tell the tool that + the buffer has some sort of extent description. x_ annotations might be supported + by future compiler versions. + +============================================================================*/ + +// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch ) +// valid buffer extent described by another parameter +#define _In_count_(size) _SAL1_1_Source_(_In_count_, (size), _Pre_count_(size) _Deref_pre_readonly_) +#define _In_opt_count_(size) _SAL1_1_Source_(_In_opt_count_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_) +#define _In_bytecount_(size) _SAL1_1_Source_(_In_bytecount_, (size), _Pre_bytecount_(size) _Deref_pre_readonly_) +#define _In_opt_bytecount_(size) _SAL1_1_Source_(_In_opt_bytecount_, (size), _Pre_opt_bytecount_(size) _Deref_pre_readonly_) + +// valid buffer extent described by a constant expression +#define _In_count_c_(size) _SAL1_1_Source_(_In_count_c_, (size), _Pre_count_c_(size) _Deref_pre_readonly_) +#define _In_opt_count_c_(size) _SAL1_1_Source_(_In_opt_count_c_, (size), _Pre_opt_count_c_(size) _Deref_pre_readonly_) +#define _In_bytecount_c_(size) _SAL1_1_Source_(_In_bytecount_c_, (size), _Pre_bytecount_c_(size) _Deref_pre_readonly_) +#define _In_opt_bytecount_c_(size) _SAL1_1_Source_(_In_opt_bytecount_c_, (size), _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_) + +// nullterminated 'input' buffers with given size + +// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch ) +// nullterminated valid buffer extent described by another parameter +#define _In_z_count_(size) _SAL1_1_Source_(_In_z_count_, (size), _Pre_z_ _Pre_count_(size) _Deref_pre_readonly_) +#define _In_opt_z_count_(size) _SAL1_1_Source_(_In_opt_z_count_, (size), _Pre_opt_z_ _Pre_opt_count_(size) _Deref_pre_readonly_) +#define _In_z_bytecount_(size) _SAL1_1_Source_(_In_z_bytecount_, (size), _Pre_z_ _Pre_bytecount_(size) _Deref_pre_readonly_) +#define _In_opt_z_bytecount_(size) _SAL1_1_Source_(_In_opt_z_bytecount_, (size), _Pre_opt_z_ _Pre_opt_bytecount_(size) _Deref_pre_readonly_) + +// nullterminated valid buffer extent described by a constant expression +#define _In_z_count_c_(size) _SAL1_1_Source_(_In_z_count_c_, (size), _Pre_z_ _Pre_count_c_(size) _Deref_pre_readonly_) +#define _In_opt_z_count_c_(size) _SAL1_1_Source_(_In_opt_z_count_c_, (size), _Pre_opt_z_ _Pre_opt_count_c_(size) _Deref_pre_readonly_) +#define _In_z_bytecount_c_(size) _SAL1_1_Source_(_In_z_bytecount_c_, (size), _Pre_z_ _Pre_bytecount_c_(size) _Deref_pre_readonly_) +#define _In_opt_z_bytecount_c_(size) _SAL1_1_Source_(_In_opt_z_bytecount_c_, (size), _Pre_opt_z_ _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_) + +// buffer capacity is described by another pointer +// e.g. 
void Foo( _In_ptrdiff_count_(pchMax) const char* pch, const char* pchMax ) { while( pch < pchMax ) pch++; } +#define _In_ptrdiff_count_(size) _SAL1_1_Source_(_In_ptrdiff_count_, (size), _Pre_ptrdiff_count_(size) _Deref_pre_readonly_) +#define _In_opt_ptrdiff_count_(size) _SAL1_1_Source_(_In_opt_ptrdiff_count_, (size), _Pre_opt_ptrdiff_count_(size) _Deref_pre_readonly_) + +// 'x' version for complex expressions that are not supported by the current compiler version +// e.g. void Set3ColMatrix( _In_count_x_(3*cRows) const Elem* matrix, int cRows ); +#define _In_count_x_(size) _SAL1_1_Source_(_In_count_x_, (size), _Pre_count_x_(size) _Deref_pre_readonly_) +#define _In_opt_count_x_(size) _SAL1_1_Source_(_In_opt_count_x_, (size), _Pre_opt_count_x_(size) _Deref_pre_readonly_) +#define _In_bytecount_x_(size) _SAL1_1_Source_(_In_bytecount_x_, (size), _Pre_bytecount_x_(size) _Deref_pre_readonly_) +#define _In_opt_bytecount_x_(size) _SAL1_1_Source_(_In_opt_bytecount_x_, (size), _Pre_opt_bytecount_x_(size) _Deref_pre_readonly_) + + +// 'out' with buffer size +// e.g. void GetIndices( _Out_cap_(cIndices) int* rgIndices, size_t cIndices ); +// buffer capacity is described by another parameter +#define _Out_cap_(size) _SAL1_1_Source_(_Out_cap_, (size), _Pre_cap_(size) _Post_valid_impl_) +#define _Out_opt_cap_(size) _SAL1_1_Source_(_Out_opt_cap_, (size), _Pre_opt_cap_(size) _Post_valid_impl_) +#define _Out_bytecap_(size) _SAL1_1_Source_(_Out_bytecap_, (size), _Pre_bytecap_(size) _Post_valid_impl_) +#define _Out_opt_bytecap_(size) _SAL1_1_Source_(_Out_opt_bytecap_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_) + +// buffer capacity is described by a constant expression +#define _Out_cap_c_(size) _SAL1_1_Source_(_Out_cap_c_, (size), _Pre_cap_c_(size) _Post_valid_impl_) +#define _Out_opt_cap_c_(size) _SAL1_1_Source_(_Out_opt_cap_c_, (size), _Pre_opt_cap_c_(size) _Post_valid_impl_) +#define _Out_bytecap_c_(size) _SAL1_1_Source_(_Out_bytecap_c_, (size), _Pre_bytecap_c_(size) _Post_valid_impl_) +#define _Out_opt_bytecap_c_(size) _SAL1_1_Source_(_Out_opt_bytecap_c_, (size), _Pre_opt_bytecap_c_(size) _Post_valid_impl_) + +// buffer capacity is described by another parameter multiplied by a constant expression +#define _Out_cap_m_(mult,size) _SAL1_1_Source_(_Out_cap_m_, (mult,size), _Pre_cap_m_(mult,size) _Post_valid_impl_) +#define _Out_opt_cap_m_(mult,size) _SAL1_1_Source_(_Out_opt_cap_m_, (mult,size), _Pre_opt_cap_m_(mult,size) _Post_valid_impl_) +#define _Out_z_cap_m_(mult,size) _SAL1_1_Source_(_Out_z_cap_m_, (mult,size), _Pre_cap_m_(mult,size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_m_(mult,size) _SAL1_1_Source_(_Out_opt_z_cap_m_, (mult,size), _Pre_opt_cap_m_(mult,size) _Post_valid_impl_ _Post_z_) + +// buffer capacity is described by another pointer +// e.g. 
void Foo( _Out_ptrdiff_cap_(pchMax) char* pch, const char* pchMax ) { while( pch < pchMax ) pch++; } +#define _Out_ptrdiff_cap_(size) _SAL1_1_Source_(_Out_ptrdiff_cap_, (size), _Pre_ptrdiff_cap_(size) _Post_valid_impl_) +#define _Out_opt_ptrdiff_cap_(size) _SAL1_1_Source_(_Out_opt_ptrdiff_cap_, (size), _Pre_opt_ptrdiff_cap_(size) _Post_valid_impl_) + +// buffer capacity is described by a complex expression +#define _Out_cap_x_(size) _SAL1_1_Source_(_Out_cap_x_, (size), _Pre_cap_x_(size) _Post_valid_impl_) +#define _Out_opt_cap_x_(size) _SAL1_1_Source_(_Out_opt_cap_x_, (size), _Pre_opt_cap_x_(size) _Post_valid_impl_) +#define _Out_bytecap_x_(size) _SAL1_1_Source_(_Out_bytecap_x_, (size), _Pre_bytecap_x_(size) _Post_valid_impl_) +#define _Out_opt_bytecap_x_(size) _SAL1_1_Source_(_Out_opt_bytecap_x_, (size), _Pre_opt_bytecap_x_(size) _Post_valid_impl_) + +// a zero terminated string is filled into a buffer of given capacity +// e.g. void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); +// buffer capacity is described by another parameter +#define _Out_z_cap_(size) _SAL1_1_Source_(_Out_z_cap_, (size), _Pre_cap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_(size) _SAL1_1_Source_(_Out_opt_z_cap_, (size), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_z_bytecap_(size) _SAL1_1_Source_(_Out_z_bytecap_, (size), _Pre_bytecap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_bytecap_(size) _SAL1_1_Source_(_Out_opt_z_bytecap_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_ _Post_z_) + +// buffer capacity is described by a constant expression +#define _Out_z_cap_c_(size) _SAL1_1_Source_(_Out_z_cap_c_, (size), _Pre_cap_c_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_c_(size) _SAL1_1_Source_(_Out_opt_z_cap_c_, (size), _Pre_opt_cap_c_(size) _Post_valid_impl_ _Post_z_) +#define _Out_z_bytecap_c_(size) _SAL1_1_Source_(_Out_z_bytecap_c_, (size), _Pre_bytecap_c_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Out_opt_z_bytecap_c_, (size), _Pre_opt_bytecap_c_(size) _Post_valid_impl_ _Post_z_) + +// buffer capacity is described by a complex expression +#define _Out_z_cap_x_(size) _SAL1_1_Source_(_Out_z_cap_x_, (size), _Pre_cap_x_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_x_(size) _SAL1_1_Source_(_Out_opt_z_cap_x_, (size), _Pre_opt_cap_x_(size) _Post_valid_impl_ _Post_z_) +#define _Out_z_bytecap_x_(size) _SAL1_1_Source_(_Out_z_bytecap_x_, (size), _Pre_bytecap_x_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Out_opt_z_bytecap_x_, (size), _Pre_opt_bytecap_x_(size) _Post_valid_impl_ _Post_z_) + +// a zero terminated string is filled into a buffer of given capacity
// e.g. 
size_t CopyCharRange( _In_count_(cchFrom) const char* rgchFrom, size_t cchFrom, _Out_cap_post_count_(cchTo,return)) char* rgchTo, size_t cchTo ); +#define _Out_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_cap_post_count_, (cap,count), _Pre_cap_(cap) _Post_valid_impl_ _Post_count_(count)) +#define _Out_opt_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_opt_cap_post_count_, (cap,count), _Pre_opt_cap_(cap) _Post_valid_impl_ _Post_count_(count)) +#define _Out_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_bytecap_post_bytecount_, (cap,count), _Pre_bytecap_(cap) _Post_valid_impl_ _Post_bytecount_(count)) +#define _Out_opt_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_opt_bytecap_post_bytecount_, (cap,count), _Pre_opt_bytecap_(cap) _Post_valid_impl_ _Post_bytecount_(count)) + +// a zero terminated string is filled into a buffer of given capacity +// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Out_z_cap_post_count_(cchTo,return+1) char* szTo, size_t cchTo ); +#define _Out_z_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_z_cap_post_count_, (cap,count), _Pre_cap_(cap) _Post_valid_impl_ _Post_z_count_(count)) +#define _Out_opt_z_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_opt_z_cap_post_count_, (cap,count), _Pre_opt_cap_(cap) _Post_valid_impl_ _Post_z_count_(count)) +#define _Out_z_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_z_bytecap_post_bytecount_, (cap,count), _Pre_bytecap_(cap) _Post_valid_impl_ _Post_z_bytecount_(count)) +#define _Out_opt_z_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_opt_z_bytecap_post_bytecount_, (cap,count), _Pre_opt_bytecap_(cap) _Post_valid_impl_ _Post_z_bytecount_(count)) + +// only use with dereferenced arguments e.g. '*pcch' +#define _Out_capcount_(capcount) _SAL1_1_Source_(_Out_capcount_, (capcount), _Pre_cap_(capcount) _Post_valid_impl_ _Post_count_(capcount)) +#define _Out_opt_capcount_(capcount) _SAL1_1_Source_(_Out_opt_capcount_, (capcount), _Pre_opt_cap_(capcount) _Post_valid_impl_ _Post_count_(capcount)) +#define _Out_bytecapcount_(capcount) _SAL1_1_Source_(_Out_bytecapcount_, (capcount), _Pre_bytecap_(capcount) _Post_valid_impl_ _Post_bytecount_(capcount)) +#define _Out_opt_bytecapcount_(capcount) _SAL1_1_Source_(_Out_opt_bytecapcount_, (capcount), _Pre_opt_bytecap_(capcount) _Post_valid_impl_ _Post_bytecount_(capcount)) + +#define _Out_capcount_x_(capcount) _SAL1_1_Source_(_Out_capcount_x_, (capcount), _Pre_cap_x_(capcount) _Post_valid_impl_ _Post_count_x_(capcount)) +#define _Out_opt_capcount_x_(capcount) _SAL1_1_Source_(_Out_opt_capcount_x_, (capcount), _Pre_opt_cap_x_(capcount) _Post_valid_impl_ _Post_count_x_(capcount)) +#define _Out_bytecapcount_x_(capcount) _SAL1_1_Source_(_Out_bytecapcount_x_, (capcount), _Pre_bytecap_x_(capcount) _Post_valid_impl_ _Post_bytecount_x_(capcount)) +#define _Out_opt_bytecapcount_x_(capcount) _SAL1_1_Source_(_Out_opt_bytecapcount_x_, (capcount), _Pre_opt_bytecap_x_(capcount) _Post_valid_impl_ _Post_bytecount_x_(capcount)) + +// e.g. 
GetString( _Out_z_capcount_(*pLen+1) char* sz, size_t* pLen ); +#define _Out_z_capcount_(capcount) _SAL1_1_Source_(_Out_z_capcount_, (capcount), _Pre_cap_(capcount) _Post_valid_impl_ _Post_z_count_(capcount)) +#define _Out_opt_z_capcount_(capcount) _SAL1_1_Source_(_Out_opt_z_capcount_, (capcount), _Pre_opt_cap_(capcount) _Post_valid_impl_ _Post_z_count_(capcount)) +#define _Out_z_bytecapcount_(capcount) _SAL1_1_Source_(_Out_z_bytecapcount_, (capcount), _Pre_bytecap_(capcount) _Post_valid_impl_ _Post_z_bytecount_(capcount)) +#define _Out_opt_z_bytecapcount_(capcount) _SAL1_1_Source_(_Out_opt_z_bytecapcount_, (capcount), _Pre_opt_bytecap_(capcount) _Post_valid_impl_ _Post_z_bytecount_(capcount)) + + +// 'inout' buffers with initialized elements before and after the call +// e.g. void ModifyIndices( _Inout_count_(cIndices) int* rgIndeces, size_t cIndices ); +#define _Inout_count_(size) _SAL1_1_Source_(_Inout_count_, (size), _Prepost_count_(size)) +#define _Inout_opt_count_(size) _SAL1_1_Source_(_Inout_opt_count_, (size), _Prepost_opt_count_(size)) +#define _Inout_bytecount_(size) _SAL1_1_Source_(_Inout_bytecount_, (size), _Prepost_bytecount_(size)) +#define _Inout_opt_bytecount_(size) _SAL1_1_Source_(_Inout_opt_bytecount_, (size), _Prepost_opt_bytecount_(size)) + +#define _Inout_count_c_(size) _SAL1_1_Source_(_Inout_count_c_, (size), _Prepost_count_c_(size)) +#define _Inout_opt_count_c_(size) _SAL1_1_Source_(_Inout_opt_count_c_, (size), _Prepost_opt_count_c_(size)) +#define _Inout_bytecount_c_(size) _SAL1_1_Source_(_Inout_bytecount_c_, (size), _Prepost_bytecount_c_(size)) +#define _Inout_opt_bytecount_c_(size) _SAL1_1_Source_(_Inout_opt_bytecount_c_, (size), _Prepost_opt_bytecount_c_(size)) + +// nullterminated 'inout' buffers with initialized elements before and after the call +// e.g. 
void ModifyIndices( _Inout_count_(cIndices) int* rgIndeces, size_t cIndices ); +#define _Inout_z_count_(size) _SAL1_1_Source_(_Inout_z_count_, (size), _Prepost_z_ _Prepost_count_(size)) +#define _Inout_opt_z_count_(size) _SAL1_1_Source_(_Inout_opt_z_count_, (size), _Prepost_z_ _Prepost_opt_count_(size)) +#define _Inout_z_bytecount_(size) _SAL1_1_Source_(_Inout_z_bytecount_, (size), _Prepost_z_ _Prepost_bytecount_(size)) +#define _Inout_opt_z_bytecount_(size) _SAL1_1_Source_(_Inout_opt_z_bytecount_, (size), _Prepost_z_ _Prepost_opt_bytecount_(size)) + +#define _Inout_z_count_c_(size) _SAL1_1_Source_(_Inout_z_count_c_, (size), _Prepost_z_ _Prepost_count_c_(size)) +#define _Inout_opt_z_count_c_(size) _SAL1_1_Source_(_Inout_opt_z_count_c_, (size), _Prepost_z_ _Prepost_opt_count_c_(size)) +#define _Inout_z_bytecount_c_(size) _SAL1_1_Source_(_Inout_z_bytecount_c_, (size), _Prepost_z_ _Prepost_bytecount_c_(size)) +#define _Inout_opt_z_bytecount_c_(size) _SAL1_1_Source_(_Inout_opt_z_bytecount_c_, (size), _Prepost_z_ _Prepost_opt_bytecount_c_(size)) + +#define _Inout_ptrdiff_count_(size) _SAL1_1_Source_(_Inout_ptrdiff_count_, (size), _Pre_ptrdiff_count_(size)) +#define _Inout_opt_ptrdiff_count_(size) _SAL1_1_Source_(_Inout_opt_ptrdiff_count_, (size), _Pre_opt_ptrdiff_count_(size)) + +#define _Inout_count_x_(size) _SAL1_1_Source_(_Inout_count_x_, (size), _Prepost_count_x_(size)) +#define _Inout_opt_count_x_(size) _SAL1_1_Source_(_Inout_opt_count_x_, (size), _Prepost_opt_count_x_(size)) +#define _Inout_bytecount_x_(size) _SAL1_1_Source_(_Inout_bytecount_x_, (size), _Prepost_bytecount_x_(size)) +#define _Inout_opt_bytecount_x_(size) _SAL1_1_Source_(_Inout_opt_bytecount_x_, (size), _Prepost_opt_bytecount_x_(size)) + +// e.g. void AppendToLPSTR( _In_ LPCSTR szFrom, _Inout_cap_(cchTo) LPSTR* szTo, size_t cchTo ); +#define _Inout_cap_(size) _SAL1_1_Source_(_Inout_cap_, (size), _Pre_valid_cap_(size) _Post_valid_) +#define _Inout_opt_cap_(size) _SAL1_1_Source_(_Inout_opt_cap_, (size), _Pre_opt_valid_cap_(size) _Post_valid_) +#define _Inout_bytecap_(size) _SAL1_1_Source_(_Inout_bytecap_, (size), _Pre_valid_bytecap_(size) _Post_valid_) +#define _Inout_opt_bytecap_(size) _SAL1_1_Source_(_Inout_opt_bytecap_, (size), _Pre_opt_valid_bytecap_(size) _Post_valid_) + +#define _Inout_cap_c_(size) _SAL1_1_Source_(_Inout_cap_c_, (size), _Pre_valid_cap_c_(size) _Post_valid_) +#define _Inout_opt_cap_c_(size) _SAL1_1_Source_(_Inout_opt_cap_c_, (size), _Pre_opt_valid_cap_c_(size) _Post_valid_) +#define _Inout_bytecap_c_(size) _SAL1_1_Source_(_Inout_bytecap_c_, (size), _Pre_valid_bytecap_c_(size) _Post_valid_) +#define _Inout_opt_bytecap_c_(size) _SAL1_1_Source_(_Inout_opt_bytecap_c_, (size), _Pre_opt_valid_bytecap_c_(size) _Post_valid_) + +#define _Inout_cap_x_(size) _SAL1_1_Source_(_Inout_cap_x_, (size), _Pre_valid_cap_x_(size) _Post_valid_) +#define _Inout_opt_cap_x_(size) _SAL1_1_Source_(_Inout_opt_cap_x_, (size), _Pre_opt_valid_cap_x_(size) _Post_valid_) +#define _Inout_bytecap_x_(size) _SAL1_1_Source_(_Inout_bytecap_x_, (size), _Pre_valid_bytecap_x_(size) _Post_valid_) +#define _Inout_opt_bytecap_x_(size) _SAL1_1_Source_(_Inout_opt_bytecap_x_, (size), _Pre_opt_valid_bytecap_x_(size) _Post_valid_) + +// inout string buffers with writable size +// e.g. 
void AppendStr( _In_z_ const char* szFrom, _Inout_z_cap_(cchTo) char* szTo, size_t cchTo ); +#define _Inout_z_cap_(size) _SAL1_1_Source_(_Inout_z_cap_, (size), _Pre_z_cap_(size) _Post_z_) +#define _Inout_opt_z_cap_(size) _SAL1_1_Source_(_Inout_opt_z_cap_, (size), _Pre_opt_z_cap_(size) _Post_z_) +#define _Inout_z_bytecap_(size) _SAL1_1_Source_(_Inout_z_bytecap_, (size), _Pre_z_bytecap_(size) _Post_z_) +#define _Inout_opt_z_bytecap_(size) _SAL1_1_Source_(_Inout_opt_z_bytecap_, (size), _Pre_opt_z_bytecap_(size) _Post_z_) + +#define _Inout_z_cap_c_(size) _SAL1_1_Source_(_Inout_z_cap_c_, (size), _Pre_z_cap_c_(size) _Post_z_) +#define _Inout_opt_z_cap_c_(size) _SAL1_1_Source_(_Inout_opt_z_cap_c_, (size), _Pre_opt_z_cap_c_(size) _Post_z_) +#define _Inout_z_bytecap_c_(size) _SAL1_1_Source_(_Inout_z_bytecap_c_, (size), _Pre_z_bytecap_c_(size) _Post_z_) +#define _Inout_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Inout_opt_z_bytecap_c_, (size), _Pre_opt_z_bytecap_c_(size) _Post_z_) + +#define _Inout_z_cap_x_(size) _SAL1_1_Source_(_Inout_z_cap_x_, (size), _Pre_z_cap_x_(size) _Post_z_) +#define _Inout_opt_z_cap_x_(size) _SAL1_1_Source_(_Inout_opt_z_cap_x_, (size), _Pre_opt_z_cap_x_(size) _Post_z_) +#define _Inout_z_bytecap_x_(size) _SAL1_1_Source_(_Inout_z_bytecap_x_, (size), _Pre_z_bytecap_x_(size) _Post_z_) +#define _Inout_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Inout_opt_z_bytecap_x_, (size), _Pre_opt_z_bytecap_x_(size) _Post_z_) + + +// returning pointers to valid objects +#define _Ret_ _SAL1_1_Source_(_Ret_, (), _Ret_valid_) +#define _Ret_opt_ _SAL1_1_Source_(_Ret_opt_, (), _Ret_opt_valid_) + +// annotations to express 'boundedness' of integral value parameter +#define _In_bound_ _SAL1_1_Source_(_In_bound_, (), _In_bound_impl_) +#define _Out_bound_ _SAL1_1_Source_(_Out_bound_, (), _Out_bound_impl_) +#define _Ret_bound_ _SAL1_1_Source_(_Ret_bound_, (), _Ret_bound_impl_) +#define _Deref_in_bound_ _SAL1_1_Source_(_Deref_in_bound_, (), _Deref_in_bound_impl_) +#define _Deref_out_bound_ _SAL1_1_Source_(_Deref_out_bound_, (), _Deref_out_bound_impl_) +#define _Deref_inout_bound_ _SAL1_1_Source_(_Deref_inout_bound_, (), _Deref_in_bound_ _Deref_out_bound_) +#define _Deref_ret_bound_ _SAL1_1_Source_(_Deref_ret_bound_, (), _Deref_ret_bound_impl_) + +// e.g. HRESULT HrCreatePoint( _Deref_out_opt_ POINT** ppPT ); +#define _Deref_out_ _SAL1_1_Source_(_Deref_out_, (), _Out_ _Deref_post_valid_) +#define _Deref_out_opt_ _SAL1_1_Source_(_Deref_out_opt_, (), _Out_ _Deref_post_opt_valid_) +#define _Deref_opt_out_ _SAL1_1_Source_(_Deref_opt_out_, (), _Out_opt_ _Deref_post_valid_) +#define _Deref_opt_out_opt_ _SAL1_1_Source_(_Deref_opt_out_opt_, (), _Out_opt_ _Deref_post_opt_valid_) + +// e.g. void CloneString( _In_z_ const WCHAR* wzFrom, _Deref_out_z_ WCHAR** pWzTo ); +#define _Deref_out_z_ _SAL1_1_Source_(_Deref_out_z_, (), _Out_ _Deref_post_z_) +#define _Deref_out_opt_z_ _SAL1_1_Source_(_Deref_out_opt_z_, (), _Out_ _Deref_post_opt_z_) +#define _Deref_opt_out_z_ _SAL1_1_Source_(_Deref_opt_out_z_, (), _Out_opt_ _Deref_post_z_) +#define _Deref_opt_out_opt_z_ _SAL1_1_Source_(_Deref_opt_out_opt_z_, (), _Out_opt_ _Deref_post_opt_z_) + +// +// _Deref_pre_ --- +// +// describing conditions for array elements of dereferenced pointer parameters that must be met before the call + +// e.g. 
void SaveStringArray( _In_count_(cStrings) _Deref_pre_z_ const WCHAR* const rgpwch[] ); +#define _Deref_pre_z_ _SAL1_1_Source_(_Deref_pre_z_, (), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__zterm_impl) _Pre_valid_impl_) +#define _Deref_pre_opt_z_ _SAL1_1_Source_(_Deref_pre_opt_z_, (), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__zterm_impl) _Pre_valid_impl_) + +// e.g. void FillInArrayOfStr32( _In_count_(cStrings) _Deref_pre_cap_c_(32) _Deref_post_z_ WCHAR* const rgpwch[] ); +// buffer capacity is described by another parameter +#define _Deref_pre_cap_(size) _SAL1_1_Source_(_Deref_pre_cap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_impl(size))) +#define _Deref_pre_opt_cap_(size) _SAL1_1_Source_(_Deref_pre_opt_cap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_impl(size))) +#define _Deref_pre_bytecap_(size) _SAL1_1_Source_(_Deref_pre_bytecap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size))) +#define _Deref_pre_opt_bytecap_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size))) + +// buffer capacity is described by a constant expression +#define _Deref_pre_cap_c_(size) _SAL1_1_Source_(_Deref_pre_cap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size))) +#define _Deref_pre_opt_cap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_cap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size))) +#define _Deref_pre_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_bytecap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size))) +#define _Deref_pre_opt_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size))) + +// buffer capacity is described by a complex condition +#define _Deref_pre_cap_x_(size) _SAL1_1_Source_(_Deref_pre_cap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_x_impl(size))) +#define _Deref_pre_opt_cap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_cap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_x_impl(size))) +#define _Deref_pre_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_bytecap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size))) +#define _Deref_pre_opt_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size))) + +// convenience macros for nullterminated buffers with given capacity +#define _Deref_pre_z_cap_(size) _SAL1_1_Source_(_Deref_pre_z_cap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_cap_(size) _SAL1_1_Source_(_Deref_pre_opt_z_cap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_z_bytecap_(size) _SAL1_1_Source_(_Deref_pre_z_bytecap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_bytecap_(size) _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) + +#define 
_Deref_pre_z_cap_c_(size) _SAL1_1_Source_(_Deref_pre_z_cap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_cap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_z_cap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_z_bytecap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Deref_pre_z_cap_x_(size) _SAL1_1_Source_(_Deref_pre_z_cap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_cap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_z_cap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_z_bytecap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) + +// known capacity and valid but unknown readable extent +#define _Deref_pre_valid_cap_(size) _SAL1_1_Source_(_Deref_pre_valid_cap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_cap_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_cap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_valid_bytecap_(size) _SAL1_1_Source_(_Deref_pre_valid_bytecap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_bytecap_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) + +#define _Deref_pre_valid_cap_c_(size) _SAL1_1_Source_(_Deref_pre_valid_cap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_cap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_cap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_valid_bytecap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Deref_pre_valid_cap_x_(size) _SAL1_1_Source_(_Deref_pre_valid_cap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_cap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_cap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) 
_Deref_pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_valid_bytecap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) + +// e.g. void SaveMatrix( _In_count_(n) _Deref_pre_count_(n) const Elem** matrix, size_t n ); +// valid buffer extent is described by another parameter +#define _Deref_pre_count_(size) _SAL1_1_Source_(_Deref_pre_count_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_count_(size) _SAL1_1_Source_(_Deref_pre_opt_count_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_bytecount_(size) _SAL1_1_Source_(_Deref_pre_bytecount_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_bytecount_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecount_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) + +// valid buffer extent is described by a constant expression +#define _Deref_pre_count_c_(size) _SAL1_1_Source_(_Deref_pre_count_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_count_c_(size) _SAL1_1_Source_(_Deref_pre_opt_count_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_bytecount_c_(size) _SAL1_1_Source_(_Deref_pre_bytecount_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_bytecount_c_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecount_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) + +// valid buffer extent is described by a complex expression +#define _Deref_pre_count_x_(size) _SAL1_1_Source_(_Deref_pre_count_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_count_x_(size) _SAL1_1_Source_(_Deref_pre_opt_count_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_bytecount_x_(size) _SAL1_1_Source_(_Deref_pre_bytecount_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_bytecount_x_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecount_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) + +// e.g. 
void PrintStringArray( _In_count_(cElems) _Deref_pre_valid_ LPCSTR rgStr[], size_t cElems ); +#define _Deref_pre_valid_ _SAL1_1_Source_(_Deref_pre_valid_, (), _Deref_pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_ _SAL1_1_Source_(_Deref_pre_opt_valid_, (), _Deref_pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_) +#define _Deref_pre_invalid_ _SAL1_1_Source_(_Deref_pre_invalid_, (), _Deref_pre1_impl_(__notvalid_impl)) + +#define _Deref_pre_notnull_ _SAL1_1_Source_(_Deref_pre_notnull_, (), _Deref_pre1_impl_(__notnull_impl_notref)) +#define _Deref_pre_maybenull_ _SAL1_1_Source_(_Deref_pre_maybenull_, (), _Deref_pre1_impl_(__maybenull_impl_notref)) +#define _Deref_pre_null_ _SAL1_1_Source_(_Deref_pre_null_, (), _Deref_pre1_impl_(__null_impl_notref)) + +// restrict access rights +#define _Deref_pre_readonly_ _SAL1_1_Source_(_Deref_pre_readonly_, (), _Deref_pre1_impl_(__readaccess_impl_notref)) +#define _Deref_pre_writeonly_ _SAL1_1_Source_(_Deref_pre_writeonly_, (), _Deref_pre1_impl_(__writeaccess_impl_notref)) + +// +// _Deref_post_ --- +// +// describing conditions for array elements or dereferenced pointer parameters that hold after the call + +// e.g. void CloneString( _In_z_ const Wchar_t* wzIn _Out_ _Deref_post_z_ WCHAR** pWzOut ); +#define _Deref_post_z_ _SAL1_1_Source_(_Deref_post_z_, (), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__zterm_impl) _Post_valid_impl_) +#define _Deref_post_opt_z_ _SAL1_1_Source_(_Deref_post_opt_z_, (), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__zterm_impl) _Post_valid_impl_) + +// e.g. HRESULT HrAllocateMemory( size_t cb, _Out_ _Deref_post_bytecap_(cb) void** ppv ); +// buffer capacity is described by another parameter +#define _Deref_post_cap_(size) _SAL1_1_Source_(_Deref_post_cap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_impl(size))) +#define _Deref_post_opt_cap_(size) _SAL1_1_Source_(_Deref_post_opt_cap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_impl(size))) +#define _Deref_post_bytecap_(size) _SAL1_1_Source_(_Deref_post_bytecap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size))) +#define _Deref_post_opt_bytecap_(size) _SAL1_1_Source_(_Deref_post_opt_bytecap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size))) + +// buffer capacity is described by a constant expression +#define _Deref_post_cap_c_(size) _SAL1_1_Source_(_Deref_post_cap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size))) +#define _Deref_post_opt_cap_c_(size) _SAL1_1_Source_(_Deref_post_opt_cap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size))) +#define _Deref_post_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_bytecap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size))) +#define _Deref_post_opt_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_opt_bytecap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size))) + +// buffer capacity is described by a complex expression +#define _Deref_post_cap_x_(size) _SAL1_1_Source_(_Deref_post_cap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size))) +#define _Deref_post_opt_cap_x_(size) _SAL1_1_Source_(_Deref_post_opt_cap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size))) 
+#define _Deref_post_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_bytecap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size))) +#define _Deref_post_opt_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_opt_bytecap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size))) + +// convenience macros for nullterminated buffers with given capacity +#define _Deref_post_z_cap_(size) _SAL1_1_Source_(_Deref_post_z_cap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_cap_(size) _SAL1_1_Source_(_Deref_post_opt_z_cap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_z_bytecap_(size) _SAL1_1_Source_(_Deref_post_z_bytecap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_bytecap_(size) _SAL1_1_Source_(_Deref_post_opt_z_bytecap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_impl(size)) _Post_valid_impl_) + +#define _Deref_post_z_cap_c_(size) _SAL1_1_Source_(_Deref_post_z_cap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_cap_c_(size) _SAL1_1_Source_(_Deref_post_opt_z_cap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_z_bytecap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_opt_z_bytecap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Post_valid_impl_) + +#define _Deref_post_z_cap_x_(size) _SAL1_1_Source_(_Deref_post_z_cap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_cap_x_(size) _SAL1_1_Source_(_Deref_post_opt_z_cap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_z_bytecap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_opt_z_bytecap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Post_valid_impl_) + +// known capacity and valid but unknown readable extent +#define _Deref_post_valid_cap_(size) _SAL1_1_Source_(_Deref_post_valid_cap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_cap_(size) _SAL1_1_Source_(_Deref_post_opt_valid_cap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_valid_bytecap_(size) _SAL1_1_Source_(_Deref_post_valid_bytecap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size)) _Post_valid_impl_) +#define 
_Deref_post_opt_valid_bytecap_(size) _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size)) _Post_valid_impl_) + +#define _Deref_post_valid_cap_c_(size) _SAL1_1_Source_(_Deref_post_valid_cap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_cap_c_(size) _SAL1_1_Source_(_Deref_post_opt_valid_cap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_valid_bytecap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size)) _Post_valid_impl_) + +#define _Deref_post_valid_cap_x_(size) _SAL1_1_Source_(_Deref_post_valid_cap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_cap_x_(size) _SAL1_1_Source_(_Deref_post_opt_valid_cap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_valid_bytecap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size)) _Post_valid_impl_) + +// e.g. 
HRESULT HrAllocateZeroInitializedMemory( size_t cb, _Out_ _Deref_post_bytecount_(cb) void** ppv ); +// valid buffer extent is described by another parameter +#define _Deref_post_count_(size) _SAL1_1_Source_(_Deref_post_count_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_count_(size) _SAL1_1_Source_(_Deref_post_opt_count_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Deref_post_bytecount_(size) _SAL1_1_Source_(_Deref_post_bytecount_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_bytecount_(size) _SAL1_1_Source_(_Deref_post_opt_bytecount_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) + +// buffer capacity is described by a constant expression +#define _Deref_post_count_c_(size) _SAL1_1_Source_(_Deref_post_count_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__count_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_count_c_(size) _SAL1_1_Source_(_Deref_post_opt_count_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__count_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_bytecount_c_(size) _SAL1_1_Source_(_Deref_post_bytecount_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_bytecount_c_(size) _SAL1_1_Source_(_Deref_post_opt_bytecount_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) + +// buffer capacity is described by a complex expression +#define _Deref_post_count_x_(size) _SAL1_1_Source_(_Deref_post_count_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__count_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_count_x_(size) _SAL1_1_Source_(_Deref_post_opt_count_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__count_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_bytecount_x_(size) _SAL1_1_Source_(_Deref_post_bytecount_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_bytecount_x_(size) _SAL1_1_Source_(_Deref_post_opt_bytecount_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) + +// e.g. 
void GetStrings( _Out_count_(cElems) _Deref_post_valid_ LPSTR const rgStr[], size_t cElems ); +#define _Deref_post_valid_ _SAL1_1_Source_(_Deref_post_valid_, (), _Deref_post1_impl_(__notnull_impl_notref) _Post_valid_impl_) +#define _Deref_post_opt_valid_ _SAL1_1_Source_(_Deref_post_opt_valid_, (), _Deref_post1_impl_(__maybenull_impl_notref) _Post_valid_impl_) + +#define _Deref_post_notnull_ _SAL1_1_Source_(_Deref_post_notnull_, (), _Deref_post1_impl_(__notnull_impl_notref)) +#define _Deref_post_maybenull_ _SAL1_1_Source_(_Deref_post_maybenull_, (), _Deref_post1_impl_(__maybenull_impl_notref)) +#define _Deref_post_null_ _SAL1_1_Source_(_Deref_post_null_, (), _Deref_post1_impl_(__null_impl_notref)) + +// +// _Deref_ret_ --- +// + +#define _Deref_ret_z_ _SAL1_1_Source_(_Deref_ret_z_, (), _Deref_ret1_impl_(__notnull_impl_notref) _Deref_ret1_impl_(__zterm_impl)) +#define _Deref_ret_opt_z_ _SAL1_1_Source_(_Deref_ret_opt_z_, (), _Deref_ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__zterm_impl)) + +// +// special _Deref_ --- +// +#define _Deref2_pre_readonly_ _SAL1_1_Source_(_Deref2_pre_readonly_, (), _Deref2_pre1_impl_(__readaccess_impl_notref)) + +// +// _Ret_ --- +// + +// e.g. _Ret_opt_valid_ LPSTR void* CloneSTR( _Pre_valid_ LPSTR src ); +#define _Ret_opt_valid_ _SAL1_1_Source_(_Ret_opt_valid_, (), _Ret1_impl_(__maybenull_impl_notref) _Ret_valid_impl_) +#define _Ret_opt_z_ _SAL1_1_Source_(_Ret_opt_z_, (), _Ret2_impl_(__maybenull_impl,__zterm_impl) _Ret_valid_impl_) + +// e.g. _Ret_opt_bytecap_(cb) void* AllocateMemory( size_t cb ); +// Buffer capacity is described by another parameter +#define _Ret_cap_(size) _SAL1_1_Source_(_Ret_cap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_impl(size))) +#define _Ret_opt_cap_(size) _SAL1_1_Source_(_Ret_opt_cap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_impl(size))) +#define _Ret_bytecap_(size) _SAL1_1_Source_(_Ret_bytecap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_impl(size))) +#define _Ret_opt_bytecap_(size) _SAL1_1_Source_(_Ret_opt_bytecap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_impl(size))) + +// Buffer capacity is described by a constant expression +#define _Ret_cap_c_(size) _SAL1_1_Source_(_Ret_cap_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_c_impl(size))) +#define _Ret_opt_cap_c_(size) _SAL1_1_Source_(_Ret_opt_cap_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_c_impl(size))) +#define _Ret_bytecap_c_(size) _SAL1_1_Source_(_Ret_bytecap_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_c_impl(size))) +#define _Ret_opt_bytecap_c_(size) _SAL1_1_Source_(_Ret_opt_bytecap_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_c_impl(size))) + +// Buffer capacity is described by a complex condition +#define _Ret_cap_x_(size) _SAL1_1_Source_(_Ret_cap_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_x_impl(size))) +#define _Ret_opt_cap_x_(size) _SAL1_1_Source_(_Ret_opt_cap_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_x_impl(size))) +#define _Ret_bytecap_x_(size) _SAL1_1_Source_(_Ret_bytecap_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_x_impl(size))) +#define _Ret_opt_bytecap_x_(size) _SAL1_1_Source_(_Ret_opt_bytecap_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_x_impl(size))) + +// return value is nullterminated and capacity is given by another parameter +#define _Ret_z_cap_(size) 
_SAL1_1_Source_(_Ret_z_cap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__cap_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_cap_(size) _SAL1_1_Source_(_Ret_opt_z_cap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__cap_impl(size)) _Ret_valid_impl_) +#define _Ret_z_bytecap_(size) _SAL1_1_Source_(_Ret_z_bytecap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecap_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_bytecap_(size) _SAL1_1_Source_(_Ret_opt_z_bytecap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecap_impl(size)) _Ret_valid_impl_) + +// e.g. _Ret_opt_bytecount_(cb) void* AllocateZeroInitializedMemory( size_t cb ); +// Valid Buffer extent is described by another parameter +#define _Ret_count_(size) _SAL1_1_Source_(_Ret_count_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_count_(size) _SAL1_1_Source_(_Ret_opt_count_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_impl(size)) _Ret_valid_impl_) +#define _Ret_bytecount_(size) _SAL1_1_Source_(_Ret_bytecount_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_bytecount_(size) _SAL1_1_Source_(_Ret_opt_bytecount_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_impl(size)) _Ret_valid_impl_) + +// Valid Buffer extent is described by a constant expression +#define _Ret_count_c_(size) _SAL1_1_Source_(_Ret_count_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_c_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_count_c_(size) _SAL1_1_Source_(_Ret_opt_count_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_c_impl(size)) _Ret_valid_impl_) +#define _Ret_bytecount_c_(size) _SAL1_1_Source_(_Ret_bytecount_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_c_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_bytecount_c_(size) _SAL1_1_Source_(_Ret_opt_bytecount_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_c_impl(size)) _Ret_valid_impl_) + +// Valid Buffer extent is described by a complex expression +#define _Ret_count_x_(size) _SAL1_1_Source_(_Ret_count_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_x_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_count_x_(size) _SAL1_1_Source_(_Ret_opt_count_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_x_impl(size)) _Ret_valid_impl_) +#define _Ret_bytecount_x_(size) _SAL1_1_Source_(_Ret_bytecount_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_x_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_bytecount_x_(size) _SAL1_1_Source_(_Ret_opt_bytecount_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_x_impl(size)) _Ret_valid_impl_) + +// return value is nullterminated and length is given by another parameter +#define _Ret_z_count_(size) _SAL1_1_Source_(_Ret_z_count_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__count_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_count_(size) _SAL1_1_Source_(_Ret_opt_z_count_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__count_impl(size)) _Ret_valid_impl_) +#define _Ret_z_bytecount_(size) _SAL1_1_Source_(_Ret_z_bytecount_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecount_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_bytecount_(size) 
_SAL1_1_Source_(_Ret_opt_z_bytecount_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecount_impl(size)) _Ret_valid_impl_) + + +// _Pre_ annotations --- +#define _Pre_opt_z_ _SAL1_1_Source_(_Pre_opt_z_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__zterm_impl) _Pre_valid_impl_) + +// restrict access rights +#define _Pre_readonly_ _SAL1_1_Source_(_Pre_readonly_, (), _Pre1_impl_(__readaccess_impl_notref)) +#define _Pre_writeonly_ _SAL1_1_Source_(_Pre_writeonly_, (), _Pre1_impl_(__writeaccess_impl_notref)) + +// e.g. void FreeMemory( _Pre_bytecap_(cb) _Post_ptr_invalid_ void* pv, size_t cb ); +// buffer capacity described by another parameter +#define _Pre_cap_(size) _SAL1_1_Source_(_Pre_cap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_impl(size))) +#define _Pre_opt_cap_(size) _SAL1_1_Source_(_Pre_opt_cap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_impl(size))) +#define _Pre_bytecap_(size) _SAL1_1_Source_(_Pre_bytecap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_impl(size))) +#define _Pre_opt_bytecap_(size) _SAL1_1_Source_(_Pre_opt_bytecap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_impl(size))) + +// buffer capacity described by a constant expression +#define _Pre_cap_c_(size) _SAL1_1_Source_(_Pre_cap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_impl(size))) +#define _Pre_opt_cap_c_(size) _SAL1_1_Source_(_Pre_opt_cap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_impl(size))) +#define _Pre_bytecap_c_(size) _SAL1_1_Source_(_Pre_bytecap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size))) +#define _Pre_opt_bytecap_c_(size) _SAL1_1_Source_(_Pre_opt_bytecap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size))) +#define _Pre_cap_c_one_ _SAL1_1_Source_(_Pre_cap_c_one_, (), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl)) +#define _Pre_opt_cap_c_one_ _SAL1_1_Source_(_Pre_opt_cap_c_one_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl)) + +// buffer capacity is described by another parameter multiplied by a constant expression +#define _Pre_cap_m_(mult,size) _SAL1_1_Source_(_Pre_cap_m_, (mult,size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__mult_impl(mult,size))) +#define _Pre_opt_cap_m_(mult,size) _SAL1_1_Source_(_Pre_opt_cap_m_, (mult,size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__mult_impl(mult,size))) + +// buffer capacity described by size of other buffer, only used by dangerous legacy APIs +// e.g. 
int strcpy(_Pre_cap_for_(src) char* dst, const char* src); +#define _Pre_cap_for_(param) _SAL1_1_Source_(_Pre_cap_for_, (param), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_for_impl(param))) +#define _Pre_opt_cap_for_(param) _SAL1_1_Source_(_Pre_opt_cap_for_, (param), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_for_impl(param))) + +// buffer capacity described by a complex condition +#define _Pre_cap_x_(size) _SAL1_1_Source_(_Pre_cap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(size))) +#define _Pre_opt_cap_x_(size) _SAL1_1_Source_(_Pre_opt_cap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(size))) +#define _Pre_bytecap_x_(size) _SAL1_1_Source_(_Pre_bytecap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size))) +#define _Pre_opt_bytecap_x_(size) _SAL1_1_Source_(_Pre_opt_bytecap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size))) + +// buffer capacity described by the difference to another pointer parameter +#define _Pre_ptrdiff_cap_(ptr) _SAL1_1_Source_(_Pre_ptrdiff_cap_, (ptr), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(__ptrdiff(ptr)))) +#define _Pre_opt_ptrdiff_cap_(ptr) _SAL1_1_Source_(_Pre_opt_ptrdiff_cap_, (ptr), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(__ptrdiff(ptr)))) + +// e.g. void AppendStr( _Pre_z_ const char* szFrom, _Pre_z_cap_(cchTo) _Post_z_ char* szTo, size_t cchTo ); +#define _Pre_z_cap_(size) _SAL1_1_Source_(_Pre_z_cap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_cap_(size) _SAL1_1_Source_(_Pre_opt_z_cap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_z_bytecap_(size) _SAL1_1_Source_(_Pre_z_bytecap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_bytecap_(size) _SAL1_1_Source_(_Pre_opt_z_bytecap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) + +#define _Pre_z_cap_c_(size) _SAL1_1_Source_(_Pre_z_cap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_cap_c_(size) _SAL1_1_Source_(_Pre_opt_z_cap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_z_bytecap_c_(size) _SAL1_1_Source_(_Pre_z_bytecap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Pre_opt_z_bytecap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Pre_z_cap_x_(size) _SAL1_1_Source_(_Pre_z_cap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_cap_x_(size) _SAL1_1_Source_(_Pre_opt_z_cap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_z_bytecap_x_(size) _SAL1_1_Source_(_Pre_z_bytecap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Pre_opt_z_bytecap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) 
_Pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) + +// known capacity and valid but unknown readable extent +#define _Pre_valid_cap_(size) _SAL1_1_Source_(_Pre_valid_cap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_cap_(size) _SAL1_1_Source_(_Pre_opt_valid_cap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_valid_bytecap_(size) _SAL1_1_Source_(_Pre_valid_bytecap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_bytecap_(size) _SAL1_1_Source_(_Pre_opt_valid_bytecap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) + +#define _Pre_valid_cap_c_(size) _SAL1_1_Source_(_Pre_valid_cap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_cap_c_(size) _SAL1_1_Source_(_Pre_opt_valid_cap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_valid_bytecap_c_(size) _SAL1_1_Source_(_Pre_valid_bytecap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_bytecap_c_(size) _SAL1_1_Source_(_Pre_opt_valid_bytecap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Pre_valid_cap_x_(size) _SAL1_1_Source_(_Pre_valid_cap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_cap_x_(size) _SAL1_1_Source_(_Pre_opt_valid_cap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_valid_bytecap_x_(size) _SAL1_1_Source_(_Pre_valid_bytecap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Pre_opt_valid_bytecap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) + +// e.g. 
void AppendCharRange( _Pre_count_(cchFrom) const char* rgFrom, size_t cchFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); +// Valid buffer extent described by another parameter +#define _Pre_count_(size) _SAL1_1_Source_(_Pre_count_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_count_(size) _SAL1_1_Source_(_Pre_opt_count_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Pre_bytecount_(size) _SAL1_1_Source_(_Pre_bytecount_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_bytecount_(size) _SAL1_1_Source_(_Pre_opt_bytecount_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) + +// Valid buffer extent described by a constant expression +#define _Pre_count_c_(size) _SAL1_1_Source_(_Pre_count_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_count_c_(size) _SAL1_1_Source_(_Pre_opt_count_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Pre_bytecount_c_(size) _SAL1_1_Source_(_Pre_bytecount_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_bytecount_c_(size) _SAL1_1_Source_(_Pre_opt_bytecount_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) + +// Valid buffer extent described by a complex expression +#define _Pre_count_x_(size) _SAL1_1_Source_(_Pre_count_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_count_x_(size) _SAL1_1_Source_(_Pre_opt_count_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Pre_bytecount_x_(size) _SAL1_1_Source_(_Pre_bytecount_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_bytecount_x_(size) _SAL1_1_Source_(_Pre_opt_bytecount_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) + +// Valid buffer extent described by the difference to another pointer parameter +#define _Pre_ptrdiff_count_(ptr) _SAL1_1_Source_(_Pre_ptrdiff_count_, (ptr), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_x_impl(__ptrdiff(ptr))) _Pre_valid_impl_) +#define _Pre_opt_ptrdiff_count_(ptr) _SAL1_1_Source_(_Pre_opt_ptrdiff_count_, (ptr), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_x_impl(__ptrdiff(ptr))) _Pre_valid_impl_) + + +// char * strncpy(_Out_cap_(_Count) _Post_maybez_ char * _Dest, _In_z_ const char * _Source, _In_ size_t _Count) +// buffer maybe zero-terminated after the call +#define _Post_maybez_ _SAL1_1_Source_(_Post_maybez_, (), _Post1_impl_(__maybezterm_impl)) + +// e.g. SIZE_T HeapSize( _In_ HANDLE hHeap, DWORD dwFlags, _Pre_notnull_ _Post_bytecap_(return) LPCVOID lpMem ); +#define _Post_cap_(size) _SAL1_1_Source_(_Post_cap_, (size), _Post1_impl_(__cap_impl(size))) +#define _Post_bytecap_(size) _SAL1_1_Source_(_Post_bytecap_, (size), _Post1_impl_(__bytecap_impl(size))) + +// e.g. 
int strlen( _In_z_ _Post_count_(return+1) const char* sz ); +#define _Post_count_(size) _SAL1_1_Source_(_Post_count_, (size), _Post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Post_bytecount_(size) _SAL1_1_Source_(_Post_bytecount_, (size), _Post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) +#define _Post_count_c_(size) _SAL1_1_Source_(_Post_count_c_, (size), _Post1_impl_(__count_c_impl(size)) _Post_valid_impl_) +#define _Post_bytecount_c_(size) _SAL1_1_Source_(_Post_bytecount_c_, (size), _Post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) +#define _Post_count_x_(size) _SAL1_1_Source_(_Post_count_x_, (size), _Post1_impl_(__count_x_impl(size)) _Post_valid_impl_) +#define _Post_bytecount_x_(size) _SAL1_1_Source_(_Post_bytecount_x_, (size), _Post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) + +// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cch) _Post_z_count_(return+1) char* szFrom, size_t cchFrom ); +#define _Post_z_count_(size) _SAL1_1_Source_(_Post_z_count_, (size), _Post2_impl_(__zterm_impl,__count_impl(size)) _Post_valid_impl_) +#define _Post_z_bytecount_(size) _SAL1_1_Source_(_Post_z_bytecount_, (size), _Post2_impl_(__zterm_impl,__bytecount_impl(size)) _Post_valid_impl_) +#define _Post_z_count_c_(size) _SAL1_1_Source_(_Post_z_count_c_, (size), _Post2_impl_(__zterm_impl,__count_c_impl(size)) _Post_valid_impl_) +#define _Post_z_bytecount_c_(size) _SAL1_1_Source_(_Post_z_bytecount_c_, (size), _Post2_impl_(__zterm_impl,__bytecount_c_impl(size)) _Post_valid_impl_) +#define _Post_z_count_x_(size) _SAL1_1_Source_(_Post_z_count_x_, (size), _Post2_impl_(__zterm_impl,__count_x_impl(size)) _Post_valid_impl_) +#define _Post_z_bytecount_x_(size) _SAL1_1_Source_(_Post_z_bytecount_x_, (size), _Post2_impl_(__zterm_impl,__bytecount_x_impl(size)) _Post_valid_impl_) + +// +// _Prepost_ --- +// +// describing conditions that hold before and after the function call + +#define _Prepost_opt_z_ _SAL1_1_Source_(_Prepost_opt_z_, (), _Pre_opt_z_ _Post_z_) + +#define _Prepost_count_(size) _SAL1_1_Source_(_Prepost_count_, (size), _Pre_count_(size) _Post_count_(size)) +#define _Prepost_opt_count_(size) _SAL1_1_Source_(_Prepost_opt_count_, (size), _Pre_opt_count_(size) _Post_count_(size)) +#define _Prepost_bytecount_(size) _SAL1_1_Source_(_Prepost_bytecount_, (size), _Pre_bytecount_(size) _Post_bytecount_(size)) +#define _Prepost_opt_bytecount_(size) _SAL1_1_Source_(_Prepost_opt_bytecount_, (size), _Pre_opt_bytecount_(size) _Post_bytecount_(size)) +#define _Prepost_count_c_(size) _SAL1_1_Source_(_Prepost_count_c_, (size), _Pre_count_c_(size) _Post_count_c_(size)) +#define _Prepost_opt_count_c_(size) _SAL1_1_Source_(_Prepost_opt_count_c_, (size), _Pre_opt_count_c_(size) _Post_count_c_(size)) +#define _Prepost_bytecount_c_(size) _SAL1_1_Source_(_Prepost_bytecount_c_, (size), _Pre_bytecount_c_(size) _Post_bytecount_c_(size)) +#define _Prepost_opt_bytecount_c_(size) _SAL1_1_Source_(_Prepost_opt_bytecount_c_, (size), _Pre_opt_bytecount_c_(size) _Post_bytecount_c_(size)) +#define _Prepost_count_x_(size) _SAL1_1_Source_(_Prepost_count_x_, (size), _Pre_count_x_(size) _Post_count_x_(size)) +#define _Prepost_opt_count_x_(size) _SAL1_1_Source_(_Prepost_opt_count_x_, (size), _Pre_opt_count_x_(size) _Post_count_x_(size)) +#define _Prepost_bytecount_x_(size) _SAL1_1_Source_(_Prepost_bytecount_x_, (size), _Pre_bytecount_x_(size) _Post_bytecount_x_(size)) +#define _Prepost_opt_bytecount_x_(size) _SAL1_1_Source_(_Prepost_opt_bytecount_x_, (size), _Pre_opt_bytecount_x_(size) 
_Post_bytecount_x_(size)) + +#define _Prepost_valid_ _SAL1_1_Source_(_Prepost_valid_, (), _Pre_valid_ _Post_valid_) +#define _Prepost_opt_valid_ _SAL1_1_Source_(_Prepost_opt_valid_, (), _Pre_opt_valid_ _Post_valid_) + +// +// _Deref_ --- +// +// short version for _Deref_pre_ _Deref_post_ +// describing conditions for array elements or dereferenced pointer parameters that hold before and after the call + +#define _Deref_prepost_z_ _SAL1_1_Source_(_Deref_prepost_z_, (), _Deref_pre_z_ _Deref_post_z_) +#define _Deref_prepost_opt_z_ _SAL1_1_Source_(_Deref_prepost_opt_z_, (), _Deref_pre_opt_z_ _Deref_post_opt_z_) + +#define _Deref_prepost_cap_(size) _SAL1_1_Source_(_Deref_prepost_cap_, (size), _Deref_pre_cap_(size) _Deref_post_cap_(size)) +#define _Deref_prepost_opt_cap_(size) _SAL1_1_Source_(_Deref_prepost_opt_cap_, (size), _Deref_pre_opt_cap_(size) _Deref_post_opt_cap_(size)) +#define _Deref_prepost_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_bytecap_, (size), _Deref_pre_bytecap_(size) _Deref_post_bytecap_(size)) +#define _Deref_prepost_opt_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecap_, (size), _Deref_pre_opt_bytecap_(size) _Deref_post_opt_bytecap_(size)) + +#define _Deref_prepost_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_cap_x_, (size), _Deref_pre_cap_x_(size) _Deref_post_cap_x_(size)) +#define _Deref_prepost_opt_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_cap_x_, (size), _Deref_pre_opt_cap_x_(size) _Deref_post_opt_cap_x_(size)) +#define _Deref_prepost_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_bytecap_x_, (size), _Deref_pre_bytecap_x_(size) _Deref_post_bytecap_x_(size)) +#define _Deref_prepost_opt_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecap_x_, (size), _Deref_pre_opt_bytecap_x_(size) _Deref_post_opt_bytecap_x_(size)) + +#define _Deref_prepost_z_cap_(size) _SAL1_1_Source_(_Deref_prepost_z_cap_, (size), _Deref_pre_z_cap_(size) _Deref_post_z_cap_(size)) +#define _Deref_prepost_opt_z_cap_(size) _SAL1_1_Source_(_Deref_prepost_opt_z_cap_, (size), _Deref_pre_opt_z_cap_(size) _Deref_post_opt_z_cap_(size)) +#define _Deref_prepost_z_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_z_bytecap_, (size), _Deref_pre_z_bytecap_(size) _Deref_post_z_bytecap_(size)) +#define _Deref_prepost_opt_z_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_opt_z_bytecap_, (size), _Deref_pre_opt_z_bytecap_(size) _Deref_post_opt_z_bytecap_(size)) + +#define _Deref_prepost_valid_cap_(size) _SAL1_1_Source_(_Deref_prepost_valid_cap_, (size), _Deref_pre_valid_cap_(size) _Deref_post_valid_cap_(size)) +#define _Deref_prepost_opt_valid_cap_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_cap_, (size), _Deref_pre_opt_valid_cap_(size) _Deref_post_opt_valid_cap_(size)) +#define _Deref_prepost_valid_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_valid_bytecap_, (size), _Deref_pre_valid_bytecap_(size) _Deref_post_valid_bytecap_(size)) +#define _Deref_prepost_opt_valid_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_bytecap_, (size), _Deref_pre_opt_valid_bytecap_(size) _Deref_post_opt_valid_bytecap_(size)) + +#define _Deref_prepost_valid_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_valid_cap_x_, (size), _Deref_pre_valid_cap_x_(size) _Deref_post_valid_cap_x_(size)) +#define _Deref_prepost_opt_valid_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_cap_x_, (size), _Deref_pre_opt_valid_cap_x_(size) _Deref_post_opt_valid_cap_x_(size)) +#define _Deref_prepost_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_valid_bytecap_x_, (size), _Deref_pre_valid_bytecap_x_(size) 
_Deref_post_valid_bytecap_x_(size)) +#define _Deref_prepost_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_bytecap_x_, (size), _Deref_pre_opt_valid_bytecap_x_(size) _Deref_post_opt_valid_bytecap_x_(size)) + +#define _Deref_prepost_count_(size) _SAL1_1_Source_(_Deref_prepost_count_, (size), _Deref_pre_count_(size) _Deref_post_count_(size)) +#define _Deref_prepost_opt_count_(size) _SAL1_1_Source_(_Deref_prepost_opt_count_, (size), _Deref_pre_opt_count_(size) _Deref_post_opt_count_(size)) +#define _Deref_prepost_bytecount_(size) _SAL1_1_Source_(_Deref_prepost_bytecount_, (size), _Deref_pre_bytecount_(size) _Deref_post_bytecount_(size)) +#define _Deref_prepost_opt_bytecount_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecount_, (size), _Deref_pre_opt_bytecount_(size) _Deref_post_opt_bytecount_(size)) + +#define _Deref_prepost_count_x_(size) _SAL1_1_Source_(_Deref_prepost_count_x_, (size), _Deref_pre_count_x_(size) _Deref_post_count_x_(size)) +#define _Deref_prepost_opt_count_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_count_x_, (size), _Deref_pre_opt_count_x_(size) _Deref_post_opt_count_x_(size)) +#define _Deref_prepost_bytecount_x_(size) _SAL1_1_Source_(_Deref_prepost_bytecount_x_, (size), _Deref_pre_bytecount_x_(size) _Deref_post_bytecount_x_(size)) +#define _Deref_prepost_opt_bytecount_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecount_x_, (size), _Deref_pre_opt_bytecount_x_(size) _Deref_post_opt_bytecount_x_(size)) + +#define _Deref_prepost_valid_ _SAL1_1_Source_(_Deref_prepost_valid_, (), _Deref_pre_valid_ _Deref_post_valid_) +#define _Deref_prepost_opt_valid_ _SAL1_1_Source_(_Deref_prepost_opt_valid_, (), _Deref_pre_opt_valid_ _Deref_post_opt_valid_) + +// +// _Deref_ +// +// used with references to arrays + +#define _Deref_out_z_cap_c_(size) _SAL1_1_Source_(_Deref_out_z_cap_c_, (size), _Deref_pre_cap_c_(size) _Deref_post_z_) +#define _Deref_inout_z_cap_c_(size) _SAL1_1_Source_(_Deref_inout_z_cap_c_, (size), _Deref_pre_z_cap_c_(size) _Deref_post_z_) +#define _Deref_out_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_out_z_bytecap_c_, (size), _Deref_pre_bytecap_c_(size) _Deref_post_z_) +#define _Deref_inout_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_inout_z_bytecap_c_, (size), _Deref_pre_z_bytecap_c_(size) _Deref_post_z_) +#define _Deref_inout_z_ _SAL1_1_Source_(_Deref_inout_z_, (), _Deref_prepost_z_) + +// #pragma endregion Input Buffer SAL 1 compatibility macros + + +//============================================================================ +// Implementation Layer: +//============================================================================ + + +// Naming conventions: +// A symbol the begins with _SA_ is for the machinery of creating any +// annotations; many of those come from sourceannotations.h in the case +// of attributes. + +// A symbol that ends with _impl is the very lowest level macro. It is +// not required to be a legal standalone annotation, and in the case +// of attribute annotations, usually is not. (In the case of some declspec +// annotations, it might be, but it should not be assumed so.) Those +// symols will be used in the _PreN..., _PostN... and _RetN... annotations +// to build up more complete annotations. + +// A symbol ending in _impl_ is reserved to the implementation as well, +// but it does form a complete annotation; usually they are used to build +// up even higher level annotations. 
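+// Before the implementation layer below, an illustrative sketch (names are
+// placeholders for illustration only) of how the high-level annotations
+// defined above are meant to be combined on a declaration:
+//
+// e.g. _Ret_opt_z_cap_(cchBuf)
+//      char* FormatGreeting( _Pre_z_cap_(cchBuf) _Post_z_ char* szBuf, size_t cchBuf );
+//
+// _Pre_z_cap_(cchBuf) states that szBuf is NULL-terminated on entry with
+// cchBuf writable elements, _Post_z_ that it is NULL-terminated on exit, and
+// _Ret_opt_z_cap_(cchBuf) that the possibly-NULL return value is
+// NULL-terminated with a capacity of cchBuf elements.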
+ + +#if _USE_ATTRIBUTES_FOR_SAL || _USE_DECLSPECS_FOR_SAL // [ +// Sharable "_impl" macros: these can be shared between the various annotation +// forms but are part of the implementation of the macros. These are collected +// here to assure that only necessary differences in the annotations +// exist. + +#define _Always_impl_(annos) _Group_(annos _SAL_nop_impl_) _On_failure_impl_(annos _SAL_nop_impl_) +#define _Bound_impl_ _SA_annotes0(SAL_bound) +#define _Field_range_impl_(min,max) _Range_impl_(min,max) +#define _Literal_impl_ _SA_annotes1(SAL_constant, __yes) +#define _Maybenull_impl_ _SA_annotes1(SAL_null, __maybe) +#define _Maybevalid_impl_ _SA_annotes1(SAL_valid, __maybe) +#define _Must_inspect_impl_ _Post_impl_ _SA_annotes0(SAL_mustInspect) +#define _Notliteral_impl_ _SA_annotes1(SAL_constant, __no) +#define _Notnull_impl_ _SA_annotes1(SAL_null, __no) +#define _Notvalid_impl_ _SA_annotes1(SAL_valid, __no) +#define _NullNull_terminated_impl_ _Group_(_SA_annotes1(SAL_nullTerminated, __yes) _SA_annotes1(SAL_readableTo,inexpressibleCount("NullNull terminated string"))) +#define _Null_impl_ _SA_annotes1(SAL_null, __yes) +#define _Null_terminated_impl_ _SA_annotes1(SAL_nullTerminated, __yes) +#define _Out_impl_ _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl) _Post_valid_impl_ +#define _Out_opt_impl_ _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl) _Post_valid_impl_ +#define _Points_to_data_impl_ _At_(*_Curr_, _SA_annotes1(SAL_mayBePointer, __no)) +#define _Post_satisfies_impl_(cond) _Post_impl_ _Satisfies_impl_(cond) +#define _Post_valid_impl_ _Post1_impl_(__valid_impl) +#define _Pre_satisfies_impl_(cond) _Pre_impl_ _Satisfies_impl_(cond) +#define _Pre_valid_impl_ _Pre1_impl_(__valid_impl) +#define _Range_impl_(min,max) _SA_annotes2(SAL_range, min, max) +#define _Readable_bytes_impl_(size) _SA_annotes1(SAL_readableTo, byteCount(size)) +#define _Readable_elements_impl_(size) _SA_annotes1(SAL_readableTo, elementCount(size)) +#define _Ret_valid_impl_ _Ret1_impl_(__valid_impl) +#define _Satisfies_impl_(cond) _SA_annotes1(SAL_satisfies, cond) +#define _Valid_impl_ _SA_annotes1(SAL_valid, __yes) +#define _Writable_bytes_impl_(size) _SA_annotes1(SAL_writableTo, byteCount(size)) +#define _Writable_elements_impl_(size) _SA_annotes1(SAL_writableTo, elementCount(size)) + +#define _In_range_impl_(min,max) _Pre_impl_ _Range_impl_(min,max) +#define _Out_range_impl_(min,max) _Post_impl_ _Range_impl_(min,max) +#define _Ret_range_impl_(min,max) _Post_impl_ _Range_impl_(min,max) +#define _Deref_in_range_impl_(min,max) _Deref_pre_impl_ _Range_impl_(min,max) +#define _Deref_out_range_impl_(min,max) _Deref_post_impl_ _Range_impl_(min,max) +#define _Deref_ret_range_impl_(min,max) _Deref_post_impl_ _Range_impl_(min,max) + +#define _Deref_pre_impl_ _Pre_impl_ _Notref_impl_ _Deref_impl_ +#define _Deref_post_impl_ _Post_impl_ _Notref_impl_ _Deref_impl_ + +// The following are for the implementation machinery, and are not +// suitable for annotating general code. +// We're tying to phase this out, someday. The parser quotes the param. +#define __AuToQuOtE _SA_annotes0(SAL_AuToQuOtE) + +// Normally the parser does some simple type checking of annotation params, +// defer that check to the plugin. 
+#define __deferTypecheck _SA_annotes0(SAL_deferTypecheck) + +#define _SA_SPECSTRIZE( x ) #x +#define _SAL_nop_impl_ /* nothing */ +#define __nop_impl(x) x +#endif + + +#if _USE_ATTRIBUTES_FOR_SAL // [ + +// Using attributes for sal + +#include "codeanalysis\sourceannotations.h" + + +#define _SA_annotes0(n) [SAL_annotes(Name=#n)] +#define _SA_annotes1(n,pp1) [SAL_annotes(Name=#n, p1=_SA_SPECSTRIZE(pp1))] +#define _SA_annotes2(n,pp1,pp2) [SAL_annotes(Name=#n, p1=_SA_SPECSTRIZE(pp1), p2=_SA_SPECSTRIZE(pp2))] +#define _SA_annotes3(n,pp1,pp2,pp3) [SAL_annotes(Name=#n, p1=_SA_SPECSTRIZE(pp1), p2=_SA_SPECSTRIZE(pp2), p3=_SA_SPECSTRIZE(pp3))] + +#define _Pre_impl_ [SAL_pre] +#define _Post_impl_ [SAL_post] +#define _Deref_impl_ [SAL_deref] +#define _Notref_impl_ [SAL_notref] + + +// Declare a function to be an annotation or primop (respectively). +// Done this way so that they don't appear in the regular compiler's +// namespace. +#define __ANNOTATION(fun) _SA_annotes0(SAL_annotation) void __SA_##fun; +#define __PRIMOP(type, fun) _SA_annotes0(SAL_primop) type __SA_##fun; +#define __QUALIFIER(fun) _SA_annotes0(SAL_qualifier) void __SA_##fun; + +// Benign declspec needed here for WindowsPREfast +#define __In_impl_ [SA_Pre(Valid=SA_Yes)] [SA_Pre(Deref=1, Notref=1, Access=SA_Read)] __declspec("SAL_pre SAL_valid") + +#elif _USE_DECLSPECS_FOR_SAL // ][ + +// Using declspecs for sal + +#define _SA_annotes0(n) __declspec(#n) +#define _SA_annotes1(n,pp1) __declspec(#n "(" _SA_SPECSTRIZE(pp1) ")" ) +#define _SA_annotes2(n,pp1,pp2) __declspec(#n "(" _SA_SPECSTRIZE(pp1) "," _SA_SPECSTRIZE(pp2) ")") +#define _SA_annotes3(n,pp1,pp2,pp3) __declspec(#n "(" _SA_SPECSTRIZE(pp1) "," _SA_SPECSTRIZE(pp2) "," _SA_SPECSTRIZE(pp3) ")") + +#define _Pre_impl_ _SA_annotes0(SAL_pre) +#define _Post_impl_ _SA_annotes0(SAL_post) +#define _Deref_impl_ _SA_annotes0(SAL_deref) +#define _Notref_impl_ _SA_annotes0(SAL_notref) + +// Declare a function to be an annotation or primop (respectively). +// Done this way so that they don't appear in the regular compiler's +// namespace. +#define __ANNOTATION(fun) _SA_annotes0(SAL_annotation) void __SA_##fun + +#define __PRIMOP(type, fun) _SA_annotes0(SAL_primop) type __SA_##fun + +#define __QUALIFIER(fun) _SA_annotes0(SAL_qualifier) void __SA_##fun; + +#define __In_impl_ _Pre_impl_ _SA_annotes0(SAL_valid) _Pre_impl_ _Deref_impl_ _Notref_impl_ _SA_annotes0(SAL_readonly) + +#else // ][ + +// Using "nothing" for sal + +#define _SA_annotes0(n) +#define _SA_annotes1(n,pp1) +#define _SA_annotes2(n,pp1,pp2) +#define _SA_annotes3(n,pp1,pp2,pp3) + +#define __ANNOTATION(fun) +#define __PRIMOP(type, fun) +#define __QUALIFIER(type, fun) + +#endif // ] + +#if _USE_ATTRIBUTES_FOR_SAL || _USE_DECLSPECS_FOR_SAL // [ + +// Declare annotations that need to be declared. +__ANNOTATION(SAL_useHeader(void)); +__ANNOTATION(SAL_bound(void)); +__ANNOTATION(SAL_allocator(void)); //??? 
resolve with PFD +__ANNOTATION(SAL_file_parser(__AuToQuOtE __In_impl_ char *, __In_impl_ char *)); +__ANNOTATION(SAL_source_code_content(__In_impl_ char *)); +__ANNOTATION(SAL_analysisHint(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_untrusted_data_source(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_untrusted_data_source_this(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_validated(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_validated_this(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_encoded(void)); +__ANNOTATION(SAL_adt(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_add_adt_property(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_remove_adt_property(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_transfer_adt_property_from(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_post_type(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_volatile(void)); +__ANNOTATION(SAL_nonvolatile(void)); +__ANNOTATION(SAL_entrypoint(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_blocksOn(__In_impl_ void*)); +__ANNOTATION(SAL_mustInspect(void)); + +// Only appears in model files, but needs to be declared. +__ANNOTATION(SAL_TypeName(__AuToQuOtE __In_impl_ char *)); + +// To be declared well-known soon. +__ANNOTATION(SAL_interlocked(void);) + +#pragma warning (suppress: 28227 28241) +__ANNOTATION(SAL_name(__In_impl_ char *, __In_impl_ char *, __In_impl_ char *);) + +__PRIMOP(char *, _Macro_value_(__In_impl_ char *)); +__PRIMOP(int, _Macro_defined_(__In_impl_ char *)); +__PRIMOP(char *, _Strstr_(__In_impl_ char *, __In_impl_ char *)); + +#endif // ] + +#if _USE_ATTRIBUTES_FOR_SAL // [ + +#define _Check_return_impl_ [SA_Post(MustCheck=SA_Yes)] + +#define _Success_impl_(expr) [SA_Success(Condition=#expr)] +#define _On_failure_impl_(annos) [SAL_context(p1="SAL_failed")] _Group_(_Post_impl_ _Group_(annos _SAL_nop_impl_)) + +#define _Printf_format_string_impl_ [SA_FormatString(Style="printf")] +#define _Scanf_format_string_impl_ [SA_FormatString(Style="scanf")] +#define _Scanf_s_format_string_impl_ [SA_FormatString(Style="scanf_s")] + +#define _In_bound_impl_ [SA_PreBound(Deref=0)] +#define _Out_bound_impl_ [SA_PostBound(Deref=0)] +#define _Ret_bound_impl_ [SA_PostBound(Deref=0)] +#define _Deref_in_bound_impl_ [SA_PreBound(Deref=1)] +#define _Deref_out_bound_impl_ [SA_PostBound(Deref=1)] +#define _Deref_ret_bound_impl_ [SA_PostBound(Deref=1)] + +#define __valid_impl Valid=SA_Yes +#define __maybevalid_impl Valid=SA_Maybe +#define __notvalid_impl Valid=SA_No + +#define __null_impl Null=SA_Yes +#define __maybenull_impl Null=SA_Maybe +#define __notnull_impl Null=SA_No + +#define __null_impl_notref Null=SA_Yes,Notref=1 +#define __maybenull_impl_notref Null=SA_Maybe,Notref=1 +#define __notnull_impl_notref Null=SA_No,Notref=1 + +#define __zterm_impl NullTerminated=SA_Yes +#define __maybezterm_impl NullTerminated=SA_Maybe +#define __maybzterm_impl NullTerminated=SA_Maybe +#define __notzterm_impl NullTerminated=SA_No + +#define __readaccess_impl Access=SA_Read +#define __writeaccess_impl Access=SA_Write +#define __allaccess_impl Access=SA_ReadWrite + +#define __readaccess_impl_notref Access=SA_Read,Notref=1 +#define __writeaccess_impl_notref Access=SA_Write,Notref=1 +#define __allaccess_impl_notref Access=SA_ReadWrite,Notref=1 + +#if _MSC_VER >= 1610 /*IFSTRIP=IGN*/ // [ + +// For SAL2, we need to expect general expressions. 
+ +#define __cap_impl(size) WritableElements="\n"#size +#define __bytecap_impl(size) WritableBytes="\n"#size +#define __bytecount_impl(size) ValidBytes="\n"#size +#define __count_impl(size) ValidElements="\n"#size + +#else // ][ + +#define __cap_impl(size) WritableElements=#size +#define __bytecap_impl(size) WritableBytes=#size +#define __bytecount_impl(size) ValidBytes=#size +#define __count_impl(size) ValidElements=#size + +#endif // ] + +#define __cap_c_impl(size) WritableElementsConst=size +#define __cap_c_one_notref_impl WritableElementsConst=1,Notref=1 +#define __cap_for_impl(param) WritableElementsLength=#param +#define __cap_x_impl(size) WritableElements="\n@"#size + +#define __bytecap_c_impl(size) WritableBytesConst=size +#define __bytecap_x_impl(size) WritableBytes="\n@"#size + +#define __mult_impl(mult,size) __cap_impl((mult)*(size)) + +#define __count_c_impl(size) ValidElementsConst=size +#define __count_x_impl(size) ValidElements="\n@"#size + +#define __bytecount_c_impl(size) ValidBytesConst=size +#define __bytecount_x_impl(size) ValidBytes="\n@"#size + + +#define _At_impl_(target, annos) [SAL_at(p1=#target)] _Group_(annos) +#define _At_buffer_impl_(target, iter, bound, annos) [SAL_at_buffer(p1=#target, p2=#iter, p3=#bound)] _Group_(annos) +#define _When_impl_(expr, annos) [SAL_when(p1=#expr)] _Group_(annos) + +#define _Group_impl_(annos) [SAL_begin] annos [SAL_end] +#define _GrouP_impl_(annos) [SAL_BEGIN] annos [SAL_END] + +#define _Use_decl_anno_impl_ _SA_annotes0(SAL_useHeader) // this is a special case! + +#define _Pre1_impl_(p1) [SA_Pre(p1)] +#define _Pre2_impl_(p1,p2) [SA_Pre(p1,p2)] +#define _Pre3_impl_(p1,p2,p3) [SA_Pre(p1,p2,p3)] + +#define _Post1_impl_(p1) [SA_Post(p1)] +#define _Post2_impl_(p1,p2) [SA_Post(p1,p2)] +#define _Post3_impl_(p1,p2,p3) [SA_Post(p1,p2,p3)] + +#define _Ret1_impl_(p1) [SA_Post(p1)] +#define _Ret2_impl_(p1,p2) [SA_Post(p1,p2)] +#define _Ret3_impl_(p1,p2,p3) [SA_Post(p1,p2,p3)] + +#define _Deref_pre1_impl_(p1) [SA_Pre(Deref=1,p1)] +#define _Deref_pre2_impl_(p1,p2) [SA_Pre(Deref=1,p1,p2)] +#define _Deref_pre3_impl_(p1,p2,p3) [SA_Pre(Deref=1,p1,p2,p3)] + + +#define _Deref_post1_impl_(p1) [SA_Post(Deref=1,p1)] +#define _Deref_post2_impl_(p1,p2) [SA_Post(Deref=1,p1,p2)] +#define _Deref_post3_impl_(p1,p2,p3) [SA_Post(Deref=1,p1,p2,p3)] + +#define _Deref_ret1_impl_(p1) [SA_Post(Deref=1,p1)] +#define _Deref_ret2_impl_(p1,p2) [SA_Post(Deref=1,p1,p2)] +#define _Deref_ret3_impl_(p1,p2,p3) [SA_Post(Deref=1,p1,p2,p3)] + +#define _Deref2_pre1_impl_(p1) [SA_Pre(Deref=2,Notref=1,p1)] +#define _Deref2_post1_impl_(p1) [SA_Post(Deref=2,Notref=1,p1)] +#define _Deref2_ret1_impl_(p1) [SA_Post(Deref=2,Notref=1,p1)] + +// Obsolete -- may be needed for transition to attributes. 
+#define __inner_typefix(ctype) [SAL_typefix(p1=_SA_SPECSTRIZE(ctype))] +#define __inner_exceptthat [SAL_except] + + +#elif _USE_DECLSPECS_FOR_SAL // ][ + +#define _Check_return_impl_ __post _SA_annotes0(SAL_checkReturn) + +#define _Success_impl_(expr) _SA_annotes1(SAL_success, expr) +#define _On_failure_impl_(annos) _SA_annotes1(SAL_context, SAL_failed) _Group_(_Post_impl_ _Group_(_SAL_nop_impl_ annos)) + +#define _Printf_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "printf") +#define _Scanf_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "scanf") +#define _Scanf_s_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "scanf_s") + +#define _In_bound_impl_ _Pre_impl_ _Bound_impl_ +#define _Out_bound_impl_ _Post_impl_ _Bound_impl_ +#define _Ret_bound_impl_ _Post_impl_ _Bound_impl_ +#define _Deref_in_bound_impl_ _Deref_pre_impl_ _Bound_impl_ +#define _Deref_out_bound_impl_ _Deref_post_impl_ _Bound_impl_ +#define _Deref_ret_bound_impl_ _Deref_post_impl_ _Bound_impl_ + + +#define __null_impl _SA_annotes0(SAL_null) // _SA_annotes1(SAL_null, __yes) +#define __notnull_impl _SA_annotes0(SAL_notnull) // _SA_annotes1(SAL_null, __no) +#define __maybenull_impl _SA_annotes0(SAL_maybenull) // _SA_annotes1(SAL_null, __maybe) + +#define __valid_impl _SA_annotes0(SAL_valid) // _SA_annotes1(SAL_valid, __yes) +#define __notvalid_impl _SA_annotes0(SAL_notvalid) // _SA_annotes1(SAL_valid, __no) +#define __maybevalid_impl _SA_annotes0(SAL_maybevalid) // _SA_annotes1(SAL_valid, __maybe) + +#define __null_impl_notref _Notref_ _Null_impl_ +#define __maybenull_impl_notref _Notref_ _Maybenull_impl_ +#define __notnull_impl_notref _Notref_ _Notnull_impl_ + +#define __zterm_impl _SA_annotes1(SAL_nullTerminated, __yes) +#define __maybezterm_impl _SA_annotes1(SAL_nullTerminated, __maybe) +#define __maybzterm_impl _SA_annotes1(SAL_nullTerminated, __maybe) +#define __notzterm_impl _SA_annotes1(SAL_nullTerminated, __no) + +#define __readaccess_impl _SA_annotes1(SAL_access, 0x1) +#define __writeaccess_impl _SA_annotes1(SAL_access, 0x2) +#define __allaccess_impl _SA_annotes1(SAL_access, 0x3) + +#define __readaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x1) +#define __writeaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x2) +#define __allaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x3) + +#define __cap_impl(size) _SA_annotes1(SAL_writableTo,elementCount(size)) +#define __cap_c_impl(size) _SA_annotes1(SAL_writableTo,elementCount(size)) +#define __cap_c_one_notref_impl _Notref_ _SA_annotes1(SAL_writableTo,elementCount(1)) +#define __cap_for_impl(param) _SA_annotes1(SAL_writableTo,inexpressibleCount(sizeof(param))) +#define __cap_x_impl(size) _SA_annotes1(SAL_writableTo,inexpressibleCount(#size)) + +#define __bytecap_impl(size) _SA_annotes1(SAL_writableTo,byteCount(size)) +#define __bytecap_c_impl(size) _SA_annotes1(SAL_writableTo,byteCount(size)) +#define __bytecap_x_impl(size) _SA_annotes1(SAL_writableTo,inexpressibleCount(#size)) + +#define __mult_impl(mult,size) _SA_annotes1(SAL_writableTo,(mult)*(size)) + +#define __count_impl(size) _SA_annotes1(SAL_readableTo,elementCount(size)) +#define __count_c_impl(size) _SA_annotes1(SAL_readableTo,elementCount(size)) +#define __count_x_impl(size) _SA_annotes1(SAL_readableTo,inexpressibleCount(#size)) + +#define __bytecount_impl(size) _SA_annotes1(SAL_readableTo,byteCount(size)) +#define __bytecount_c_impl(size) _SA_annotes1(SAL_readableTo,byteCount(size)) +#define __bytecount_x_impl(size) _SA_annotes1(SAL_readableTo,inexpressibleCount(#size)) + 
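+// For orientation, a rough sketch (not normative) of how the declspec forms
+// above expand: __count_impl(10) goes through
+// _SA_annotes1(SAL_readableTo, elementCount(10)) and becomes approximately
+//   __declspec("SAL_readableTo" "(" "elementCount(10)" ")")
+// whose adjacent string literals concatenate to
+//   __declspec("SAL_readableTo(elementCount(10))")
+// a string declspec that the analysis tools then interpret.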
+#define _At_impl_(target, annos) _SA_annotes0(SAL_at(target)) _Group_(annos) +#define _At_buffer_impl_(target, iter, bound, annos) _SA_annotes3(SAL_at_buffer, target, iter, bound) _Group_(annos) +#define _Group_impl_(annos) _SA_annotes0(SAL_begin) annos _SA_annotes0(SAL_end) +#define _GrouP_impl_(annos) _SA_annotes0(SAL_BEGIN) annos _SA_annotes0(SAL_END) +#define _When_impl_(expr, annos) _SA_annotes0(SAL_when(expr)) _Group_(annos) + +#define _Use_decl_anno_impl_ __declspec("SAL_useHeader()") // this is a special case! + +#define _Pre1_impl_(p1) _Pre_impl_ p1 +#define _Pre2_impl_(p1,p2) _Pre_impl_ p1 _Pre_impl_ p2 +#define _Pre3_impl_(p1,p2,p3) _Pre_impl_ p1 _Pre_impl_ p2 _Pre_impl_ p3 + +#define _Post1_impl_(p1) _Post_impl_ p1 +#define _Post2_impl_(p1,p2) _Post_impl_ p1 _Post_impl_ p2 +#define _Post3_impl_(p1,p2,p3) _Post_impl_ p1 _Post_impl_ p2 _Post_impl_ p3 + +#define _Ret1_impl_(p1) _Post_impl_ p1 +#define _Ret2_impl_(p1,p2) _Post_impl_ p1 _Post_impl_ p2 +#define _Ret3_impl_(p1,p2,p3) _Post_impl_ p1 _Post_impl_ p2 _Post_impl_ p3 + +#define _Deref_pre1_impl_(p1) _Deref_pre_impl_ p1 +#define _Deref_pre2_impl_(p1,p2) _Deref_pre_impl_ p1 _Deref_pre_impl_ p2 +#define _Deref_pre3_impl_(p1,p2,p3) _Deref_pre_impl_ p1 _Deref_pre_impl_ p2 _Deref_pre_impl_ p3 + +#define _Deref_post1_impl_(p1) _Deref_post_impl_ p1 +#define _Deref_post2_impl_(p1,p2) _Deref_post_impl_ p1 _Deref_post_impl_ p2 +#define _Deref_post3_impl_(p1,p2,p3) _Deref_post_impl_ p1 _Deref_post_impl_ p2 _Deref_post_impl_ p3 + +#define _Deref_ret1_impl_(p1) _Deref_post_impl_ p1 +#define _Deref_ret2_impl_(p1,p2) _Deref_post_impl_ p1 _Deref_post_impl_ p2 +#define _Deref_ret3_impl_(p1,p2,p3) _Deref_post_impl_ p1 _Deref_post_impl_ p2 _Deref_post_impl_ p3 + +#define _Deref2_pre1_impl_(p1) _Deref_pre_impl_ _Notref_impl_ _Deref_impl_ p1 +#define _Deref2_post1_impl_(p1) _Deref_post_impl_ _Notref_impl_ _Deref_impl_ p1 +#define _Deref2_ret1_impl_(p1) _Deref_post_impl_ _Notref_impl_ _Deref_impl_ p1 + +#define __inner_typefix(ctype) _SA_annotes1(SAL_typefix, ctype) +#define __inner_exceptthat _SA_annotes0(SAL_except) + +#elif defined(_MSC_EXTENSIONS) && !defined( MIDL_PASS ) && !defined(__midl) && !defined(RC_INVOKED) && defined(_PFT_VER) && _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // ][ + +// minimum attribute expansion for foreground build + +#pragma push_macro( "SA" ) +#pragma push_macro( "REPEATABLE" ) + +#ifdef __cplusplus // [ +#define SA( id ) id +#define REPEATABLE [repeatable] +#else // !__cplusplus // ][ +#define SA( id ) SA_##id +#define REPEATABLE +#endif // !__cplusplus // ] + +REPEATABLE +[source_annotation_attribute( SA( Parameter ) )] +struct __P_impl +{ +#ifdef __cplusplus // [ + __P_impl(); +#endif // ] + int __d_; +}; +typedef struct __P_impl __P_impl; + +REPEATABLE +[source_annotation_attribute( SA( ReturnValue ) )] +struct __R_impl +{ +#ifdef __cplusplus // [ + __R_impl(); +#endif // ] + int __d_; +}; +typedef struct __R_impl __R_impl; + +[source_annotation_attribute( SA( Method ) )] +struct __M_ +{ +#ifdef __cplusplus // [ + __M_(); +#endif // ] + int __d_; +}; +typedef struct __M_ __M_; + +[source_annotation_attribute( SA( All ) )] +struct __A_ +{ +#ifdef __cplusplus // [ + __A_(); +#endif // ] + int __d_; +}; +typedef struct __A_ __A_; + +[source_annotation_attribute( SA( Field ) )] +struct __F_ +{ +#ifdef __cplusplus // [ + __F_(); +#endif // ] + int __d_; +}; +typedef struct __F_ __F_; + +#pragma pop_macro( "REPEATABLE" ) +#pragma pop_macro( "SA" ) + + +#define _SAL_nop_impl_ + +#define _At_impl_(target, annos) [__A_(__d_=0)] 
+#define _At_buffer_impl_(target, iter, bound, annos) [__A_(__d_=0)] +#define _When_impl_(expr, annos) annos +#define _Group_impl_(annos) annos +#define _GrouP_impl_(annos) annos +#define _Use_decl_anno_impl_ [__M_(__d_=0)] + +#define _Points_to_data_impl_ [__P_impl(__d_=0)] +#define _Literal_impl_ [__P_impl(__d_=0)] +#define _Notliteral_impl_ [__P_impl(__d_=0)] + +#define _Pre_valid_impl_ [__P_impl(__d_=0)] +#define _Post_valid_impl_ [__P_impl(__d_=0)] +#define _Ret_valid_impl_ [__R_impl(__d_=0)] + +#define _Check_return_impl_ [__R_impl(__d_=0)] +#define _Must_inspect_impl_ [__R_impl(__d_=0)] + +#define _Success_impl_(expr) [__M_(__d_=0)] +#define _On_failure_impl_(expr) [__M_(__d_=0)] +#define _Always_impl_(expr) [__M_(__d_=0)] + +#define _Printf_format_string_impl_ [__P_impl(__d_=0)] +#define _Scanf_format_string_impl_ [__P_impl(__d_=0)] +#define _Scanf_s_format_string_impl_ [__P_impl(__d_=0)] + +#define _Raises_SEH_exception_impl_ [__M_(__d_=0)] +#define _Maybe_raises_SEH_exception_impl_ [__M_(__d_=0)] + +#define _In_bound_impl_ [__P_impl(__d_=0)] +#define _Out_bound_impl_ [__P_impl(__d_=0)] +#define _Ret_bound_impl_ [__R_impl(__d_=0)] +#define _Deref_in_bound_impl_ [__P_impl(__d_=0)] +#define _Deref_out_bound_impl_ [__P_impl(__d_=0)] +#define _Deref_ret_bound_impl_ [__R_impl(__d_=0)] + +#define _Range_impl_(min,max) [__P_impl(__d_=0)] +#define _In_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Out_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Ret_range_impl_(min,max) [__R_impl(__d_=0)] +#define _Deref_in_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Deref_out_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Deref_ret_range_impl_(min,max) [__R_impl(__d_=0)] + +#define _Field_range_impl_(min,max) [__F_(__d_=0)] + +#define _Pre_satisfies_impl_(cond) [__A_(__d_=0)] +#define _Post_satisfies_impl_(cond) [__A_(__d_=0)] +#define _Satisfies_impl_(cond) [__A_(__d_=0)] + +#define _Null_impl_ [__A_(__d_=0)] +#define _Notnull_impl_ [__A_(__d_=0)] +#define _Maybenull_impl_ [__A_(__d_=0)] + +#define _Valid_impl_ [__A_(__d_=0)] +#define _Notvalid_impl_ [__A_(__d_=0)] +#define _Maybevalid_impl_ [__A_(__d_=0)] + +#define _Readable_bytes_impl_(size) [__A_(__d_=0)] +#define _Readable_elements_impl_(size) [__A_(__d_=0)] +#define _Writable_bytes_impl_(size) [__A_(__d_=0)] +#define _Writable_elements_impl_(size) [__A_(__d_=0)] + +#define _Null_terminated_impl_ [__A_(__d_=0)] +#define _NullNull_terminated_impl_ [__A_(__d_=0)] + +#define _Pre_impl_ [__P_impl(__d_=0)] +#define _Pre1_impl_(p1) [__P_impl(__d_=0)] +#define _Pre2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Pre3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Post_impl_ [__P_impl(__d_=0)] +#define _Post1_impl_(p1) [__P_impl(__d_=0)] +#define _Post2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Post3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Ret1_impl_(p1) [__R_impl(__d_=0)] +#define _Ret2_impl_(p1,p2) [__R_impl(__d_=0)] +#define _Ret3_impl_(p1,p2,p3) [__R_impl(__d_=0)] + +#define _Deref_pre1_impl_(p1) [__P_impl(__d_=0)] +#define _Deref_pre2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Deref_pre3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Deref_post1_impl_(p1) [__P_impl(__d_=0)] +#define _Deref_post2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Deref_post3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Deref_ret1_impl_(p1) [__R_impl(__d_=0)] +#define _Deref_ret2_impl_(p1,p2) [__R_impl(__d_=0)] +#define _Deref_ret3_impl_(p1,p2,p3) [__R_impl(__d_=0)] + +#define _Deref2_pre1_impl_(p1) //[__P_impl(__d_=0)] +#define _Deref2_post1_impl_(p1) 
//[__P_impl(__d_=0)] +#define _Deref2_ret1_impl_(p1) //[__P_impl(__d_=0)] + +#else // ][ + + +#define _SAL_nop_impl_ X + +#define _At_impl_(target, annos) +#define _When_impl_(expr, annos) +#define _Group_impl_(annos) +#define _GrouP_impl_(annos) +#define _At_buffer_impl_(target, iter, bound, annos) +#define _Use_decl_anno_impl_ +#define _Points_to_data_impl_ +#define _Literal_impl_ +#define _Notliteral_impl_ +#define _Notref_impl_ + +#define _Pre_valid_impl_ +#define _Post_valid_impl_ +#define _Ret_valid_impl_ + +#define _Check_return_impl_ +#define _Must_inspect_impl_ + +#define _Success_impl_(expr) +#define _On_failure_impl_(annos) +#define _Always_impl_(annos) + +#define _Printf_format_string_impl_ +#define _Scanf_format_string_impl_ +#define _Scanf_s_format_string_impl_ + +#define _In_bound_impl_ +#define _Out_bound_impl_ +#define _Ret_bound_impl_ +#define _Deref_in_bound_impl_ +#define _Deref_out_bound_impl_ +#define _Deref_ret_bound_impl_ + +#define _Range_impl_(min,max) +#define _In_range_impl_(min,max) +#define _Out_range_impl_(min,max) +#define _Ret_range_impl_(min,max) +#define _Deref_in_range_impl_(min,max) +#define _Deref_out_range_impl_(min,max) +#define _Deref_ret_range_impl_(min,max) + +#define _Satisfies_impl_(expr) +#define _Pre_satisfies_impl_(expr) +#define _Post_satisfies_impl_(expr) + +#define _Null_impl_ +#define _Notnull_impl_ +#define _Maybenull_impl_ + +#define _Valid_impl_ +#define _Notvalid_impl_ +#define _Maybevalid_impl_ + +#define _Field_range_impl_(min,max) + +#define _Pre_impl_ +#define _Pre1_impl_(p1) +#define _Pre2_impl_(p1,p2) +#define _Pre3_impl_(p1,p2,p3) + +#define _Post_impl_ +#define _Post1_impl_(p1) +#define _Post2_impl_(p1,p2) +#define _Post3_impl_(p1,p2,p3) + +#define _Ret1_impl_(p1) +#define _Ret2_impl_(p1,p2) +#define _Ret3_impl_(p1,p2,p3) + +#define _Deref_pre1_impl_(p1) +#define _Deref_pre2_impl_(p1,p2) +#define _Deref_pre3_impl_(p1,p2,p3) + +#define _Deref_post1_impl_(p1) +#define _Deref_post2_impl_(p1,p2) +#define _Deref_post3_impl_(p1,p2,p3) + +#define _Deref_ret1_impl_(p1) +#define _Deref_ret2_impl_(p1,p2) +#define _Deref_ret3_impl_(p1,p2,p3) + +#define _Deref2_pre1_impl_(p1) +#define _Deref2_post1_impl_(p1) +#define _Deref2_ret1_impl_(p1) + +#define _Readable_bytes_impl_(size) +#define _Readable_elements_impl_(size) +#define _Writable_bytes_impl_(size) +#define _Writable_elements_impl_(size) + +#define _Null_terminated_impl_ +#define _NullNull_terminated_impl_ + +// Obsolete -- may be needed for transition to attributes. +#define __inner_typefix(ctype) +#define __inner_exceptthat + +#endif // ] + +// This section contains the deprecated annotations + +/* + ------------------------------------------------------------------------------- + Introduction + + sal.h provides a set of annotations to describe how a function uses its + parameters - the assumptions it makes about them, and the guarantees it makes + upon finishing. + + Annotations may be placed before either a function parameter's type or its return + type, and describe the function's behavior regarding the parameter or return value. + There are two classes of annotations: buffer annotations and advanced annotations. + Buffer annotations describe how functions use their pointer parameters, and + advanced annotations either describe complex/unusual buffer behavior, or provide + additional information about a parameter that is not otherwise expressible. 
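+    As a quick illustration (the names below are placeholders only), an
+    annotation is written immediately before the item it describes:
+
+        __checkReturn HRESULT Frobnicate( __in_ecount(cch) const WCHAR* pwsz, size_t cch );
+
+    Here __in_ecount(cch) is a buffer annotation on the parameter pwsz, and
+    __checkReturn is an advanced annotation on the return value.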
+ + ------------------------------------------------------------------------------- + Buffer Annotations + + The most important annotations in sal.h provide a consistent way to annotate + buffer parameters or return values for a function. Each of these annotations describes + a single buffer (which could be a string, a fixed-length or variable-length array, + or just a pointer) that the function interacts with: where it is, how large it is, + how much is initialized, and what the function does with it. + + The appropriate macro for a given buffer can be constructed using the table below. + Just pick the appropriate values from each category, and combine them together + with a leading underscore. Some combinations of values do not make sense as buffer + annotations. Only meaningful annotations can be added to your code; for a list of + these, see the buffer annotation definitions section. + + Only a single buffer annotation should be used for each parameter. + + |------------|------------|---------|--------|----------|----------|---------------| + | Level | Usage | Size | Output | NullTerm | Optional | Parameters | + |------------|------------|---------|--------|----------|----------|---------------| + | <> | <> | <> | <> | _z | <> | <> | + | _deref | _in | _ecount | _full | _nz | _opt | (size) | + | _deref_opt | _out | _bcount | _part | | | (size,length) | + | | _inout | | | | | | + | | | | | | | | + |------------|------------|---------|--------|----------|----------|---------------| + + Level: Describes the buffer pointer's level of indirection from the parameter or + return value 'p'. + + <> : p is the buffer pointer. + _deref : *p is the buffer pointer. p must not be NULL. + _deref_opt : *p may be the buffer pointer. p may be NULL, in which case the rest of + the annotation is ignored. + + Usage: Describes how the function uses the buffer. + + <> : The buffer is not accessed. If used on the return value or with _deref, the + function will provide the buffer, and it will be uninitialized at exit. + Otherwise, the caller must provide the buffer. This should only be used + for alloc and free functions. + _in : The function will only read from the buffer. The caller must provide the + buffer and initialize it. Cannot be used with _deref. + _out : The function will only write to the buffer. If used on the return value or + with _deref, the function will provide the buffer and initialize it. + Otherwise, the caller must provide the buffer, and the function will + initialize it. + _inout : The function may freely read from and write to the buffer. The caller must + provide the buffer and initialize it. If used with _deref, the buffer may + be reallocated by the function. + + Size: Describes the total size of the buffer. This may be less than the space actually + allocated for the buffer, in which case it describes the accessible amount. + + <> : No buffer size is given. If the type specifies the buffer size (such as + with LPSTR and LPWSTR), that amount is used. Otherwise, the buffer is one + element long. Must be used with _in, _out, or _inout. + _ecount : The buffer size is an explicit element count. + _bcount : The buffer size is an explicit byte count. + + Output: Describes how much of the buffer will be initialized by the function. For + _inout buffers, this also describes how much is initialized at entry. Omit this + category for _in buffers; they must be fully initialized by the caller. + + <> : The type specifies how much is initialized. 
For instance, a function initializing + an LPWSTR must NULL-terminate the string. + _full : The function initializes the entire buffer. + _part : The function initializes part of the buffer, and explicitly indicates how much. + + NullTerm: States if the present of a '\0' marks the end of valid elements in the buffer. + _z : A '\0' indicated the end of the buffer + _nz : The buffer may not be null terminated and a '\0' does not indicate the end of the + buffer. + Optional: Describes if the buffer itself is optional. + + <> : The pointer to the buffer must not be NULL. + _opt : The pointer to the buffer might be NULL. It will be checked before being dereferenced. + + Parameters: Gives explicit counts for the size and length of the buffer. + + <> : There is no explicit count. Use when neither _ecount nor _bcount is used. + (size) : Only the buffer's total size is given. Use with _ecount or _bcount but not _part. + (size,length) : The buffer's total size and initialized length are given. Use with _ecount_part + and _bcount_part. + + ------------------------------------------------------------------------------- + Buffer Annotation Examples + + LWSTDAPI_(BOOL) StrToIntExA( + __in LPCSTR pszString, + DWORD dwFlags, + __out int *piRet -- A pointer whose dereference will be filled in. + ); + + void MyPaintingFunction( + __in HWND hwndControl, -- An initialized read-only parameter. + __in_opt HDC hdcOptional, -- An initialized read-only parameter that might be NULL. + __inout IPropertyStore *ppsStore -- An initialized parameter that may be freely used + -- and modified. + ); + + LWSTDAPI_(BOOL) PathCompactPathExA( + __out_ecount(cchMax) LPSTR pszOut, -- A string buffer with cch elements that will + -- be NULL terminated on exit. + __in LPCSTR pszSrc, + UINT cchMax, + DWORD dwFlags + ); + + HRESULT SHLocalAllocBytes( + size_t cb, + __deref_bcount(cb) T **ppv -- A pointer whose dereference will be set to an + -- uninitialized buffer with cb bytes. + ); + + __inout_bcount_full(cb) : A buffer with cb elements that is fully initialized at + entry and exit, and may be written to by this function. + + __out_ecount_part(count, *countOut) : A buffer with count elements that will be + partially initialized by this function. The function indicates how much it + initialized by setting *countOut. + + ------------------------------------------------------------------------------- + Advanced Annotations + + Advanced annotations describe behavior that is not expressible with the regular + buffer macros. These may be used either to annotate buffer parameters that involve + complex or conditional behavior, or to enrich existing annotations with additional + information. + + __success(expr) f : + indicates whether function f succeeded or not. If is true at exit, + all the function's guarantees (as given by other annotations) must hold. If + is false at exit, the caller should not expect any of the function's guarantees + to hold. If not used, the function must always satisfy its guarantees. Added + automatically to functions that indicate success in standard ways, such as by + returning an HRESULT. + + __nullterminated p : + Pointer p is a buffer that may be read or written up to and including the first + NULL character or pointer. May be used on typedefs, which marks valid (properly + initialized) instances of that type as being NULL-terminated. + + __nullnullterminated p : + Pointer p is a buffer that may be read or written up to and including the first + sequence of two NULL characters or pointers. 
May be used on typedefs, which marks + valid instances of that type as being double-NULL terminated. + + __reserved v : + Value v must be 0/NULL, reserved for future use. + + __checkReturn v : + Return value v must not be ignored by callers of this function. + + __typefix(ctype) v : + Value v should be treated as an instance of ctype, rather than its declared type. + + __override f : + Specify C#-style 'override' behaviour for overriding virtual methods. + + __callback f : + Function f can be used as a function pointer. + + __format_string p : + Pointer p is a string that contains % markers in the style of printf. + + __blocksOn(resource) f : + Function f blocks on the resource 'resource'. + + __fallthrough : + Annotates switch statement labels where fall-through is desired, to distinguish + from forgotten break statements. + + ------------------------------------------------------------------------------- + Advanced Annotation Examples + + __success(return != FALSE) LWSTDAPI_(BOOL) + PathCanonicalizeA(__out_ecount(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath) : + pszBuf is only guaranteed to be NULL-terminated when TRUE is returned. + + typedef __nullterminated WCHAR* LPWSTR : Initialized LPWSTRs are NULL-terminated strings. + + __out_ecount(cch) __typefix(LPWSTR) void *psz : psz is a buffer parameter which will be + a NULL-terminated WCHAR string at exit, and which initially contains cch WCHARs. + + ------------------------------------------------------------------------------- +*/ + +#define __specstrings + +#ifdef __cplusplus // [ +#ifndef __nothrow // [ +# define __nothrow __declspec(nothrow) +#endif // ] +extern "C" { +#else // ][ +#ifndef __nothrow // [ +# define __nothrow +#endif // ] +#endif /* #ifdef __cplusplus */ // ] + + +/* + ------------------------------------------------------------------------------- + Helper Macro Definitions + + These express behavior common to many of the high-level annotations. + DO NOT USE THESE IN YOUR CODE. + ------------------------------------------------------------------------------- +*/ + +/* + The helper annotations are only understood by the compiler version used by + various defect detection tools. When the regular compiler is running, they + are defined into nothing, and do not affect the compiled code. +*/ + +#if !defined(__midl) && defined(_PREFAST_) // [ + + /* + In the primitive "SAL_*" annotations "SAL" stands for Standard + Annotation Language. These "SAL_*" annotations are the + primitives the compiler understands and high-level MACROs + will decompose into these primivates. + */ + + #define _SA_SPECSTRIZE( x ) #x + + /* + __null p + __notnull p + __maybenull p + + Annotates a pointer p. States that pointer p is null. Commonly used + in the negated form __notnull or the possibly null form __maybenull. + */ + +#ifndef PAL_STDCPP_COMPAT + #define __null _Null_impl_ + #define __notnull _Notnull_impl_ + #define __maybenull _Maybenull_impl_ +#endif // !PAL_STDCPP_COMPAT + + /* + __readonly l + __notreadonly l + __mabyereadonly l + + Annotates a location l. States that location l is not modified after + this point. If the annotation is placed on the precondition state of + a function, the restriction only applies until the postcondition state + of the function. __maybereadonly states that the annotated location + may be modified, whereas __notreadonly states that a location must be + modified. 
+ */ + + #define __readonly _Pre1_impl_(__readaccess_impl) + #define __notreadonly _Pre1_impl_(__allaccess_impl) + #define __maybereadonly _Pre1_impl_(__readaccess_impl) + + /* + __valid v + __notvalid v + __maybevalid v + + Annotates any value v. States that the value satisfies all properties of + valid values of its type. For example, for a string buffer, valid means + that the buffer pointer is either NULL or points to a NULL-terminated string. + */ + + #define __valid _Valid_impl_ + #define __notvalid _Notvalid_impl_ + #define __maybevalid _Maybevalid_impl_ + + /* + __readableTo(extent) p + + Annotates a buffer pointer p. If the buffer can be read, extent describes + how much of the buffer is readable. For a reader of the buffer, this is + an explicit permission to read up to that amount, rather than a restriction to + read only up to it. + */ + + #define __readableTo(extent) _SA_annotes1(SAL_readableTo, extent) + + /* + + __elem_readableTo(size) + + Annotates a buffer pointer p as being readable to size elements. + */ + + #define __elem_readableTo(size) _SA_annotes1(SAL_readableTo, elementCount( size )) + + /* + __byte_readableTo(size) + + Annotates a buffer pointer p as being readable to size bytes. + */ + #define __byte_readableTo(size) _SA_annotes1(SAL_readableTo, byteCount(size)) + + /* + __writableTo(extent) p + + Annotates a buffer pointer p. If the buffer can be modified, extent + describes how much of the buffer is writable (usually the allocation + size). For a writer of the buffer, this is an explicit permission to + write up to that amount, rather than a restriction to write only up to it. + */ + #define __writableTo(size) _SA_annotes1(SAL_writableTo, size) + + /* + __elem_writableTo(size) + + Annotates a buffer pointer p as being writable to size elements. + */ + #define __elem_writableTo(size) _SA_annotes1(SAL_writableTo, elementCount( size )) + + /* + __byte_writableTo(size) + + Annotates a buffer pointer p as being writable to size bytes. + */ + #define __byte_writableTo(size) _SA_annotes1(SAL_writableTo, byteCount( size)) + + /* + __deref p + + Annotates a pointer p. The next annotation applies one dereference down + in the type. If readableTo(p, size) then the next annotation applies to + all elements *(p+i) for which i satisfies the size. If p is a pointer + to a struct, the next annotation applies to all fields of the struct. + */ + #define __deref _Deref_impl_ + + /* + __pre __next_annotation + + The next annotation applies in the precondition state + */ + #define __pre _Pre_impl_ + + /* + __post __next_annotation + + The next annotation applies in the postcondition state + */ + #define __post _Post_impl_ + + /* + __precond() + + When is true, the next annotation applies in the precondition state + (currently not enabled) + */ + #define __precond(expr) __pre + + /* + __postcond() + + When is true, the next annotation applies in the postcondition state + (currently not enabled) + */ + #define __postcond(expr) __post + + /* + __exceptthat + + Given a set of annotations Q containing __exceptthat maybeP, the effect of + the except clause is to erase any P or notP annotations (explicit or + implied) within Q at the same level of dereferencing that the except + clause appears, and to replace it with maybeP. + + Example 1: __valid __pre_except_maybenull on a pointer p means that the + pointer may be null, and is otherwise valid, thus overriding + the implicit notnull annotation implied by __valid on + pointers. 
+ + Example 2: __valid __deref __pre_except_maybenull on an int **p means + that p is not null (implied by valid), but the elements + pointed to by p could be null, and are otherwise valid. + */ + #define __exceptthat __inner_exceptthat + + /* + _refparam + + Added to all out parameter macros to indicate that they are all reference + parameters. + */ + #define __refparam _Notref_ __deref __notreadonly + + /* + __inner_* + + Helper macros that directly correspond to certain high-level annotations. + + */ + + /* + Macros to classify the entrypoints and indicate their category. + + Pre-defined control point categories include: RPC, LPC, DeviceDriver, UserToKernel, ISAPI, COM. + + */ + #define __inner_control_entrypoint(category) _SA_annotes2(SAL_entrypoint, controlEntry, category) + + + /* + Pre-defined data entry point categories include: Registry, File, Network. + */ + #define __inner_data_entrypoint(category) _SA_annotes2(SAL_entrypoint, dataEntry, category) + + #define __inner_override _SA_annotes0(__override) + #define __inner_callback _SA_annotes0(__callback) + #define __inner_blocksOn(resource) _SA_annotes1(SAL_blocksOn, resource) + #define __inner_fallthrough_dec __inline __nothrow void __FallThrough() {} + #define __inner_fallthrough __FallThrough(); + + #define __post_except_maybenull __post __inner_exceptthat _Maybenull_impl_ + #define __pre_except_maybenull __pre __inner_exceptthat _Maybenull_impl_ + + #define __post_deref_except_maybenull __post __deref __inner_exceptthat _Maybenull_impl_ + #define __pre_deref_except_maybenull __pre __deref __inner_exceptthat _Maybenull_impl_ + + #define __inexpressible_readableTo(size) _Readable_elements_impl_(_Inexpressible_(size)) + #define __inexpressible_writableTo(size) _Writable_elements_impl_(_Inexpressible_(size)) + + +#else // ][ +#ifndef PAL_STDCPP_COMPAT + #define __null + #define __notnull +#endif // !PAL_STDCPP_COMPAT + #define __maybenull + #define __readonly + #define __notreadonly + #define __maybereadonly + #define __valid + #define __notvalid + #define __maybevalid + #define __readableTo(extent) + #define __elem_readableTo(size) + #define __byte_readableTo(size) + #define __writableTo(size) + #define __elem_writableTo(size) + #define __byte_writableTo(size) + #define __deref + #define __pre + #define __post + #define __precond(expr) + #define __postcond(expr) + #define __exceptthat + #define __inner_override + #define __inner_callback + #define __inner_blocksOn(resource) + #define __inner_fallthrough_dec + #define __inner_fallthrough + #define __refparam + #define __inner_control_entrypoint(category) + #define __inner_data_entrypoint(category) + + #define __post_except_maybenull + #define __pre_except_maybenull + #define __post_deref_except_maybenull + #define __pre_deref_except_maybenull + + #define __inexpressible_readableTo(size) + #define __inexpressible_writableTo(size) + +#endif /* #if !defined(__midl) && defined(_PREFAST_) */ // ] + +/* +------------------------------------------------------------------------------- +Buffer Annotation Definitions + +Any of these may be used to directly annotate functions, but only one should +be used for each parameter. To determine which annotation to use for a given +buffer, use the table in the buffer annotations section. +------------------------------------------------------------------------------- +*/ + +// These macros conflict with c++ headers. 
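+// Illustrative sketch (not part of the original annotations): a hypothetical
+// declaration showing how the buffer macros defined below are typically
+// applied. The function, parameter names, and types are invented for this
+// example only.
+//
+//   HRESULT CopyNames(
+//       __in_ecount(cchSrc) const WCHAR *pszSrc,               -- caller provides cchSrc initialized elements
+//       size_t cchSrc,
+//       __out_ecount_part(cchDst, *pcchWritten) WCHAR *pszDst, -- callee fills at most cchDst elements,
+//       size_t cchDst,                                         -- reports how many via *pcchWritten
+//       __out size_t *pcchWritten);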
+#ifndef PAL_STDCPP_COMPAT +#define __in _SAL1_Source_(__in, (), _In_) +#define __out _SAL1_Source_(__out, (), _Out_) +#endif // !PAL_STDCPP_COMPAT + +#define __ecount(size) _SAL1_Source_(__ecount, (size), __notnull __elem_writableTo(size)) +#define __bcount(size) _SAL1_Source_(__bcount, (size), __notnull __byte_writableTo(size)) +#define __in_ecount(size) _SAL1_Source_(__in_ecount, (size), _In_reads_(size)) +#define __in_bcount(size) _SAL1_Source_(__in_bcount, (size), _In_reads_bytes_(size)) +#define __in_z _SAL1_Source_(__in_z, (), _In_z_) +#define __in_ecount_z(size) _SAL1_Source_(__in_ecount_z, (size), _In_reads_z_(size)) +#define __in_bcount_z(size) _SAL1_Source_(__in_bcount_z, (size), __in_bcount(size) __pre __nullterminated) +#define __in_nz _SAL1_Source_(__in_nz, (), __in) +#define __in_ecount_nz(size) _SAL1_Source_(__in_ecount_nz, (size), __in_ecount(size)) +#define __in_bcount_nz(size) _SAL1_Source_(__in_bcount_nz, (size), __in_bcount(size)) +#define __out_ecount(size) _SAL1_Source_(__out_ecount, (size), _Out_writes_(size)) +#define __out_bcount(size) _SAL1_Source_(__out_bcount, (size), _Out_writes_bytes_(size)) +#define __out_ecount_part(size,length) _SAL1_Source_(__out_ecount_part, (size,length), _Out_writes_to_(size,length)) +#define __out_bcount_part(size,length) _SAL1_Source_(__out_bcount_part, (size,length), _Out_writes_bytes_to_(size,length)) +#define __out_ecount_full(size) _SAL1_Source_(__out_ecount_full, (size), _Out_writes_all_(size)) +#define __out_bcount_full(size) _SAL1_Source_(__out_bcount_full, (size), _Out_writes_bytes_all_(size)) +#define __out_z _SAL1_Source_(__out_z, (), __post __valid __refparam __post __nullterminated) +#define __out_z_opt _SAL1_Source_(__out_z_opt, (), __post __valid __refparam __post __nullterminated __pre_except_maybenull) +#define __out_ecount_z(size) _SAL1_Source_(__out_ecount_z, (size), __ecount(size) __post __valid __refparam __post __nullterminated) +#define __out_bcount_z(size) _SAL1_Source_(__out_bcount_z, (size), __bcount(size) __post __valid __refparam __post __nullterminated) +#define __out_ecount_part_z(size,length) _SAL1_Source_(__out_ecount_part_z, (size,length), __out_ecount_part(size,length) __post __nullterminated) +#define __out_bcount_part_z(size,length) _SAL1_Source_(__out_bcount_part_z, (size,length), __out_bcount_part(size,length) __post __nullterminated) +#define __out_ecount_full_z(size) _SAL1_Source_(__out_ecount_full_z, (size), __out_ecount_full(size) __post __nullterminated) +#define __out_bcount_full_z(size) _SAL1_Source_(__out_bcount_full_z, (size), __out_bcount_full(size) __post __nullterminated) +#define __out_nz _SAL1_Source_(__out_nz, (), __post __valid __refparam) +#define __out_nz_opt _SAL1_Source_(__out_nz_opt, (), __post __valid __refparam __post_except_maybenull_) +#define __out_ecount_nz(size) _SAL1_Source_(__out_ecount_nz, (size), __ecount(size) __post __valid __refparam) +#define __out_bcount_nz(size) _SAL1_Source_(__out_bcount_nz, (size), __bcount(size) __post __valid __refparam) +#define __inout _SAL1_Source_(__inout, (), _Inout_) +#define __inout_ecount(size) _SAL1_Source_(__inout_ecount, (size), _Inout_updates_(size)) +#define __inout_bcount(size) _SAL1_Source_(__inout_bcount, (size), _Inout_updates_bytes_(size)) +#define __inout_ecount_part(size,length) _SAL1_Source_(__inout_ecount_part, (size,length), _Inout_updates_to_(size,length)) +#define __inout_bcount_part(size,length) _SAL1_Source_(__inout_bcount_part, (size,length), _Inout_updates_bytes_to_(size,length)) +#define 
__inout_ecount_full(size) _SAL1_Source_(__inout_ecount_full, (size), _Inout_updates_all_(size)) +#define __inout_bcount_full(size) _SAL1_Source_(__inout_bcount_full, (size), _Inout_updates_bytes_all_(size)) +#define __inout_z _SAL1_Source_(__inout_z, (), _Inout_z_) +#define __inout_ecount_z(size) _SAL1_Source_(__inout_ecount_z, (size), _Inout_updates_z_(size)) +#define __inout_bcount_z(size) _SAL1_Source_(__inout_bcount_z, (size), __inout_bcount(size) __pre __nullterminated __post __nullterminated) +#define __inout_nz _SAL1_Source_(__inout_nz, (), __inout) +#define __inout_ecount_nz(size) _SAL1_Source_(__inout_ecount_nz, (size), __inout_ecount(size)) +#define __inout_bcount_nz(size) _SAL1_Source_(__inout_bcount_nz, (size), __inout_bcount(size)) +#define __ecount_opt(size) _SAL1_Source_(__ecount_opt, (size), __ecount(size) __pre_except_maybenull) +#define __bcount_opt(size) _SAL1_Source_(__bcount_opt, (size), __bcount(size) __pre_except_maybenull) +#define __in_opt _SAL1_Source_(__in_opt, (), _In_opt_) +#define __in_ecount_opt(size) _SAL1_Source_(__in_ecount_opt, (size), _In_reads_opt_(size)) +#define __in_bcount_opt(size) _SAL1_Source_(__in_bcount_opt, (size), _In_reads_bytes_opt_(size)) +#define __in_z_opt _SAL1_Source_(__in_z_opt, (), _In_opt_z_) +#define __in_ecount_z_opt(size) _SAL1_Source_(__in_ecount_z_opt, (size), __in_ecount_opt(size) __pre __nullterminated) +#define __in_bcount_z_opt(size) _SAL1_Source_(__in_bcount_z_opt, (size), __in_bcount_opt(size) __pre __nullterminated) +#define __in_nz_opt _SAL1_Source_(__in_nz_opt, (), __in_opt) +#define __in_ecount_nz_opt(size) _SAL1_Source_(__in_ecount_nz_opt, (size), __in_ecount_opt(size)) +#define __in_bcount_nz_opt(size) _SAL1_Source_(__in_bcount_nz_opt, (size), __in_bcount_opt(size)) +#define __out_opt _SAL1_Source_(__out_opt, (), _Out_opt_) +#define __out_ecount_opt(size) _SAL1_Source_(__out_ecount_opt, (size), _Out_writes_opt_(size)) +#define __out_bcount_opt(size) _SAL1_Source_(__out_bcount_opt, (size), _Out_writes_bytes_opt_(size)) +#define __out_ecount_part_opt(size,length) _SAL1_Source_(__out_ecount_part_opt, (size,length), __out_ecount_part(size,length) __pre_except_maybenull) +#define __out_bcount_part_opt(size,length) _SAL1_Source_(__out_bcount_part_opt, (size,length), __out_bcount_part(size,length) __pre_except_maybenull) +#define __out_ecount_full_opt(size) _SAL1_Source_(__out_ecount_full_opt, (size), __out_ecount_full(size) __pre_except_maybenull) +#define __out_bcount_full_opt(size) _SAL1_Source_(__out_bcount_full_opt, (size), __out_bcount_full(size) __pre_except_maybenull) +#define __out_ecount_z_opt(size) _SAL1_Source_(__out_ecount_z_opt, (size), __out_ecount_opt(size) __post __nullterminated) +#define __out_bcount_z_opt(size) _SAL1_Source_(__out_bcount_z_opt, (size), __out_bcount_opt(size) __post __nullterminated) +#define __out_ecount_part_z_opt(size,length) _SAL1_Source_(__out_ecount_part_z_opt, (size,length), __out_ecount_part_opt(size,length) __post __nullterminated) +#define __out_bcount_part_z_opt(size,length) _SAL1_Source_(__out_bcount_part_z_opt, (size,length), __out_bcount_part_opt(size,length) __post __nullterminated) +#define __out_ecount_full_z_opt(size) _SAL1_Source_(__out_ecount_full_z_opt, (size), __out_ecount_full_opt(size) __post __nullterminated) +#define __out_bcount_full_z_opt(size) _SAL1_Source_(__out_bcount_full_z_opt, (size), __out_bcount_full_opt(size) __post __nullterminated) +#define __out_ecount_nz_opt(size) _SAL1_Source_(__out_ecount_nz_opt, (size), __out_ecount_opt(size) __post 
__nullterminated) +#define __out_bcount_nz_opt(size) _SAL1_Source_(__out_bcount_nz_opt, (size), __out_bcount_opt(size) __post __nullterminated) +#define __inout_opt _SAL1_Source_(__inout_opt, (), _Inout_opt_) +#define __inout_ecount_opt(size) _SAL1_Source_(__inout_ecount_opt, (size), __inout_ecount(size) __pre_except_maybenull) +#define __inout_bcount_opt(size) _SAL1_Source_(__inout_bcount_opt, (size), __inout_bcount(size) __pre_except_maybenull) +#define __inout_ecount_part_opt(size,length) _SAL1_Source_(__inout_ecount_part_opt, (size,length), __inout_ecount_part(size,length) __pre_except_maybenull) +#define __inout_bcount_part_opt(size,length) _SAL1_Source_(__inout_bcount_part_opt, (size,length), __inout_bcount_part(size,length) __pre_except_maybenull) +#define __inout_ecount_full_opt(size) _SAL1_Source_(__inout_ecount_full_opt, (size), __inout_ecount_full(size) __pre_except_maybenull) +#define __inout_bcount_full_opt(size) _SAL1_Source_(__inout_bcount_full_opt, (size), __inout_bcount_full(size) __pre_except_maybenull) +#define __inout_z_opt _SAL1_Source_(__inout_z_opt, (), __inout_opt __pre __nullterminated __post __nullterminated) +#define __inout_ecount_z_opt(size) _SAL1_Source_(__inout_ecount_z_opt, (size), __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated) +#define __inout_ecount_z_opt(size) _SAL1_Source_(__inout_ecount_z_opt, (size), __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated) +#define __inout_bcount_z_opt(size) _SAL1_Source_(__inout_bcount_z_opt, (size), __inout_bcount_opt(size)) +#define __inout_nz_opt _SAL1_Source_(__inout_nz_opt, (), __inout_opt) +#define __inout_ecount_nz_opt(size) _SAL1_Source_(__inout_ecount_nz_opt, (size), __inout_ecount_opt(size)) +#define __inout_bcount_nz_opt(size) _SAL1_Source_(__inout_bcount_nz_opt, (size), __inout_bcount_opt(size)) +#define __deref_ecount(size) _SAL1_Source_(__deref_ecount, (size), _Notref_ __ecount(1) __post _Notref_ __elem_readableTo(1) __post _Notref_ __deref _Notref_ __notnull __post __deref __elem_writableTo(size)) +#define __deref_bcount(size) _SAL1_Source_(__deref_bcount, (size), _Notref_ __ecount(1) __post _Notref_ __elem_readableTo(1) __post _Notref_ __deref _Notref_ __notnull __post __deref __byte_writableTo(size)) +#define __deref_out _SAL1_Source_(__deref_out, (), _Outptr_) +#define __deref_out_ecount(size) _SAL1_Source_(__deref_out_ecount, (size), _Outptr_result_buffer_(size)) +#define __deref_out_bcount(size) _SAL1_Source_(__deref_out_bcount, (size), _Outptr_result_bytebuffer_(size)) +#define __deref_out_ecount_part(size,length) _SAL1_Source_(__deref_out_ecount_part, (size,length), _Outptr_result_buffer_to_(size,length)) +#define __deref_out_bcount_part(size,length) _SAL1_Source_(__deref_out_bcount_part, (size,length), _Outptr_result_bytebuffer_to_(size,length)) +#define __deref_out_ecount_full(size) _SAL1_Source_(__deref_out_ecount_full, (size), __deref_out_ecount_part(size,size)) +#define __deref_out_bcount_full(size) _SAL1_Source_(__deref_out_bcount_full, (size), __deref_out_bcount_part(size,size)) +#define __deref_out_z _SAL1_Source_(__deref_out_z, (), _Outptr_result_z_) +#define __deref_out_ecount_z(size) _SAL1_Source_(__deref_out_ecount_z, (size), __deref_out_ecount(size) __post __deref __nullterminated) +#define __deref_out_bcount_z(size) _SAL1_Source_(__deref_out_bcount_z, (size), __deref_out_bcount(size) __post __deref __nullterminated) +#define __deref_out_nz _SAL1_Source_(__deref_out_nz, (), __deref_out) +#define __deref_out_ecount_nz(size) 
_SAL1_Source_(__deref_out_ecount_nz, (size), __deref_out_ecount(size)) +#define __deref_out_bcount_nz(size) _SAL1_Source_(__deref_out_bcount_nz, (size), __deref_out_ecount(size)) +#define __deref_inout _SAL1_Source_(__deref_inout, (), _Notref_ __notnull _Notref_ __elem_readableTo(1) __pre __deref __valid __post _Notref_ __deref __valid __refparam) +#define __deref_inout_z _SAL1_Source_(__deref_inout_z, (), __deref_inout __pre __deref __nullterminated __post _Notref_ __deref __nullterminated) +#define __deref_inout_ecount(size) _SAL1_Source_(__deref_inout_ecount, (size), __deref_inout __pre __deref __elem_writableTo(size) __post _Notref_ __deref __elem_writableTo(size)) +#define __deref_inout_bcount(size) _SAL1_Source_(__deref_inout_bcount, (size), __deref_inout __pre __deref __byte_writableTo(size) __post _Notref_ __deref __byte_writableTo(size)) +#define __deref_inout_ecount_part(size,length) _SAL1_Source_(__deref_inout_ecount_part, (size,length), __deref_inout_ecount(size) __pre __deref __elem_readableTo(length) __post __deref __elem_readableTo(length)) +#define __deref_inout_bcount_part(size,length) _SAL1_Source_(__deref_inout_bcount_part, (size,length), __deref_inout_bcount(size) __pre __deref __byte_readableTo(length) __post __deref __byte_readableTo(length)) +#define __deref_inout_ecount_full(size) _SAL1_Source_(__deref_inout_ecount_full, (size), __deref_inout_ecount_part(size,size)) +#define __deref_inout_bcount_full(size) _SAL1_Source_(__deref_inout_bcount_full, (size), __deref_inout_bcount_part(size,size)) +#define __deref_inout_ecount_z(size) _SAL1_Source_(__deref_inout_ecount_z, (size), __deref_inout_ecount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_bcount_z(size) _SAL1_Source_(__deref_inout_bcount_z, (size), __deref_inout_bcount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_nz _SAL1_Source_(__deref_inout_nz, (), __deref_inout) +#define __deref_inout_ecount_nz(size) _SAL1_Source_(__deref_inout_ecount_nz, (size), __deref_inout_ecount(size)) +#define __deref_inout_bcount_nz(size) _SAL1_Source_(__deref_inout_bcount_nz, (size), __deref_inout_ecount(size)) +#define __deref_ecount_opt(size) _SAL1_Source_(__deref_ecount_opt, (size), __deref_ecount(size) __post_deref_except_maybenull) +#define __deref_bcount_opt(size) _SAL1_Source_(__deref_bcount_opt, (size), __deref_bcount(size) __post_deref_except_maybenull) +#define __deref_out_opt _SAL1_Source_(__deref_out_opt, (), __deref_out __post_deref_except_maybenull) +#define __deref_out_ecount_opt(size) _SAL1_Source_(__deref_out_ecount_opt, (size), __deref_out_ecount(size) __post_deref_except_maybenull) +#define __deref_out_bcount_opt(size) _SAL1_Source_(__deref_out_bcount_opt, (size), __deref_out_bcount(size) __post_deref_except_maybenull) +#define __deref_out_ecount_part_opt(size,length) _SAL1_Source_(__deref_out_ecount_part_opt, (size,length), __deref_out_ecount_part(size,length) __post_deref_except_maybenull) +#define __deref_out_bcount_part_opt(size,length) _SAL1_Source_(__deref_out_bcount_part_opt, (size,length), __deref_out_bcount_part(size,length) __post_deref_except_maybenull) +#define __deref_out_ecount_full_opt(size) _SAL1_Source_(__deref_out_ecount_full_opt, (size), __deref_out_ecount_full(size) __post_deref_except_maybenull) +#define __deref_out_bcount_full_opt(size) _SAL1_Source_(__deref_out_bcount_full_opt, (size), __deref_out_bcount_full(size) __post_deref_except_maybenull) +#define __deref_out_z_opt 
_SAL1_Source_(__deref_out_z_opt, (), _Outptr_result_maybenull_z_) +#define __deref_out_ecount_z_opt(size) _SAL1_Source_(__deref_out_ecount_z_opt, (size), __deref_out_ecount_opt(size) __post __deref __nullterminated) +#define __deref_out_bcount_z_opt(size) _SAL1_Source_(__deref_out_bcount_z_opt, (size), __deref_out_bcount_opt(size) __post __deref __nullterminated) +#define __deref_out_nz_opt _SAL1_Source_(__deref_out_nz_opt, (), __deref_out_opt) +#define __deref_out_ecount_nz_opt(size) _SAL1_Source_(__deref_out_ecount_nz_opt, (size), __deref_out_ecount_opt(size)) +#define __deref_out_bcount_nz_opt(size) _SAL1_Source_(__deref_out_bcount_nz_opt, (size), __deref_out_bcount_opt(size)) +#define __deref_inout_opt _SAL1_Source_(__deref_inout_opt, (), __deref_inout __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_ecount_opt(size) _SAL1_Source_(__deref_inout_ecount_opt, (size), __deref_inout_ecount(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_bcount_opt(size) _SAL1_Source_(__deref_inout_bcount_opt, (size), __deref_inout_bcount(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_ecount_part_opt(size,length) _SAL1_Source_(__deref_inout_ecount_part_opt, (size,length), __deref_inout_ecount_part(size,length) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_bcount_part_opt(size,length) _SAL1_Source_(__deref_inout_bcount_part_opt, (size,length), __deref_inout_bcount_part(size,length) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_ecount_full_opt(size) _SAL1_Source_(__deref_inout_ecount_full_opt, (size), __deref_inout_ecount_full(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_bcount_full_opt(size) _SAL1_Source_(__deref_inout_bcount_full_opt, (size), __deref_inout_bcount_full(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_z_opt _SAL1_Source_(__deref_inout_z_opt, (), __deref_inout_opt __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_ecount_z_opt(size) _SAL1_Source_(__deref_inout_ecount_z_opt, (size), __deref_inout_ecount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_bcount_z_opt(size) _SAL1_Source_(__deref_inout_bcount_z_opt, (size), __deref_inout_bcount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_nz_opt _SAL1_Source_(__deref_inout_nz_opt, (), __deref_inout_opt) +#define __deref_inout_ecount_nz_opt(size) _SAL1_Source_(__deref_inout_ecount_nz_opt, (size), __deref_inout_ecount_opt(size)) +#define __deref_inout_bcount_nz_opt(size) _SAL1_Source_(__deref_inout_bcount_nz_opt, (size), __deref_inout_bcount_opt(size)) +#define __deref_opt_ecount(size) _SAL1_Source_(__deref_opt_ecount, (size), __deref_ecount(size) __pre_except_maybenull) +#define __deref_opt_bcount(size) _SAL1_Source_(__deref_opt_bcount, (size), __deref_bcount(size) __pre_except_maybenull) +#define __deref_opt_out _SAL1_Source_(__deref_opt_out, (), _Outptr_opt_) +#define __deref_opt_out_z _SAL1_Source_(__deref_opt_out_z, (), _Outptr_opt_result_z_) +#define __deref_opt_out_ecount(size) _SAL1_Source_(__deref_opt_out_ecount, (size), __deref_out_ecount(size) __pre_except_maybenull) +#define __deref_opt_out_bcount(size) _SAL1_Source_(__deref_opt_out_bcount, (size), __deref_out_bcount(size) __pre_except_maybenull) +#define 
__deref_opt_out_ecount_part(size,length) _SAL1_Source_(__deref_opt_out_ecount_part, (size,length), __deref_out_ecount_part(size,length) __pre_except_maybenull) +#define __deref_opt_out_bcount_part(size,length) _SAL1_Source_(__deref_opt_out_bcount_part, (size,length), __deref_out_bcount_part(size,length) __pre_except_maybenull) +#define __deref_opt_out_ecount_full(size) _SAL1_Source_(__deref_opt_out_ecount_full, (size), __deref_out_ecount_full(size) __pre_except_maybenull) +#define __deref_opt_out_bcount_full(size) _SAL1_Source_(__deref_opt_out_bcount_full, (size), __deref_out_bcount_full(size) __pre_except_maybenull) +#define __deref_opt_inout _SAL1_Source_(__deref_opt_inout, (), _Inout_opt_) +#define __deref_opt_inout_ecount(size) _SAL1_Source_(__deref_opt_inout_ecount, (size), __deref_inout_ecount(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount(size) _SAL1_Source_(__deref_opt_inout_bcount, (size), __deref_inout_bcount(size) __pre_except_maybenull) +#define __deref_opt_inout_ecount_part(size,length) _SAL1_Source_(__deref_opt_inout_ecount_part, (size,length), __deref_inout_ecount_part(size,length) __pre_except_maybenull) +#define __deref_opt_inout_bcount_part(size,length) _SAL1_Source_(__deref_opt_inout_bcount_part, (size,length), __deref_inout_bcount_part(size,length) __pre_except_maybenull) +#define __deref_opt_inout_ecount_full(size) _SAL1_Source_(__deref_opt_inout_ecount_full, (size), __deref_inout_ecount_full(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount_full(size) _SAL1_Source_(__deref_opt_inout_bcount_full, (size), __deref_inout_bcount_full(size) __pre_except_maybenull) +#define __deref_opt_inout_z _SAL1_Source_(__deref_opt_inout_z, (), __deref_opt_inout __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_ecount_z(size) _SAL1_Source_(__deref_opt_inout_ecount_z, (size), __deref_opt_inout_ecount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_bcount_z(size) _SAL1_Source_(__deref_opt_inout_bcount_z, (size), __deref_opt_inout_bcount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_nz _SAL1_Source_(__deref_opt_inout_nz, (), __deref_opt_inout) +#define __deref_opt_inout_ecount_nz(size) _SAL1_Source_(__deref_opt_inout_ecount_nz, (size), __deref_opt_inout_ecount(size)) +#define __deref_opt_inout_bcount_nz(size) _SAL1_Source_(__deref_opt_inout_bcount_nz, (size), __deref_opt_inout_bcount(size)) +#define __deref_opt_ecount_opt(size) _SAL1_Source_(__deref_opt_ecount_opt, (size), __deref_ecount_opt(size) __pre_except_maybenull) +#define __deref_opt_bcount_opt(size) _SAL1_Source_(__deref_opt_bcount_opt, (size), __deref_bcount_opt(size) __pre_except_maybenull) +#define __deref_opt_out_opt _SAL1_Source_(__deref_opt_out_opt, (), _Outptr_opt_result_maybenull_) +#define __deref_opt_out_ecount_opt(size) _SAL1_Source_(__deref_opt_out_ecount_opt, (size), __deref_out_ecount_opt(size) __pre_except_maybenull) +#define __deref_opt_out_bcount_opt(size) _SAL1_Source_(__deref_opt_out_bcount_opt, (size), __deref_out_bcount_opt(size) __pre_except_maybenull) +#define __deref_opt_out_ecount_part_opt(size,length) _SAL1_Source_(__deref_opt_out_ecount_part_opt, (size,length), __deref_out_ecount_part_opt(size,length) __pre_except_maybenull) +#define __deref_opt_out_bcount_part_opt(size,length) _SAL1_Source_(__deref_opt_out_bcount_part_opt, (size,length), __deref_out_bcount_part_opt(size,length) __pre_except_maybenull) +#define 
__deref_opt_out_ecount_full_opt(size) _SAL1_Source_(__deref_opt_out_ecount_full_opt, (size), __deref_out_ecount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_out_bcount_full_opt(size) _SAL1_Source_(__deref_opt_out_bcount_full_opt, (size), __deref_out_bcount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_out_z_opt _SAL1_Source_(__deref_opt_out_z_opt, (), __post __deref __valid __refparam __pre_except_maybenull __pre_deref_except_maybenull __post_deref_except_maybenull __post __deref __nullterminated) +#define __deref_opt_out_ecount_z_opt(size) _SAL1_Source_(__deref_opt_out_ecount_z_opt, (size), __deref_opt_out_ecount_opt(size) __post __deref __nullterminated) +#define __deref_opt_out_bcount_z_opt(size) _SAL1_Source_(__deref_opt_out_bcount_z_opt, (size), __deref_opt_out_bcount_opt(size) __post __deref __nullterminated) +#define __deref_opt_out_nz_opt _SAL1_Source_(__deref_opt_out_nz_opt, (), __deref_opt_out_opt) +#define __deref_opt_out_ecount_nz_opt(size) _SAL1_Source_(__deref_opt_out_ecount_nz_opt, (size), __deref_opt_out_ecount_opt(size)) +#define __deref_opt_out_bcount_nz_opt(size) _SAL1_Source_(__deref_opt_out_bcount_nz_opt, (size), __deref_opt_out_bcount_opt(size)) +#define __deref_opt_inout_opt _SAL1_Source_(__deref_opt_inout_opt, (), __deref_inout_opt __pre_except_maybenull) +#define __deref_opt_inout_ecount_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_opt, (size), __deref_inout_ecount_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_opt, (size), __deref_inout_bcount_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_ecount_part_opt(size,length) _SAL1_Source_(__deref_opt_inout_ecount_part_opt, (size,length), __deref_inout_ecount_part_opt(size,length) __pre_except_maybenull) +#define __deref_opt_inout_bcount_part_opt(size,length) _SAL1_Source_(__deref_opt_inout_bcount_part_opt, (size,length), __deref_inout_bcount_part_opt(size,length) __pre_except_maybenull) +#define __deref_opt_inout_ecount_full_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_full_opt, (size), __deref_inout_ecount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount_full_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_full_opt, (size), __deref_inout_bcount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_z_opt _SAL1_Source_(__deref_opt_inout_z_opt, (), __deref_opt_inout_opt __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_ecount_z_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_z_opt, (size), __deref_opt_inout_ecount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_bcount_z_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_z_opt, (size), __deref_opt_inout_bcount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_nz_opt _SAL1_Source_(__deref_opt_inout_nz_opt, (), __deref_opt_inout_opt) +#define __deref_opt_inout_ecount_nz_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_nz_opt, (size), __deref_opt_inout_ecount_opt(size)) +#define __deref_opt_inout_bcount_nz_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_nz_opt, (size), __deref_opt_inout_bcount_opt(size)) + +/* +------------------------------------------------------------------------------- +Advanced Annotation Definitions + +Any of these may be used to directly annotate functions, and may be used in +combination with each other or with regular buffer macros. 
For an explanation +of each annotation, see the advanced annotations section. +------------------------------------------------------------------------------- +*/ + +#define __success(expr) _Success_(expr) +#define __nullterminated _Null_terminated_ +#define __nullnullterminated +#define __clr_reserved _SAL1_Source_(__reserved, (), _Reserved_) +#define __checkReturn _SAL1_Source_(__checkReturn, (), _Check_return_) +#define __typefix(ctype) _SAL1_Source_(__typefix, (ctype), __inner_typefix(ctype)) +#define __override __inner_override +#define __callback __inner_callback +#define __format_string _Printf_format_string_ +#define __blocksOn(resource) __inner_blocksOn(resource) +#define __control_entrypoint(category) __inner_control_entrypoint(category) +#define __data_entrypoint(category) __inner_data_entrypoint(category) +#define __useHeader _Use_decl_anno_impl_ +#define __on_failure(annotes) _On_failure_impl_(annotes _SAL_nop_impl_) + +#ifndef __fallthrough // [ + __inner_fallthrough_dec + #define __fallthrough __inner_fallthrough +#endif // ] + +#ifndef __analysis_assume // [ +#ifdef _PREFAST_ // [ +#define __analysis_assume(expr) __assume(expr) +#else // ][ +#define __analysis_assume(expr) +#endif // ] +#endif // ] + +#ifndef _Analysis_assume_ // [ +#ifdef _PREFAST_ // [ +#define _Analysis_assume_(expr) __assume(expr) +#else // ][ +#define _Analysis_assume_(expr) +#endif // ] +#endif // ] + +#define _Analysis_noreturn_ _SAL2_Source_(_Analysis_noreturn_, (), _SA_annotes0(SAL_terminates)) + +#ifdef _PREFAST_ // [ +__inline __nothrow +void __AnalysisAssumeNullterminated(_Post_ __nullterminated void *p); + +#define _Analysis_assume_nullterminated_(x) __AnalysisAssumeNullterminated(x) +#else // ][ +#define _Analysis_assume_nullterminated_(x) +#endif // ] + +// +// Set the analysis mode (global flags to analysis). +// They take effect at the point of declaration; use at global scope +// as a declaration. +// + +// Synthesize a unique symbol. +#define ___MKID(x, y) x ## y +#define __MKID(x, y) ___MKID(x, y) +#define __GENSYM(x) __MKID(x, __COUNTER__) + +__ANNOTATION(SAL_analysisMode(__AuToQuOtE __In_impl_ char *mode);) + +#define _Analysis_mode_impl_(mode) _SA_annotes1(SAL_analysisMode, #mode) + +#define _Analysis_mode_(mode) \ + typedef _Analysis_mode_impl_(mode) int \ + __GENSYM(__prefast_analysis_mode_flag); + +// The following are predefined: +// _Analysis_operator_new_throw_ (operator new throws) +// _Analysis_operator_new_null_ (operator new returns null) +// _Analysis_operator_new_never_fails_ (operator new never fails) +// + +// Function class annotations. +__ANNOTATION(SAL_functionClassNew(__In_impl_ char*);) +__PRIMOP(int, _In_function_class_(__In_impl_ char*);) +#define _In_function_class_(x) _In_function_class_(#x) + +#define _Function_class_(x) _SA_annotes1(SAL_functionClassNew, #x) + +/* + * interlocked operand used in interlocked instructions + */ +//#define _Interlocked_operand_ _Pre_ _SA_annotes0(SAL_interlocked) + +#define _Enum_is_bitflag_ _SA_annotes0(SAL_enumIsBitflag) +#define _Strict_type_match_ _SA_annotes0(SAL_strictType2) + +#define _Maybe_raises_SEH_exception_ _Pre_ _SA_annotes1(SAL_inTry,__yes) +#define _Raises_SEH_exception_ _Group_(_Maybe_raises_SEH_exception_ _Analysis_noreturn_) + +#ifdef __cplusplus // [ +} +#endif // ] + +// Rotor doesn't need concurrency sal. 
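+// Illustrative sketch (hypothetical function and parameter names) combining
+// the advanced annotations defined above:
+//
+//   __checkReturn __success(return != FALSE)
+//   BOOL FormatWidgetLabel(
+//       __out_ecount(cchBuf) LPWSTR pszBuf,  -- guaranteed NULL-terminated only when TRUE is returned
+//       UINT cchBuf,
+//       __format_string LPCWSTR pszFormat,   -- printf-style format string
+//       ...);
+//
+//   switch (opt) {
+//   case OPT_VERBOSE:
+//       verbose = TRUE;
+//       __fallthrough;                       -- intentional fall-through, not a forgotten break
+//   case OPT_DEFAULT:
+//       ApplyDefaults();
+//       break;
+//   }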
+// #include + +#define _Interlocked_operand_ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/specstrings.h b/src/coreclr/src/nativeaot/Runtime/unix/specstrings.h new file mode 100644 index 0000000000000..cbab8237961f1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/specstrings.h @@ -0,0 +1,535 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + + +// +#ifndef SPECSTRINGS_H +#define SPECSTRINGS_H + +/*** +*specstrings.h - support for markers for documenting the semantics of APIs +* + +* +* [Public] +****/ + +/************************************************************************* +* See specstrings_strict.h for documentation of all user visible macros. +*************************************************************************/ + +#if _MSC_VER +#pragma once +#endif + +#if !defined(_SAL_VERSION_SAL2) + + #if defined(__BUILDMACHINE__) || defined(_USE_SAL2_ONLY) + #define _SAL_VERSION_SAL2(_A) SAL_2_Clean_Violation_using ## _A + #else + #define _SAL_VERSION_SAL2(_A) + #endif + + #ifdef _USE_SAL2_ONLY + #define _SAL2_STRICT + #define _SAL_VERSION_CHECK(_A) _SAL_VERSION_SAL2(_A) + #else + #define _SAL_VERSION_CHECK(_A) + #endif + + #ifndef SAL_VERSION_CHECK + #define SAL_VERSION_CHECK(_A) _SAL_VERSION_CHECK(_A) + #define SAL_VERSION_SAL2(_A) _SAL_VERSION_SAL2(_A) + #endif + +#endif + +#include + +#ifndef __SAL_H_FULL_VER +#define __SAL_H_FULL_VER 140050727 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* version specific fixes to bring sal.h upto date */ +#if __SAL_H_FULL_VER <= 140050727 + +#if !defined(__midl) && defined(_PREFAST_) && _MSC_VER >= 1000 // [ + +/* Missing from RTM sal.h */ +#define __inner_bound _SA_annotes0(SAL_bound) +#define __inner_range(lb,ub) _SA_annotes2(SAL_range,lb,ub) +#define __inner_assume_bound_dec __inline __nothrow void __AssumeBoundInt(_Post_ __inner_bound int i) {i;} +#define __inner_assume_bound(i) __AssumeBoundInt(i); +#define __inner_allocator _SA_annotes0(SAL_allocator) + +#define __static_context(ctx, annotes) \ + _SA_annotes1(SAL_context,ctx) _Group_(__nop_impl(annotes)) + +#define __failure(x) __static_context(SAL_return_convention, \ + _SA_annotes1(SAL_failure,x)) + +__ANNOTATION(SAL_valueUndefined()); +#define __valueUndefined _SA_annotes0(SAL_valueUndefined) + +enum __SAL_failureKind{__failureUnspecified = 0, __failureUndefined = 1}; + +__ANNOTATION(SAL_failureDefault(enum __SAL_failureKind)); +#define __failureDefault(kind) __static_context(SAL_return_convention, \ + _SA_annotes1(SAL_failureDefault,kind)) + +#else // ][ + +#define __inner_bound +#define __inner_range(lb,ub) +#define __inner_assume_bound_dec +#define __inner_assume_bound(i) +#define __inner_allocator + +#define __static_context(ctx, annotes) +#define __failure(x) +#define __valueUndefined +#define __failureDefault(x) + +#endif // ] + +#define __xcount(size) __notnull __inexpressible_writableTo(size) +#define __in_xcount(size) __in _Pre_ __inexpressible_readableTo(size) +#define __out_xcount(size) __xcount(size) _Post_ __valid __refparam +#define __out_xcount_part(size,length) __out_xcount(size) _Post_ __inexpressible_readableTo(length) +#define __out_xcount_full(size) __out_xcount_part(size,size) +#define __inout_xcount(size) __out_xcount(size) _Pre_ __valid +#define __inout_xcount_part(size,length) __out_xcount_part(size,length) _Pre_ __valid _Pre_ __inexpressible_readableTo(length) +#define __inout_xcount_full(size) __inout_xcount_part(size,size) +#define 
__xcount_opt(size) __xcount(size) __exceptthat __maybenull +#define __in_xcount_opt(size) __in_xcount(size) __exceptthat __maybenull +#define __out_xcount_opt(size) __out_xcount(size) __exceptthat __maybenull +#define __out_xcount_part_opt(size,length) __out_xcount_part(size,length) __exceptthat __maybenull +#define __out_xcount_full_opt(size) __out_xcount_full(size) __exceptthat __maybenull +#define __inout_xcount_opt(size) __inout_xcount(size) __exceptthat __maybenull +#define __inout_xcount_part_opt(size,length) __inout_xcount_part(size,length) __exceptthat __maybenull +#define __inout_xcount_full_opt(size) __inout_xcount_full(size) __exceptthat __maybenull +#define __deref_xcount(size) __ecount(1) _Post_ __elem_readableTo(1) _Post_ __deref __notnull _Post_ __deref __inexpressible_writableTo(size) +#define __deref_in __in _Pre_ __deref __deref __readonly +#define __deref_in_ecount(size) __deref_in _Pre_ __deref __elem_readableTo(size) +#define __deref_in_bcount(size) __deref_in _Pre_ __deref __byte_readableTo(size) +#define __deref_in_xcount(size) __deref_in _Pre_ __deref __inexpressible_readableTo(size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __deref_out_xcount_part(size,length) __deref_out_xcount(size) _Post_ __deref __inexpressible_readableTo(length) +#define __deref_out_xcount_full(size) __deref_out_xcount_part(size,size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __inout_xcount_opt(size) __inout_xcount(size) __exceptthat __maybenull +#define __inout_xcount_part_opt(size,length) __inout_xcount_part(size,length) __exceptthat __maybenull +#define __inout_xcount_full_opt(size) __inout_xcount_full(size) __exceptthat __maybenull +#define __deref_xcount(size) __ecount(1) _Post_ __elem_readableTo(1) _Post_ __deref __notnull _Post_ __deref __inexpressible_writableTo(size) +#define __deref_in __in _Pre_ __deref __deref __readonly +#define __deref_in_ecount(size) __deref_in _Pre_ __deref __elem_readableTo(size) +#define __deref_in_bcount(size) __deref_in _Pre_ __deref __byte_readableTo(size) +#define __deref_in_xcount(size) __deref_in _Pre_ __deref __inexpressible_readableTo(size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __deref_out_xcount_part(size,length) __deref_out_xcount(size) _Post_ __deref __inexpressible_readableTo(length) +#define __deref_out_xcount_full(size) __deref_out_xcount_part(size,size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __deref_inout_xcount(size) __deref_inout _Pre_ __deref __inexpressible_writableTo(size) _Post_ __deref __inexpressible_writableTo(size) +#define __deref_inout_xcount_part(size,length) __deref_inout_xcount(size) _Pre_ __deref __inexpressible_readableTo(length) _Post_ __deref __inexpressible_readableTo(length) +#define __deref_inout_xcount_full(size) __deref_inout_xcount_part(size,size) +#define __deref_xcount_opt(size) __deref_xcount(size) _Post_ __deref __exceptthat __maybenull +#define __deref_in_opt __deref_in _Pre_ __deref __exceptthat __maybenull +#define __deref_in_opt_out __deref_inout _Pre_ __deref __exceptthat __maybenull _Post_ __deref __notnull +#define __deref_in_ecount_opt(size) __deref_in_ecount(size) _Pre_ __deref __exceptthat __maybenull +#define __deref_in_bcount_opt(size) __deref_in_bcount(size) _Pre_ __deref __exceptthat __maybenull +#define __deref_in_xcount_opt(size) __deref_in_xcount(size) _Pre_ __deref 
__exceptthat __maybenull +#define __deref_out_xcount_opt(size) __deref_out_xcount(size) _Post_ __deref __exceptthat __maybenull +#define __deref_out_xcount_part_opt(size,length) __deref_out_xcount_part(size,length) _Post_ __deref __exceptthat __maybenull +#define __deref_out_xcount_full_opt(size) __deref_out_xcount_full(size) _Post_ __deref __exceptthat __maybenull +#define __deref_inout_xcount_opt(size) __deref_inout_xcount(size) _Pre_ __deref __exceptthat __maybenull _Post_ __deref __exceptthat __maybenull +#define __deref_inout_xcount_part_opt(size,length) __deref_inout_xcount_part(size,length) _Pre_ __deref __exceptthat __maybenull _Post_ __deref __exceptthat __maybenull +#define __deref_inout_xcount_full_opt(size) __deref_inout_xcount_full(size) _Pre_ __deref __exceptthat __maybenull _Post_ __deref __exceptthat __maybenull +#define __deref_opt_xcount(size) __deref_xcount(size) __exceptthat __maybenull +#define __deref_opt_in __deref_in __exceptthat __maybenull +#define __deref_opt_in_ecount(size) __deref_in_ecount(size) __exceptthat __maybenull +#define __deref_opt_in_bcount(size) __deref_in_bcount(size) __exceptthat __maybenull +#define __deref_opt_in_xcount(size) __deref_in_xcount(size) __exceptthat __maybenull +#define __deref_opt_out_xcount(size) __deref_out_xcount(size) __exceptthat __maybenull +#define __deref_opt_out_xcount_part(size,length) __deref_out_xcount_part(size,length) __exceptthat __maybenull +#define __deref_opt_out_xcount_full(size) __deref_out_xcount_full(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount(size) __deref_inout_xcount(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount_part(size,length) __deref_inout_xcount_part(size,length) __exceptthat __maybenull +#define __deref_opt_inout_xcount_full(size) __deref_inout_xcount_full(size) __exceptthat __maybenull +#define __deref_opt_xcount_opt(size) __deref_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_in_opt __deref_in_opt __exceptthat __maybenull +#define __deref_opt_in_ecount_opt(size) __deref_in_ecount_opt(size) __exceptthat __maybenull +#define __deref_opt_in_bcount_opt(size) __deref_in_bcount_opt(size) __exceptthat __maybenull +#define __deref_opt_in_xcount_opt(size) __deref_in_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_out_xcount_opt(size) __deref_out_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_out_xcount_part_opt(size,length) __deref_out_xcount_part_opt(size,length) __exceptthat __maybenull +#define __deref_opt_out_xcount_full_opt(size) __deref_out_xcount_full_opt(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount_opt(size) __deref_inout_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount_part_opt(size,length) __deref_inout_xcount_part_opt(size,length) __exceptthat __maybenull +#define __deref_opt_inout_xcount_full_opt(size) __deref_inout_xcount_full_opt(size) __exceptthat __maybenull + +#define __deref_in_ecount_iterator(size, incr) __inout _Pre_ __deref __elem_readableTo(size) __deref_out_range(==, _Old_(*_Curr_) + incr) +#define __deref_out_ecount_iterator(size, incr) __inout _Pre_ __deref __elem_writableTo(size) __deref_out_range(==, _Old_(*_Curr_) + incr) +#define __deref_inout_ecount_iterator(size, incr) __inout _Pre_ __deref __elem_readableTo(size) _Pre_ __deref __elem_writableTo(size) __deref_out_range(==, _Old_(*_Curr_) + incr) + +#define __post_bcount(size) _Post_ __byte_writableTo(size) +#define __post_ecount(size) _Post_ __elem_writableTo(size) + +#define 
__deref_realloc_bcount(insize, outsize) __inout _Pre_ __deref __byte_readableTo(insize) _Post_ __deref __byte_writableTo(outsize)
+
+/* __in_ecount_or_z(c) specifies semantics like strncmp, where a string
+ * parameter is either null terminated, or valid up to c elements.
+ */
+#define __in_ecount_or_z(c) _When_(_String_length_(_Curr_) < (c), __in_z) \
+ _When_(_String_length_(_Curr_) >= (c), __in_ecount(c))
+
+
+/* Provide default definition to be overridden when needed */
+#define __post_nullnullterminated
+
+/* Must protect redefinitions of macros to work around rc.exe issues. */
+#ifndef RC_INVOKED
+
+#undef __nullnullterminated
+#define __nullnullterminated __inexpressible_readableTo("string terminated by two nulls") __nullterminated
+
+#undef __post_nullnullterminated
+#define __post_nullnullterminated _Post_ __inexpressible_readableTo("string terminated by two nulls") _Post_ __nullterminated
+
+#endif
+#endif //__SAL_H_FULL_VER <= 140050727
+
+/************************************************************************
+ New extensions to sal.h follow here.
+*************************************************************************/
+
+#if (_MSC_VER >= 1000) && !defined(__midl) && defined(_PREFAST_)
+
+#define __file_parser(typ) _SA_annotes2(SAL_file_parser,"function",typ)
+#define __file_parser_class(typ) _SA_annotes2(SAL_file_parser,"class",typ)
+#define __file_parser_library(typ) extern int _SA_annotes2(SAL_file_parser, "library", typ) __iSALFileParserLibrary##typ;
+#define __source_code_content(typ) extern int _SA_annotes1(SAL_source_code_content, typ) __iSAL_Source_Code_Content##typ;
+#define __class_code_content(typ) _SA_annotes1(SAL_class_code_content, typ)
+#define __analysis_assert(e) __assume(e)
+#define __analysis_hint(hint) _SA_annotes1(SAL_analysisHint, hint)
+// For "breakpoint": doesn't return as far as analysis is concerned.
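+// Illustrative sketch for __in_ecount_or_z above (hypothetical function and
+// parameter names): the strncmp-style contract where each string argument is
+// either NULL-terminated or valid for at least cchMax elements.
+//
+//   int CompareKeys(
+//       __in_ecount_or_z(cchMax) const char *pszLeft,
+//       __in_ecount_or_z(cchMax) const char *pszRight,
+//       size_t cchMax);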
+#define __analysis_noreturn __declspec(noreturn) +/* Internal defintions */ +#define __inner_data_source(src_raw) _SA_annotes1(SAL_untrusted_data_source,src_raw) +#define __inner_this_data_source(src_raw) _SA_annotes1(SAL_untrusted_data_source_this,src_raw) +#define __inner_out_validated(typ_raw) _Post_ _SA_annotes1(SAL_validated,typ_raw) +#define __inner_this_out_validated(typ_raw) _SA_annotes1(SAL_validated_this,typ_raw) +#define __inner_assume_validated_dec __inline __nothrow void __AssumeValidated(__inner_out_validated("BY_DESIGN") const void *p) {p;} +#define __inner_assume_validated(p) __AssumeValidated(p) +#define __inner_transfer(formal) _SA_annotes1(SAL_transfer_adt_property_from,formal) +#define __inner_encoded _SA_annotes0(SAL_encoded) + +#if defined(_MSC_EXTENSIONS) || defined(_PREFAST_) || defined(OACR) +#define __inner_adt_prop(adt,prop) _SA_annotes2(SAL_adt, adt,prop) +#define __inner_adt_add_prop(adt,prop) _SA_annotes2(SAL_add_adt_property,adt,prop) +#define __inner_adt_remove_prop(adt,prop) _SA_annotes2(SAL_remove_adt_property,adt,prop) +#define __inner_adt_transfer_prop(arg) _SA_annotes1(SAL_transfer_adt_property_from,arg) +#define __inner_adt_type_props(typ) _SA_annotes1(SAL_post_type,typ) +#define __inner_volatile _SA_annotes0(SAL_volatile) +#define __inner_nonvolatile _SA_annotes0(SAL_nonvolatile) +#define __inner_possibly_notnullterminated _SA_annotes1(SAL_nullTerminated,__maybe) +#define __inner_analysis_assume_nullterminated_dec __inline __nothrow void __AnalysisAssumeNullterminated(_Post_ __nullterminated void *p) {*(char*)p=0;} +#define __inner_analysis_assume_nullterminated(x) __AnalysisAssumeNullterminated(x); +#endif + +#else + +#define __file_parser(typ) +#define __file_parser_class(typ) +#define __file_parser_library(typ) +#define __source_code_content(typ) +#define __class_code_content(typ) +#define __analysis_assert(e) +#define __analysis_hint(hint) +#define __analysis_noreturn +/* Internal defintions */ +#define __inner_data_source(src_raw) +#define __inner_this_data_source(src_raw) +#define __inner_out_validated(typ_raw) +#define __inner_this_out_validated(typ_raw) +#define __inner_assume_validated_dec +#define __inner_assume_validated(p) +#define __inner_transfer(formal) +#define __inner_encoded +#define __inner_adt_prop(adt,prop) +#define __inner_adt_add_prop(adt,prop) +#define __inner_adt_remove_prop(adt,prop) +#define __inner_adt_transfer_prop(arg) +#define __inner_adt_type_props(typ) +#define __inner_volatile +#define __inner_nonvolatile +#define __inner_possibly_notnullterminated +#define __inner_analysis_assume_nullterminated_dec +#define __inner_analysis_assume_nullterminated(x) + +#endif // #if (_MSC_VER >= 1000) && !defined(__midl) && defined(_PREFAST_) + +#define __field_ecount(size) __notnull __elem_writableTo(size) +#define __field_bcount(size) __notnull __byte_writableTo(size) +#define __field_xcount(size) __notnull __inexpressible_writableTo(size) + +#define __field_ecount_opt(size) __maybenull __elem_writableTo(size) +#define __field_bcount_opt(size) __maybenull __byte_writableTo(size) +#define __field_xcount_opt(size) __maybenull __inexpressible_writableTo(size) + +#define __field_ecount_part(size,init) __notnull __elem_writableTo(size) __elem_readableTo(init) +#define __field_bcount_part(size,init) __notnull __byte_writableTo(size) __byte_readableTo(init) +#define __field_xcount_part(size,init) __notnull __inexpressible_writableTo(size) __inexpressible_readableTo(init) + +#define __field_ecount_part_opt(size,init) __maybenull 
__elem_writableTo(size) __elem_readableTo(init) +#define __field_bcount_part_opt(size,init) __maybenull __byte_writableTo(size) __byte_readableTo(init) +#define __field_xcount_part_opt(size,init) __maybenull __inexpressible_writableTo(size) __inexpressible_readableTo(init) + +#define __field_ecount_full(size) __field_ecount_part(size,size) +#define __field_bcount_full(size) __field_bcount_part(size,size) +#define __field_xcount_full(size) __field_xcount_part(size,size) + +#define __field_ecount_full_opt(size) __field_ecount_part_opt(size,size) +#define __field_bcount_full_opt(size) __field_bcount_part_opt(size,size) +#define __field_xcount_full_opt(size) __field_xcount_part_opt(size,size) + +#define __field_nullterminated __nullterminated + +#define __struct_bcount(size) __byte_writableTo(size) +#define __struct_xcount(size) __inexpressible_writableTo(size) + +#define __out_awcount(expr,size) _Pre_ __notnull \ + __byte_writableTo((expr) ? (size) : (size) * 2) \ + _Post_ __valid __refparam +#define __in_awcount(expr,size) _Pre_ __valid \ + _Pre_ _Notref_ __deref __readonly \ + __byte_readableTo((expr) ? (size) : (size) * 2) +#define __post_invalid _Post_ __notvalid +/* integer related macros */ +#define __allocator __inner_allocator +#ifndef PAL_STDCPP_COMPAT +#define __deallocate(kind) _Pre_ __notnull __post_invalid +#define __deallocate_opt(kind) _Pre_ __maybenull __post_invalid +#endif +#define __bound __inner_bound +#define __range(lb,ub) __inner_range(lb,ub) +#define __in_bound _Pre_ __inner_bound +#define __out_bound _Post_ __inner_bound +#define __deref_out_bound _Post_ __deref __inner_bound +#define __in_range(lb,ub) _Pre_ __inner_range(lb,ub) +#define __out_range(lb,ub) _Post_ __inner_range(lb,ub) +#define __deref_in_range(lb,ub) _Pre_ __deref __inner_range(lb,ub) +#define __deref_out_range(lb,ub) _Post_ __deref __inner_range(lb,ub) +#define __deref_inout_range(lb,ub) __deref_in_range(lb,ub) __deref_out_range(lb,ub) +#define __field_range(lb,ub) __range(lb,ub) +#define __field_data_source(src_sym) __inner_data_source(#src_sym) + +#define __range_max(a,b) __range(==, a > b ? a : b) +#define __range_min(a,b) __range(==, a < b ? 
a : b) + + +/* Penetration review macros */ +#define __in_data_source(src_sym) _Pre_ __inner_data_source(#src_sym) +#define __out_data_source(src_sym) _Post_ __inner_data_source(#src_sym) +#define __out_validated(typ_sym) __inner_out_validated(#typ_sym) +#define __this_out_data_source(src_sym) __inner_this_data_source(#src_sym) +#define __this_out_validated(typ_sym) __inner_this_out_validated(#typ_sym) +#define __transfer(formal) _Post_ __inner_transfer(formal) +#define __rpc_entry __inner_control_entrypoint(RPC) +#define __kernel_entry __inner_control_entrypoint(UserToKernel) +#define __gdi_entry __inner_control_entrypoint(GDI) +#define __encoded_pointer __inner_encoded +#define __encoded_array __inner_encoded +#define __field_encoded_pointer __inner_encoded +#define __field_encoded_array __inner_encoded +#if defined(_MSC_EXTENSIONS) || defined(_PREFAST_) || defined(OACR) +#define __type_has_adt_prop(adt,prop) __inner_adt_prop(adt,prop) +#define __out_has_adt_prop(adt,prop) _Post_ __inner_adt_add_prop(adt,prop) +#define __out_not_has_adt_prop(adt,prop) _Post_ __inner_adt_remove_prop(adt,prop) +#define __out_transfer_adt_prop(arg) _Post_ __inner_adt_transfer_prop(arg) +#define __out_has_type_adt_props(typ) _Post_ __inner_adt_type_props(typ) + +/* useful PFD related macros */ +#define __possibly_notnullterminated __inner_possibly_notnullterminated + +/* Windows Internal */ +#define __volatile __inner_volatile +#define __nonvolatile __inner_nonvolatile +#else +#define __out_has_type_adt_props(typ) /* nothing */ +#endif +#define __deref_volatile __deref __volatile +#define __deref_nonvolatile __deref __nonvolatile + +/* declare stub functions for macros */ +__inner_assume_validated_dec +__inner_assume_bound_dec +__inner_analysis_assume_nullterminated_dec +#define __analysis_assume_nullterminated(x) __inner_analysis_assume_nullterminated(x) +#define __assume_validated(p) __inner_assume_validated(p) +#define __assume_bound(i) __inner_assume_bound(i) + + +/************************************************************************** +* SAL 2 extensions for Windows-specific APIs. +***************************************************************************/ + +// Annotation for parameters that are not used in any way by the function. +// Unlike _Reserved_, an _Unreferenced_parameter_ pointer need not be NULL. +#ifndef _Unreferenced_parameter_ +#define _Unreferenced_parameter_ _Const_ +#endif + +// Pointer parameters that are freed by the function, and thus the pointed-to +// memory should not be used after return. +#ifndef _Frees_ptr_ +#define _Frees_ptr_ _Pre_notnull_ _Post_ptr_invalid_ +#endif +#ifndef _Frees_ptr_opt_ +#define _Frees_ptr_opt_ _Pre_maybenull_ _Post_ptr_invalid_ +#endif + +// NLS APIs allow strings to be specified either by an element count or +// null termination. Unlike _In_reads_or_z_, this is not whichever comes +// first, but based on whether the size is negative or not. +#define _In_NLS_string_(size) _When_((size) < 0, _In_z_) \ + _When_((size) >= 0, _In_reads_(size)) + + +// Minifilter CompletionContext parameters on the pre-operation callback +// default to NULL. For return type FLT_PREOP_SUCCESS_WITH_CALLBACK or +// FLT_PREOP_SYNCHRONIZE, it may be set to NULL or a valid pointer. For all +// other returns, it must be NULL. 
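+// Illustrative sketch for the SAL 2 extensions above (hypothetical function
+// and parameter names):
+//
+//   void ReleaseBlock(_Frees_ptr_ void *pBlock);          -- callee frees pBlock; caller must not
+//                                                            use it after the call returns
+//
+//   BOOL RegisterName(
+//       _In_NLS_string_(cchName) LPCWSTR pszName,         -- NULL-terminated when cchName < 0,
+//       int cchName);                                     -- otherwise exactly cchName elements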
+#define _Flt_CompletionContext_Outptr_ \ + _Outptr_result_maybenull_ _Pre_valid_ \ + _At_(*_Curr_, _Pre_null_ \ + _When_(return != FLT_PREOP_SUCCESS_WITH_CALLBACK && return != FLT_PREOP_SYNCHRONIZE, _Post_null_)) + +// Minifilter ConnectionCookie parameters on the port connect notify callback +// default to NULL. On successful return, it may be set to NULL or non-NULL, +// but it must be NULL on failure. +#define _Flt_ConnectionCookie_Outptr_ \ + _Outptr_result_maybenull_ _Pre_valid_ \ + _At_(*_Curr_, _Pre_null_ _On_failure_(_Post_null_)) + + +// +// A common pattern is to pass an "_Inout_ PCHAR* ppBuf" of size "_Inout_ DWORD* pSize" +// to a function that writes to **pBuf, incrementing *ppBuf to point to one +// past the last written byte. Thus the length of the write is +// (*ppBuf - Old(*ppBuf)). The size of the remaining unwritten capacity +// is written to *pSize. +// +// This pattern is frequently used when progressively filling a +// large buffer in chunks +// (e.g. when reading from a network interface in a driver). +// +// It is expected that these supplementary annotations would be used inside an +// _At_, like so: +// +// _At_(*ppBuf, _Writes_and_advances_ptr_(*pBufSize)) +// HRESULT WriteChunkOfData(_Inout_ PCHAR* ppBuf, _Inout_ DWORD* pBufSize); +// +#ifndef _Writes_and_advances_ptr_ +#define _Writes_and_advances_ptr_(size) \ + _At_((void*)_Curr_, _Inout_) \ + _At_(_Curr_, \ + _Pre_writable_size_(size) \ + _Post_writable_size_(size) \ + _Post_satisfies_(_Curr_ - _Old_(_Curr_) == size)) \ + _At_(_Old_(_Curr_), \ + _Post_readable_size_(_Old_(size) - size)) +#endif + +#ifndef _Writes_bytes_and_advances_ptr_ +#define _Writes_bytes_and_advances_ptr_(size) \ + _At_((void*)_Curr_, _Inout_) \ + _At_(_Curr_, \ + _Pre_writable_byte_size_(size) \ + _Post_writable_byte_size_(size) \ + _Post_satisfies_(((char*)_Curr_) - ((void*)_Old_(_Curr_)) == size)) \ + _At_(_Old_(_Curr_), \ + _Post_readable_byte_size_(_Old_(size) - size)) +#endif + +// +// Gets the current error code (as returned by GetLastError()), and stores +// in _Curr_ as a postcondition. This is currently approximated by assuming +// that GetLastError() always returns a failed error code. This is not a +// completely accurate approximation, but reasonable. +// +#define _Post_equals_last_error_ _Post_satisfies_(_Curr_ != 0) + +#ifdef __cplusplus +} +#endif + +#ifdef _PREFIX_ +/************************************************************************** +* Defintion of __pfx_assume and __pfx_assert. Thse should be the only +* defintions of these functions. +***************************************************************************/ +#if __cplusplus +extern "C" void __pfx_assert(bool, const char *); +extern "C" void __pfx_assume(bool, const char *); +#else +void __pfx_assert(int, const char *); +void __pfx_assume(int, const char *); +#endif +/************************************************************************** +* Redefintion of __analysis_assume and __analysis_assert for PREFIX build +**************************************************************************/ +#undef __analysis_assume +#undef __analysis_assert +#define __analysis_assume(e) (__pfx_assume(e,"pfx_assume"),__assume(e)); +#define __analysis_assert(e) (__pfx_assert(e,"pfx_assert"),__assume(e)); +#endif /* ifdef _PREFIX_ */ + +/************************************************************************** +* This include should always be the last thing in this file. +* Must avoid redfinitions of macros to workaround rc.exe issues. 
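/* Illustrative only: a hedged sketch of the chunked-write pattern described above.
   The writer declaration is taken from the comment; the caller, its progress check,
   the _Out_writes_bytes_ annotation on it, and the Windows typedefs (PCHAR, DWORD,
   HRESULT, S_OK, SUCCEEDED) are assumptions for the example, not part of this header. */

_At_(*ppBuf, _Writes_and_advances_ptr_(*pBufSize))
HRESULT WriteChunkOfData(_Inout_ PCHAR* ppBuf, _Inout_ DWORD* pBufSize);

/* Hypothetical caller: writes chunks until the buffer is full or the writer stops
   making progress. After each call pCur has been advanced past the written bytes
   and cbLeft holds the remaining unwritten capacity. */
HRESULT FillBuffer(_Out_writes_bytes_(cbTotal) PCHAR pBuf, DWORD cbTotal)
{
    PCHAR pCur = pBuf;
    DWORD cbLeft = cbTotal;
    HRESULT hr = S_OK;
    while (SUCCEEDED(hr) && cbLeft != 0)
    {
        DWORD cbBefore = cbLeft;
        hr = WriteChunkOfData(&pCur, &cbLeft);
        if (cbLeft == cbBefore)
            break; /* no progress; avoid spinning */
    }
    return hr;
}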
+***************************************************************************/ +#if !(defined(RC_INVOKED) || defined(SORTPP_PASS)) +#include +#endif /* if !(defined(RC_INVOKED) || defined(SORTPP_PASS)) */ + +/* + If no SAL 2 appears to have been defined (_Outptr_ is a representative choice) + then we must be operating in a downlevel build environment (such as VS10). + We also test against the compiler version to identify a downlevel environment, + as VS11 is the minimum required for SAL 2 support. + + If we are operating in a downlevel build environment (such as VS10) + we need to undefine the following symbols before including driverspecs.h + or we will end up referencing SAL 2 implementation symbols and cause + build failures. +*/ +#if (!defined(_Outptr_) || _MSC_VER <= 1600) && !( defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) ) /*IFSTRIP=IGN*/ +#undef __ANNOTATION +#define __ANNOTATION(fun) /* fun */ +#undef __PRIMOP +#define __PRIMOP(type, fun) +#endif /* !defined(_Outptr_) || _MSC_VER <= 1600 */ + +// ROTOR doesn't need driverspecs.h +// #include + +/* + If no SAL 2 appears to have been defined (_Outptr_ is a representative choice) + then we must be operating in a downlevel build environment (such as VS10). + We also test against the compiler version to identify a downlevel environment, + as VS11 is the minimum required for SAL 2 support. + + If we are in a downlevel environment, we can go ahead and include no_sal2.h + to make all of SAL 2 no-ops to ensure no build failures. +*/ +#if (!defined(_Outptr_) || _MSC_VER <= 1600) && !( defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) ) && !( defined( _SDV_ ) ) /*IFSTRIP=IGN*/ +#include +#endif /* !defined(_Outptr_) || _MSC_VER <= 1600 */ + +#endif /* #ifndef SPECSTRINGS_H */ + + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/specstrings_strict.h b/src/coreclr/src/nativeaot/Runtime/unix/specstrings_strict.h new file mode 100644 index 0000000000000..04d08e21c4c77 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/specstrings_strict.h @@ -0,0 +1,1189 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/************************************************************************* +* This file documents all the macros approved for use in windows source +* code. It includes some experimental macros which should only be used by +* experts. +* +* DO NOT include this file directly. This file is include after +* specstrings.h. So we can undefine every possible old definition including +* private internal macros people should not be using, as well as macros from +* sal.h. Macros are redefined here in a way to cause syntax errors when used +* incorrectly during a normal build when specstrings.h is included and +* __SPECSTRINGS_STRICT_LEVEL is defined. +* +* There are several levels of strictness, each level includes the behavior of +* all previous levels. +* +* 0 - Disable strict checking +* 1 - Break on unapproved macros and misuse of statement +* macros such as __fallthrough (default) +* 2 - Deprecated some old macros that should not be used +* 3 - Use VS 2005 Source Annotation to make sure every macro +* is used in the right context. For example placing __in on a return +* parameter will result in an error. 
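/* A minimal, hypothetical sketch of opting into a stricter level. Per the comment
   above, this header is not included directly; the level would typically be defined
   before specstrings.h is pulled in (or with an equivalent -D compiler switch).
   The chosen value 2 is only an example. */
#define __SPECSTRINGS_STRICT_LEVEL 2   /* level 2 also deprecates the old macros listed later in this header */
#include <specstrings.h>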
+* + +* +************************************************************************/ +#ifndef __SPECSTRINGS_STRICT_LEVEL +#define __SPECSTRINGS_STRICT_LEVEL 1 +#endif +/************************************************************************ +* Introduction +* +* specstrings.h provides a set of annotations to describe how a function uses +* its parameters - the assumptions it makes about them, and the guarantees it +* makes upon finishing. +* +* Annotations must be placed before a function parameter's type or its return +* type. There are two basic classes of common annotations buffer annotations +* and advanced annotations. Buffer annotations describe how functions use +* their pointer parameters, and advanced annotations either describe +* complex/unusual buffer behavior, or provide additional information about a +* parameter that is not otherwise expressible. +* +* Buffer Annotations +* +* The most important annotations in SpecStrings.h provide a consistent way to +* annotate buffer parameters or return values for a function. Each of these +* annotations describes a single buffer (which could be a string, a +* fixed-length or variable-length array, or just a pointer) that the function +* interacts with: where it is, how large it is, how much is initialized, and +* what the function does with it. +* +* The appropriate macro for a given buffer can be constructed using the table +* below. Just pick the appropriate values from each category, and combine +* them together with a leading underscore. Some combinations of values do not +* make sense as buffer annotations. Only meaningful annotations can be added +* to your code; for a list of these, see the buffer annotation definitions +* section. +* +* Only a single buffer annotation should be used for each parameter. +* +* |------------|------------|---------|--------|----------|---------------| +* | Level | Usage | Size | Output | Optional | Parameters | +* |------------|------------|---------|--------|----------|---------------| +* | <> | <> | <> | <> | <> | <> | +* | _deref | _in | _ecount | _full | _opt | (size) | +* | _deref_opt | _out | _bcount | _part | | (size,length) | +* | | _inout | | | | | +* | | | | | | | +* |------------|------------|---------|--------|----------|---------------| +* +* Note: "<>" represents the empty string. +* +* Level: Describes the buffer pointer's level of indirection from the +* parameter or return value 'p'. +* +* <> : p is the buffer pointer. +* _deref : *p is the buffer pointer. p must not be NULL. +* _deref_opt : *p may be the buffer pointer. p may be NULL, in which case the +* rest of the annotation is ignored. +* +* Usage: Describes how the function uses the buffer. +* +* <> : The buffer is not accessed. If used on the return value or with +* _deref, the function will provide the buffer, and it will be uninitialized +* at exit. Otherwise, the caller must provide the buffer. This should only +* be used for alloc and free functions. +* +* _in : The function will only read from the buffer. The caller must provide +* the buffer and initialize it. +* +* _out : The function will only write to the buffer. If used on the return +* value or with _deref, the function will provide the buffer and initialize +* it. Otherwise, the caller must provide the buffer, and the function will +* initialize it. +* +* _inout : The function may freely read from and write to the buffer. The +* caller must provide the buffer and initialize it. If used with _deref, the +* buffer may be reallocated by the function. 
+* +* Size: Describes the total size of the buffer. This may be less than the +* space actually allocated for the buffer, in which case it describes the +* accessible amount. +* +* <> : No buffer size is given. If the type specifies the buffer size (such +* as with LPSTR and LPWSTR), that amount is used. Otherwise, the buffer is +* one element long. Must be used with _in, _out, or _inout. +* +* _ecount : The buffer size is an explicit element count. +* +* _bcount : The buffer size is an explicit byte count. +* +* Output: Describes how much of the buffer will be initialized by the +* function. For _inout buffers, this also describes how much is initialized +* at entry. Omit this category for _in buffers; they must be fully +* initialized by the caller. +* +* <> : The type specifies how much is initialized. For instance, a function +* initializing an LPWSTR must NULL-terminate the string. +* +* _full : The function initializes the entire buffer. +* +* _part : The function initializes part of the buffer, and explicitly +* indicates how much. +* +* Optional: Describes if the buffer itself is optional. +* +* <> : The pointer to the buffer must not be NULL. +* +* _opt : The pointer to the buffer might be NULL. It will be checked before +* being dereferenced. +* +* Parameters: Gives explicit counts for the size and length of the buffer. +* +* <> : There is no explicit count. Use when neither _ecount nor _bcount is +* used. +* +* (size) : Only the buffer's total size is given. Use with _ecount or _bcount +* but not _part. +* +* (size,length) : The buffer's total size and initialized length are +* given. Use with _ecount_part and _bcount_part. +* +* ---------------------------------------------------------------------------- +* Buffer Annotation Examples +* +* LWSTDAPI_(BOOL) StrToIntExA( +* LPCSTR pszString, // No annotation required, const implies __in. +* DWORD dwFlags, +* __out int *piRet // A pointer whose dereference will be filled in. +* ); +* +* void MyPaintingFunction( +* __in HWND hwndControl, // An initialized read-only parameter. +* __in_opt HDC hdcOptional, // An initialized read-only parameter that +* // might be NULL. +* __inout IPropertyStore *ppsStore // An initialized parameter that +* // may be freely used and modified. +* ); +* +* LWSTDAPI_(BOOL) PathCompactPathExA( +* __out_ecount(cchMax) LPSTR pszOut, // A string buffer with cch elements +* // that will be '\0' terminated +* // on exit. +* LPCSTR pszSrc, // No annotation required, +* // const implies __in. +* UINT cchMax, +* DWORD dwFlags +* ); +* +* HRESULT SHLocalAllocBytes( +* size_t cb, +* __deref_bcount(cb) T **ppv // A pointer whose dereference will be set +* // to an uninitialized buffer with cb bytes. +* ); +* +* __inout_bcount_full(cb) : A buffer with cb elements that is fully +* initialized at entry and exit, and may be written to by this function. +* +* __out_ecount_part(count, *countOut) : A buffer with count elements that +* will be partially initialized by this function. The function indicates how +* much it initialized by setting *countOut. 
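/* An additional hedged sketch showing how the columns above compose into macro
   names (level + usage + size + output + optional + parameters). The function,
   its parameters, and the HRESULT return type are invented for illustration. */
HRESULT CopyAndSnapshot(
    __in_ecount(cchSrc) const wchar_t *pszSrc,                   /* read-only, cchSrc elements, initialized by caller */
    size_t cchSrc,
    __out_ecount_part(cchDest, *pcchWritten) wchar_t *pszDest,   /* cchDest capacity; *pcchWritten elements initialized on exit */
    size_t cchDest,
    __out size_t *pcchWritten,
    __deref_opt_out_bcount_full(cbBlob) void **ppBlob,           /* optional; if non-NULL, *ppBlob gets a fully initialized cbBlob-byte buffer provided by the function */
    size_t cbBlob);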
+* +************************************************************************/ + +#if (_MSC_VER >= 1400) && !defined(__midl) && !defined(_PREFAST_) && (__SPECSTRINGS_STRICT_LEVEL > 0) +#pragma once +#include +#define __ecount(size) _SAL_VERSION_CHECK(__ecount) +#define __bcount(size) _SAL_VERSION_CHECK(__bcount) +#define __xcount(size) _SAL_VERSION_CHECK(__xcount) +#define __in _SAL_VERSION_CHECK(__in) +#define __in_ecount(size) _SAL_VERSION_CHECK(__in_ecount) +#define __in_bcount(size) _SAL_VERSION_CHECK(__in_bcount) +#define __in_xcount(size) _SAL_VERSION_CHECK(__in_xcount) +#define __in_z _SAL_VERSION_CHECK(__in_z) +#define __in_ecount_z(size) _SAL_VERSION_CHECK(__in_ecount_z) +#define __in_bcount_z(size) _SAL_VERSION_CHECK(__in_bcount_z) +#define __out _SAL_VERSION_CHECK(__out) +#define __out_ecount(size) _SAL_VERSION_CHECK(__out_ecount) +#define __out_bcount(size) _SAL_VERSION_CHECK(__out_bcount) +#define __out_xcount(size) _SAL_VERSION_CHECK(__out_xcount) +#define __out_ecount_part(size,len) _SAL_VERSION_CHECK(__out_ecount_part) +#define __out_bcount_part(size,len) _SAL_VERSION_CHECK(__out_bcount_part) +#define __out_xcount_part(size,len) _SAL_VERSION_CHECK(__out_xcount_part) +#define __out_ecount_full(size) _SAL_VERSION_CHECK(__out_ecount_full) +#define __out_bcount_full(size) _SAL_VERSION_CHECK(__out_bcount_full) +#define __out_xcount_full(size) _SAL_VERSION_CHECK(__out_xcount_full) +#define __out_z _SAL_VERSION_CHECK(__out_z) +#define __out_ecount_z(size) _SAL_VERSION_CHECK(__out_ecount_z) +#define __out_bcount_z(size) _SAL_VERSION_CHECK(__out_bcount_z) +#define __inout _SAL_VERSION_CHECK(__inout) +#define __inout_ecount(size) _SAL_VERSION_CHECK(__inout_ecount) +#define __inout_bcount(size) _SAL_VERSION_CHECK(__inout_bcount) +#define __inout_xcount(size) _SAL_VERSION_CHECK(__inout_xcount) +#define __inout_ecount_part(size,len) _SAL_VERSION_CHECK(__inout_ecount_part) +#define __inout_bcount_part(size,len) _SAL_VERSION_CHECK(__inout_bcount_part) +#define __inout_xcount_part(size,len) _SAL_VERSION_CHECK(__inout_xcount_part) +#define __inout_ecount_full(size) _SAL_VERSION_CHECK(__inout_ecount_full) +#define __inout_bcount_full(size) _SAL_VERSION_CHECK(__inout_bcount_full) +#define __inout_xcount_full(size) _SAL_VERSION_CHECK(__inout_xcount_full) +#define __inout_z __allowed(on_parameter) +#define __inout_ecount_z(size) __allowed(on_parameter) +#define __inout_bcount_z(size) __allowed(on_parameter) +#define __ecount_opt(size) __allowed(on_parameter) +#define __bcount_opt(size) __allowed(on_parameter) +#define __xcount_opt(size) __allowed(on_parameter) +#define __in_opt _SAL_VERSION_CHECK(__in_opt) +#define __in_ecount_opt(size) _SAL_VERSION_CHECK(__in_ecount_opt) +#define __in_bcount_opt(size) _SAL_VERSION_CHECK(__in_bcount_opt) +#define __in_z_opt __allowed(on_parameter) +#define __in_ecount_z_opt(size) __allowed(on_parameter) +#define __in_bcount_z_opt(size) __allowed(on_parameter) +#define __in_xcount_opt(size) __allowed(on_parameter) +#define __out_opt _SAL_VERSION_CHECK(__out_opt) +#define __out_ecount_opt(size) _SAL_VERSION_CHECK(__out_ecount_opt) +#define __out_bcount_opt(size) _SAL_VERSION_CHECK(__out_bcount_opt) +#define __out_xcount_opt(size) __allowed(on_parameter) +#define __out_ecount_part_opt(size,len) __allowed(on_parameter) +#define __out_bcount_part_opt(size,len) __allowed(on_parameter) +#define __out_xcount_part_opt(size,len) __allowed(on_parameter) +#define __out_ecount_full_opt(size) __allowed(on_parameter) +#define __out_bcount_full_opt(size) __allowed(on_parameter) 
+#define __out_xcount_full_opt(size) __allowed(on_parameter) +#define __out_ecount_z_opt(size) __allowed(on_parameter) +#define __out_bcount_z_opt(size) __allowed(on_parameter) +#define __inout_opt _SAL_VERSION_CHECK(__inout_opt) +#define __inout_ecount_opt(size) _SAL_VERSION_CHECK(__inout_ecount_opt) +#define __inout_bcount_opt(size) _SAL_VERSION_CHECK(__inout_bcount_opt) +#define __inout_xcount_opt(size) _SAL_VERSION_CHECK(__inout_xcount_opt) +#define __inout_ecount_part_opt(size,len) _SAL_VERSION_CHECK(__inout_ecount_part_opt) +#define __inout_bcount_part_opt(size,len) _SAL_VERSION_CHECK(__inout_bcount_part_opt) +#define __inout_xcount_part_opt(size,len) _SAL_VERSION_CHECK(__inout_xcount_part_opt) +#define __inout_ecount_full_opt(size) _SAL_VERSION_CHECK(__inout_ecount_full_opt) +#define __inout_bcount_full_opt(size) _SAL_VERSION_CHECK(__inout_bcount_full_opt) +#define __inout_xcount_full_opt(size) _SAL_VERSION_CHECK(__inout_xcount_full_opt) +#define __inout_z_opt __allowed(on_parameter) +#define __inout_ecount_z_opt(size) __allowed(on_parameter) +#define __inout_ecount_z_opt(size) __allowed(on_parameter) +#define __inout_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_ecount(size) __allowed(on_parameter) +#define __deref_bcount(size) __allowed(on_parameter) +#define __deref_xcount(size) __allowed(on_parameter) +#define __deref_in _SAL_VERSION_CHECK(__deref_in) +#define __deref_in_ecount(size) _SAL_VERSION_CHECK(__deref_in_ecount) +#define __deref_in_bcount(size) _SAL_VERSION_CHECK(__deref_in_bcount) +#define __deref_in_xcount(size) _SAL_VERSION_CHECK(__deref_in_xcount) +#define __deref_out _SAL_VERSION_CHECK(__deref_out) +#define __deref_out_ecount(size) _SAL_VERSION_CHECK(__deref_out_ecount) +#define __deref_out_bcount(size) _SAL_VERSION_CHECK(__deref_out_bcount) +#define __deref_out_xcount(size) _SAL_VERSION_CHECK(__deref_out_xcount) +#define __deref_out_ecount_part(size,len) _SAL_VERSION_CHECK(__deref_out_ecount_part) +#define __deref_out_bcount_part(size,len) _SAL_VERSION_CHECK(__deref_out_bcount_part) +#define __deref_out_xcount_part(size,len) _SAL_VERSION_CHECK(__deref_out_xcount_part) +#define __deref_out_ecount_full(size) _SAL_VERSION_CHECK(__deref_out_ecount_full) +#define __deref_out_bcount_full(size) _SAL_VERSION_CHECK(__deref_out_bcount_full) +#define __deref_out_xcount_full(size) _SAL_VERSION_CHECK(__deref_out_xcount_full) +#define __deref_out_z __allowed(on_parameter) +#define __deref_out_ecount_z(size) __allowed(on_parameter) +#define __deref_out_bcount_z(size) __allowed(on_parameter) +#define __deref_inout _SAL_VERSION_CHECK(__deref_inout) +#define __deref_inout_ecount(size) _SAL_VERSION_CHECK(__deref_inout_ecount) +#define __deref_inout_bcount(size) _SAL_VERSION_CHECK(__deref_inout_bcount) +#define __deref_inout_xcount(size) _SAL_VERSION_CHECK(__deref_inout_xcount) +#define __deref_inout_ecount_part(size,len) __allowed(on_parameter) +#define __deref_inout_bcount_part(size,len) __allowed(on_parameter) +#define __deref_inout_xcount_part(size,len) __allowed(on_parameter) +#define __deref_inout_ecount_full(size) __allowed(on_parameter) +#define __deref_inout_bcount_full(size) __allowed(on_parameter) +#define __deref_inout_xcount_full(size) __allowed(on_parameter) +#define __deref_inout_z __allowed(on_parameter) +#define __deref_inout_ecount_z(size) __allowed(on_parameter) +#define __deref_inout_bcount_z(size) __allowed(on_parameter) +#define __deref_ecount_opt(size) __allowed(on_parameter) +#define __deref_bcount_opt(size) __allowed(on_parameter) +#define 
__deref_xcount_opt(size) __allowed(on_parameter) +#define __deref_in_opt __allowed(on_parameter) +#define __deref_in_opt_out __allowed(on_parameter) +#define __deref_in_ecount_opt(size) __allowed(on_parameter) +#define __deref_in_bcount_opt(size) __allowed(on_parameter) +#define __deref_in_xcount_opt(size) __allowed(on_parameter) +#define __deref_out_opt _SAL_VERSION_CHECK(__deref_out_opt) +#define __deref_out_ecount_opt(size) _SAL_VERSION_CHECK(__deref_out_ecount_opt) +#define __deref_out_bcount_opt(size) _SAL_VERSION_CHECK(__deref_out_bcount_opt) +#define __deref_out_xcount_opt(size) _SAL_VERSION_CHECK(__deref_out_xcount_opt) +#define __deref_out_ecount_part_opt(size,len) _SAL_VERSION_CHECK(__deref_out_ecount_part_opt) +#define __deref_out_bcount_part_opt(size,len) _SAL_VERSION_CHECK(__deref_out_bcount_part_opt) +#define __deref_out_xcount_part_opt(size,len) _SAL_VERSION_CHECK(__deref_out_xcount_part_opt) +#define __deref_out_ecount_full_opt(size) _SAL_VERSION_CHECK(__deref_out_ecount_full_opt) +#define __deref_out_bcount_full_opt(size) _SAL_VERSION_CHECK(__deref_out_bcount_full_opt) +#define __deref_out_xcount_full_opt(size) _SAL_VERSION_CHECK(__deref_out_xcount_full_opt) +#define __deref_out_z_opt __allowed(on_parameter) +#define __deref_out_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_out_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_inout_opt __allowed(on_parameter) +#define __deref_inout_ecount_opt(size) __allowed(on_parameter) +#define __deref_inout_bcount_opt(size) __allowed(on_parameter) +#define __deref_inout_xcount_opt(size) __allowed(on_parameter) +#define __deref_inout_ecount_part_opt(size,len) __allowed(on_parameter) +#define __deref_inout_bcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_inout_xcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_inout_ecount_full_opt(size) __allowed(on_parameter) +#define __deref_inout_bcount_full_opt(size) __allowed(on_parameter) +#define __deref_inout_xcount_full_opt(size) __allowed(on_parameter) +#define __deref_inout_z_opt __allowed(on_parameter) +#define __deref_inout_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_inout_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_ecount(size) __allowed(on_parameter) +#define __deref_opt_bcount(size) __allowed(on_parameter) +#define __deref_opt_xcount(size) __allowed(on_parameter) +#define __deref_opt_in __allowed(on_parameter) +#define __deref_opt_in_ecount(size) __allowed(on_parameter) +#define __deref_opt_in_bcount(size) __allowed(on_parameter) +#define __deref_opt_in_xcount(size) __allowed(on_parameter) +#define __deref_opt_out _SAL_VERSION_CHECK(__deref_opt_out) +#define __deref_opt_out_ecount(size) _SAL_VERSION_CHECK(__deref_opt_out_ecount) +#define __deref_opt_out_bcount(size) _SAL_VERSION_CHECK(__deref_opt_out_bcount) +#define __deref_opt_out_xcount(size) _SAL_VERSION_CHECK(__deref_opt_out_xcount) +#define __deref_opt_out_ecount_part(size,len) __allowed(on_parameter) +#define __deref_opt_out_bcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_out_xcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_out_ecount_full(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_full(size) __allowed(on_parameter) +#define __deref_opt_out_xcount_full(size) __allowed(on_parameter) +#define __deref_opt_inout __allowed(on_parameter) +#define __deref_opt_inout_ecount(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount(size) __allowed(on_parameter) +#define 
__deref_opt_inout_xcount(size) __allowed(on_parameter) +#define __deref_opt_inout_ecount_part(size,len) __allowed(on_parameter) +#define __deref_opt_inout_bcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_inout_xcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_inout_ecount_full(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_full(size) __allowed(on_parameter) +#define __deref_opt_inout_xcount_full(size) __allowed(on_parameter) +#define __deref_opt_inout_z __allowed(on_parameter) +#define __deref_opt_inout_ecount_z(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_z(size) __allowed(on_parameter) +#define __deref_opt_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_in_opt __allowed(on_parameter) +#define __deref_opt_in_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_in_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_in_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_opt __allowed(on_parameter) +#define __deref_opt_out_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_ecount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_out_bcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_out_xcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_out_ecount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_out_xcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_out_z_opt __allowed(on_parameter) +#define __deref_opt_out_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_opt __allowed(on_parameter) +#define __deref_opt_inout_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_ecount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_inout_bcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_inout_xcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_inout_ecount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_xcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_z_opt __allowed(on_parameter) +#define __deref_opt_inout_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_in_ecount_iterator(size,incr) __allowed(on_parameter) +#define __deref_out_ecount_iterator(size,incr) __allowed(on_parameter) +#define __deref_inout_ecount_iterator(size,incr) __allowed(on_parameter) +#define __deref_realloc_bcount(insize,outsize) __allowed(on_parameter) + +/************************************************************************ +* SAL 2 _Ouptr_ family of annotations +************************************************************************/ + +#define _Outptr_ __allowed(on_parameter) +#define _Outptr_result_maybenull_ __allowed(on_parameter) +#define _Outptr_opt_ __allowed(on_parameter) +#define _Outptr_opt_result_maybenull_ 
__allowed(on_parameter) +#define _Outptr_result_z_ __allowed(on_parameter) +#define _Outptr_opt_result_z_ __allowed(on_parameter) +#define _Outptr_result_maybenull_z_ __allowed(on_parameter) +#define _Outptr_opt_result_maybenull_z_ __allowed(on_parameter) +#define _Outptr_result_nullonfailure_ __allowed(on_parameter) +#define _Outptr_opt_result_nullonfailure_ __allowed(on_parameter) +#define _COM_Outptr_ __allowed(on_parameter) +#define _COM_Outptr_result_maybenull_ __allowed(on_parameter) +#define _COM_Outptr_opt_ __allowed(on_parameter) +#define _COM_Outptr_opt_result_maybenull_ __allowed(on_parameter) +#define _Outptr_result_buffer_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_(size) __allowed(on_parameter) +#define _Outptr_result_buffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_result_buffer_all_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_all_(size) __allowed(on_parameter) +#define _Outptr_result_buffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_result_buffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_result_buffer_all_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_all_maybenull_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_all_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_all_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_all_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_all_maybenull_(size) __allowed(on_parameter) + +/************************************************************************ +* Orcas SAL +************************************************************************/ +#define _Deref_out_ _SAL_VERSION_CHECK(_Deref_out_) +#define _Deref_out_opt_ _SAL_VERSION_CHECK(_Deref_out_opt_) +#define _Deref_opt_out_ _SAL_VERSION_CHECK(_Deref_opt_out_) +#define _Deref_opt_out_opt_ _SAL_VERSION_CHECK(_Deref_opt_out_opt_) +#define _In_count_(size) _SAL_VERSION_CHECK(_In_count_) +#define _In_opt_count_(size) _SAL_VERSION_CHECK(_In_opt_count_) +#define _In_bytecount_(size) _SAL_VERSION_CHECK(_In_bytecount_) +#define _In_opt_bytecount_(size) _SAL_VERSION_CHECK(_In_opt_bytecount_) +#define _Out_cap_(size) _SAL_VERSION_CHECK(_Out_cap_) +#define _Out_opt_cap_(size) _SAL_VERSION_CHECK(_Out_opt_cap_) +#define _Out_bytecap_(size) _SAL_VERSION_CHECK(_Out_bytecap_) +#define _Out_opt_bytecap_(size) _SAL_VERSION_CHECK(_Out_opt_bytecap_) +#define _Deref_post_count_(size) _SAL_VERSION_CHECK(_Deref_post_count_) +#define _Deref_post_opt_count_(size) _SAL_VERSION_CHECK(_Deref_post_opt_count_) +#define _Deref_post_bytecount_(size) 
_SAL_VERSION_CHECK(_Deref_post_bytecount_) +#define _Deref_post_opt_bytecount_(size) _SAL_VERSION_CHECK(_Deref_post_opt_bytecount_) +#define _Deref_post_cap_(size) _SAL_VERSION_CHECK(_Deref_post_cap_) +#define _Deref_post_opt_cap_(size) _SAL_VERSION_CHECK(_Deref_post_opt_cap_) +#define _Deref_post_bytecap_(size) _SAL_VERSION_CHECK(_Deref_post_bytecap_) +#define _Deref_post_opt_bytecap_(size) _SAL_VERSION_CHECK(_Deref_post_opt_bytecap_) + +/************************************************************************ +* Advanced Annotations +* +* Advanced annotations describe behavior that is not expressible with the +* regular buffer macros. These may be used either to annotate buffer +* parameters that involve complex or conditional behavior, or to enrich +* existing annotations with additional information. +* +* _At_(expr, annotes) : annotation list annotes applies to target 'expr' +* +* _When_(expr, annotes) : annotation list annotes applies when 'expr' is true +* +* __success(expr) T f() : indicates whether function f succeeded or +* not. If is true at exit, all the function's guarantees (as given +* by other annotations) must hold. If is false at exit, the caller +* should not expect any of the function's guarantees to hold. If not used, +* the function must always satisfy its guarantees. Added automatically to +* functions that indicate success in standard ways, such as by returning an +* HRESULT. +* +* __out_awcount(expr, size) T *p : Pointer p is a buffer whose size may be +* given in either bytes or elements. If is true, this acts like +* __out_bcount. If is false, this acts like __out_ecount. This +* should only be used to annotate old APIs. +* +* __in_awcount(expr, size) T* p : Pointer p is a buffer whose size may be given +* in either bytes or elements. If is true, this acts like +* __in_bcount. If is false, this acts like __in_ecount. This should +* only be used to annotate old APIs. +* +* __nullterminated T* p : Pointer p is a buffer that may be read or written +* up to and including the first '\0' character or pointer. May be used on +* typedefs, which marks valid (properly initialized) instances of that type +* as being null-terminated. +* +* __nullnullterminated T* p : Pointer p is a buffer that may be read or +* written up to and including the first sequence of two '\0' characters or +* pointers. May be used on typedefs, which marks valid instances of that +* type as being double-null terminated. +* +* __reserved T v : Value v must be 0/NULL, reserved for future use. +* +* __checkReturn T f(); : Return value of f must not be ignored by callers +* of this function. +* +* __typefix(ctype) T v : Value v should be treated as an instance of ctype, +* rather than its declared type when considering validity. +* +* __override T f(); : Specify C#-style 'override' behaviour for overriding +* virtual methods. +* +* __callback T f(); : Function f can be used as a function pointer. +* +* __format_string T p : Pointer p is a string that contains % markers in +* the style of printf. +* +* __blocksOn(resource) f(); : Function f blocks on the resource 'resource'. +* +* __fallthrough : Annotates switch statement labels where fall-through is +* desired, to distinguish from forgotten break statements. +* +* __range(low_bnd, up_bnd) int f(): The return from the function "f" must +* be in the inclusive numeric range [low_bnd, up_bnd]. +* +* __in_range(low_bnd, up_bnd) int i : Precondition that integer i must be +* in the inclusive numeric range [low_bnd, up_bnd]. 
+* +* __out_range(low_bnd, up_bnd) int i : Postcondition that integer i must be +* in the inclusive numeric range [low_bnd, up_bnd]. +* +* __deref_in_range(low_bnd, up_bnd) int* pi : Precondition that integer *pi +* must be in the inclusive numeric range [low_bnd, up_bnd]. +* +* __deref_out_range(low_bnd, up_bnd) int* pi : Postcondition that integer +* *pi must be in the inclusive numeric range [low_bnd, up_bnd]. +* +* __deref_inout_range(low_bnd, up_bnd) int* pi : Invariant that the integer +* *pi must be in the inclusive numeric range [low_bnd, up_bnd]. +* +* The first argument of a range macro may also be a C relational operator +* (<,>,!=, ==, <=, >=). +* +* __range(rel_op, j) int f(): Postcondition that "f() rel_op j" must be +* true. Note that j may be a expression known only at runtime. +* +* __in_range(rel_op, j) int i : Precondition that "i rel_op j" must be +* true. Note that j may be a expression known only at runtime. +* +* __out_range(rel_op, j) int i : Postcondition that integer "i rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __deref_in_range(rel_op, j) int *pi : Precondition that "*pi rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __deref_out_range(rel_op, j) int *pi : Postcondition that "*pi rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __deref_inout_range(rel_op, j) int *pi : Invariant that "*pi rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __range_max(a, b) int f(): Postcondition f acts as 'max', returns larger +* of a and b. Note that a and b may be expressions known only at runtime. +* +* __range_min(a, b) int f(): Postcondition f acts as 'min', returns smaller +* of a and b. Note that a and b may be expressions known only at runtime. +* +* __in_bound int i : Precondition that integer i must be bound, but the +* exact range can't be specified at compile time. __in_range should be +* used if the range can be explicitly stated. +* +* __out_bound int i : Postcondition that integer i must be bound, but the +* exact range can't be specified at compile time. __out_range should be +* used if the range can be explicitly stated. +* +* __deref_out_bound int pi : Postcondition that integer *pi must be bound, +* but the exact range can't be specified at compile time. +* __deref_out_range should be used if the range can be explicitly stated. +* +* __assume_bound(expr); : Assume that the expression is bound to some known +* range. This can be used to suppress integer overflow warnings on integral +* expressions that are known to be bound due to reasons not explicit in the +* code. Use as a statement in the body of a function. +* +* __analysis_assume_nulltermianted(expr); : Assume that the expression is +* a null terminated buffer. Use this to suppress tool noise specific to +* nulltermination warnings, and capture deeper invariants tools can not +* discover. +* +* __allocator void f(): Function allocates memory using an integral size +* argument +* +* void myfree(__deallocate(Mem) void *p) : Memory is freed, no longer usable +* upon return, and p may not be null. +* +* void myfree(__deallocate_opt(Mem) void *p) : Memory is freed, no longer +* usable upon return, and p may be null. +* +* void free(__post_invalid void* x): Mark memory as untouchable when +* function returns. 
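/* A hedged sketch of the success and range annotations described above, placed
   before the header's own examples below. The functions, their bounds, and the
   HRESULT/S_OK types are invented for illustration; only the annotation shapes
   come from the documentation above. */
__success(return == S_OK)
__checkReturn
HRESULT ReserveSlots(
    __in_range(1, 64) unsigned int cSlots,               /* precondition: 1 <= cSlots <= 64 */
    __deref_out_range(<=, cSlots) unsigned int *pcUsed); /* postcondition: *pcUsed <= cSlots */

/* Relational-operator form on a return value: the result is never negative. */
__range(>=, 0) int CountPendingItems(void);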
+* +* ---------------------------------------------------------------------------- +* Advanced Annotation Examples +* +* __success(return == TRUE) LWSTDAPI_(BOOL) +* PathCanonicalizeA(__out_ecount(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath); +* // pszBuf is only guaranteed to be null-terminated when TRUE is returned. +* +* // Initialized LPWSTRs are null-terminated strings. +* typedef __nullterminated WCHAR* LPWSTR; +* +* __out_ecount(cch) __typefix(LPWSTR) void *psz; +* // psz is a buffer parameter which will be a null-terminated WCHAR string +* // at exit, and which initially contains cch WCHARs. +* +************************************************************************/ +#define _At_(expr, annotes) __allowed(on_parameter_or_return) +#define _When_(expr, annotes) __allowed(on_parameter_or_return) +#define __success(expr) _SAL_VERSION_CHECK(__success) +#define __out_awcount(expr,size) __allowed(on_parameter) +#define __in_awcount(expr,size) __allowed(on_parameter) +#define __nullterminated _SAL_VERSION_CHECK(__nullterminated) +#define __nullnullterminated _SAL_VERSION_CHECK(__nullnullterminated) +#define __reserved _SAL_VERSION_CHECK(__reserved) +#define __checkReturn _SAL_VERSION_CHECK(__checkReturn) +#define __typefix(ctype) __allowed(on_parameter_or_return) +#define __override __allowed(on_function) +#define __callback __allowed(on_function) +#define __format_string __allowed(on_parameter_or_return) +#define __blocksOn(resource) __allowed(on_function) +#define __fallthrough __allowed(as_statement) +#define __range(lb,ub) __allowed(on_return) +#define __in_range(lb,ub) _SAL_VERSION_CHECK(__in_range) +#define __out_range(lb,ub) _SAL_VERSION_CHECK(__out_range) +#define __deref_in_range(lb,ub) __allowed(on_parameter) +#define __deref_out_range(lb,ub) _SAL_VERSION_CHECK(__deref_out_range) +#define __deref_inout_range(lb,ub) __allowed(on_parameter) +#define __field_range(lb,ub) _SAL_VERSION_CHECK(__field_range) +#define __range_max(a,b) __allowed(on_return) +#define __range_min(a,b) __allowed(on_return) +#define __bound __allowed(on_return) +#define __in_bound __allowed(on_parameter) +#define __out_bound __allowed(on_parameter) +#define __deref_out_bound __allowed(on_parameter) +#define __assume_bound(i) __allowed(as_statement_with_arg(i)) +#define __analysis_assume_nullterminated(x) \ + __allowed(as_statement_with_arg(x)) +#define __allocator __allowed(on_function) +#define __deallocate(kind) __allowed(on_parameter) +#define __deallocate_opt(kind) __allowed(on_parameter) +#define __post_invalid __allowed(on_parameter_or_return) +#define __post_nullnullterminated \ + __allowed(on_parameter_or_return) +/*************************************************************************** +* Expert Macros +***************************************************************************/ +#define __null __allowed(on_typedecl) +#define __notnull __allowed(on_typedecl) +#define __maybenull __allowed(on_typedecl) +#define __exceptthat __allowed(on_typedecl) +/*************************************************************************** +* Macros to classify fields of structures. +* Structure Annotations +* +* The buffer annotations are a convenient way of describing +* relationships between buffers and their size on a function by +* function basis. Very often struct or class data members have similar +* invariants, which can be expressed directly on the type. 
+* +* Similar to our buffer annotations we can summarize all the various +* structure annotations by one choosing an element from each column of +* this table to build a composite annotation. +* +* +--------------------------------------------------+ +* | Selector | Units | Size/Init | Optional | +* |----------+---------+------------------+----------| +* | __field | _ecount | (size) | empty | +* |----------+---------+------------------+----------| +* | __struct | _bcount | _full(size) | _opt | +* |----------+---------+------------------+----------| +* | | _xcount | _part(size,init) | | +* +--------------------------------------------------+ +* +* Note that empty represents the empty string. Sometime arguments need +* to be "floated" to the left to give us a valid annotation name. For +* example the naive combination __field_ecount(size)_opt is actually +* written as __field_ecount_opt(size). Not all possible combinations +* are currently supported or sensible. See specstrings_strict.h for +* the currently supported set. Those that are supported are documented +* below. +* +*Summary of Elements +* +* Selector +* +* __field +* The annotation should only be placed in front +* of data members of structures and classes. The +* data members are pointers to a block of data. +* The annotations describe properties about the +* size of the block of data. This can be used for +* +* __struct +* The annotation should only be placed at the +* beginning of the definition of a structure or +* class. These annotations are used when a struct +* or class is used as a "header" that is +* allocated inline with a block of data and there +* is no apparent field that represents the tail +* end of the structure. +* +* Units +* +* _ecount +* All size and initialization values are in terms +* of elements of the appropriate type +* +* _bcount +* All size and initialization values are in terms +* of raw byte sizes. +* +* _xcount +* The size or initialization values cannot be +* properly expressed as a simple byte or element +* count, and instead a place holder is used to +* document the relationship. +* +* Size/Init +* All the size/init expressions can contain references to +* other fields in the struct or class. +* +* (size) +* The size of the buffer is determined by the +* expression size. Unless, the type of the buffer +* provides more information nothing is know about +* how much of this data is initialized. For +* example, if the data member happens to be a +* string type such as LPSTR. It is assumed that +* the data is initialized to the first '\0'. +* +* _full(size) +* The size of the buffer is determined by the +* expression size and all the data in the buffer +* is guaranteed to be initialized. +* +* _part(size,init) +* The size of the buffer is determined by the +* expression size and all the data in the buffer +* is guaranteed to be initialized up to init +* elements or bytes. 
+* +* Optional +* +* empty +* The pointer to the block of memory is never +* NULL +* +* _opt +* The pointer to the block of memory is may be +* NULL +* +* +* // Basic Usage of Struct Annotations +* #include +* #include +* struct buf_s { +* int sz; +* __field_bcount_full(sz) +* char *buf; +* }; +* void InitBuf(__out struct *buf_s b,int sz) { +* b->buf = calloc(sz,sizeof(char)); +* b->sz = sz; +* } +* void WriteBuf(__in FILE *fp,__in struct *buf_s b) { +* fwrite(b->buf,b->sz,sizeof(char),fp); +* } +* void ReadBuf(__in FILE *fp,__inout struct *buf_s b) { +* fread(b->buf,b->sz,sizeof(char),fp); +* } +* +* +* +* // Inline Allocated Buffer +* struct buf_s { +* int sz; +* __field_bcount(sz) +* char buf[1]; +* }; +* void WriteBuf(__in FILE *fp,__in struct *buf_s b) { +* fwrite(&(b->buf),b->sz,sizeof(char),fp); +* } +* void ReadBuf(__in FILE *fp,__inout struct *buf_s b) { +* fread(&(b->buf),b->sz,sizeof(char),fp); +* } +* +* +* +* // Embedded Header Structure +* __struct_bcount(sz) +* struct buf_s { +* int sz; +* }; +* void WriteBuf(__in FILE *fp,__in struct *buf_s b) { +* fwrite(&b,b->sz,sizeof(char),fp); +* } +* void ReadBuf(__in FILE *fp,__inout struct *buf_s b) { +* fread(&b,b->sz,sizeof(char),fp); +* } +* +* +****************************************************************************/ +#define __field_ecount(size) _SAL_VERSION_CHECK(__field_ecount) +#define __field_bcount(size) _SAL_VERSION_CHECK(__field_bcount) +#define __field_xcount(size) __allowed(on_field) +#define __field_ecount_opt(size) __allowed(on_field) +#define __field_bcount_opt(size) __allowed(on_field) +#define __field_xcount_opt(size) __allowed(on_field) +#define __field_ecount_part(size,init) __allowed(on_field) +#define __field_bcount_part(size,init) __allowed(on_field) +#define __field_xcount_part(size,init) __allowed(on_field) +#define __field_ecount_part_opt(size,init) __allowed(on_field) +#define __field_bcount_part_opt(size,init) __allowed(on_field) +#define __field_xcount_part_opt(size,init) __allowed(on_field) +#define __field_ecount_full(size) __allowed(on_field) +#define __field_bcount_full(size) __allowed(on_field) +#define __field_xcount_full(size) __allowed(on_field) +#define __field_ecount_full_opt(size) __allowed(on_field) +#define __field_bcount_full_opt(size) __allowed(on_field) +#define __field_xcount_full_opt(size) __allowed(on_field) +#define __field_nullterminated __allowed(on_field) +#define __struct_bcount(size) __allowed(on_struct) +#define __struct_xcount(size) __allowed(on_struct) + +/*************************************************************************** +* Macros to classify the entrypoints and indicate their category. +* +* Pre-defined control point categories include: RPC, KERNEL, GDI. +* +* Pre-defined control point macros include: +* __rpc_entry, __kernel_entry, __gdi_entry. +***************************************************************************/ +#define __control_entrypoint(category) __allowed(on_function) +#define __rpc_entry __allowed(on_function) +#define __kernel_entry __allowed(on_function) +#define __gdi_entry __allowed(on_function) + +/*************************************************************************** +* Macros to track untrusted data and their validation. 
The list of untrusted +* sources include: +* +* FILE - File reading stream or API +* NETWORK - Socket readers +* INTERNET - WinInet and WinHttp readers +* USER_REGISTRY - HKCU portions of the registry +* USER_MODE - Parameters to kernel entry points +* RPC - Parameters to RPC entry points +* DRIVER - Device driver +***************************************************************************/ +#define __in_data_source(src_sym) __allowed(on_parameter) +#define __out_data_source(src_sym) __allowed(on_parameter) +#define __field_data_source(src_sym) __allowed(on_field) +#define __this_out_data_source(src_syn) __allowed(on_function) + +/************************************************************************** +* Macros to tag file parsing code. Predefined formats include: +* PNG - Portable Network Graphics +* JPEG - Joint Photographic Experts Group +* BMP - Bitmap +* RC_BMP - Resource bitmap +* WMF - Windows Metafile +* EMF - Windows Enhanced Metafile +* GIF - Graphics Interchange Format +* MIME_TYPE - MIME type from header tokens +* MAIL_MONIKER - MAIL information refered by URL moniker +* HTML - HyperText Markup Language +* WMPHOTO - Windows media photo +* OE_VCARD - Outlook Express virtual card +* OE_CONTACT - Outlook Express contact +* MIDI - Musical Instrument Digital Interface +* LDIF - LDAP Data Interchange Format +* AVI - Audio Visual Interchange +* ACM - Audio Compression Manager +**************************************************************************/ +#define __out_validated(filetype_sym) __allowed(on_parameter) +#define __this_out_validated(filetype_sym) __allowed(on_function) +#define __file_parser(filetype_sym) __allowed(on_function) +#define __file_parser_class(filetype_sym) __allowed(on_struct) +#define __file_parser_library(filetype_sym) __allowed(as_global_decl) + +/*************************************************************************** +* Macros to track the code content in the file. The type of code +* contents currently tracked: +* +* NDIS_DRIVER - NDIS Device driver +***************************************************************************/ +#define __source_code_content(codetype_sym) __allowed(as_global_decl) + +/*************************************************************************** +* Macros to track the code content in the class. The type of code +* contents currently tracked: +* +* DCOM - Class implementing DCOM +***************************************************************************/ +#define __class_code_content(codetype_sym) __allowed(on_struct) + +/************************************************************************* +* Macros to tag encoded function pointers +**************************************************************************/ +#define __encoded_pointer +#define __encoded_array +#define __field_encoded_pointer __allowed(on_field) +#define __field_encoded_array __allowed(on_field) + +#define __transfer(formal) __allowed(on_parameter_or_return) +#define __assume_validated(exp) __allowed(as_statement_with_arg(exp)) + +/************************************************************************* +* __analysis_assume(expr) : Expert macro use only when directed. Use this to +* tell static analysis tools like PREfix and PREfast about a non-coded +* assumption that you wish the tools to assume. The assumption will be +* understood by those tools. By default there is no dynamic checking or +* static checking of the assumption in any build. +* +* To obtain dynamic checking wrap this macro in your local version of a debug +* assert. 
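/* One hypothetical way to follow the advice above about wrapping this macro in a
   local debug assert. The wrapper name, the _DEBUG convention, and the use of the
   C assert macro are assumptions for the example, not something this header
   provides; as noted just below, the expression should not contain function calls. */
#include <assert.h>

#ifdef _DEBUG
#define MY_ANALYSIS_ASSUME(e)  do { assert(e); __analysis_assume(e); } while (0)
#else
#define MY_ANALYSIS_ASSUME(e)  __analysis_assume(e)
#endif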
+* Please do not put function calls in the expression because this is not +* supported by all tools: +* __analysis_assume(GetObject () != NULL); // DO NOT DO THIS +* +*************************************************************************/ +#define __analysis_assume(expr) __allowed(as_statement_with_arg(expr)) +#define __analysis_assert(expr) __allowed(as_statement_with_arg(expr)) + +/************************************************************************* +* __analysis_hint(hint_sym) : Expert macro use only when +* directed. Use this to influence certain analysis heuristics +* used by the tools. These hints do not describe the semantics +* of functions but simply direct the tools to act in a certain +* way. +* +* Current hints that are supported are: +* +* INLINE - inline this function during analysis overrides any +* default heuristics +* NOINLINE - do not inline this function during analysis overrides +* and default heuristics +*************************************************************************/ +#define __analysis_hint(hint) __allowed(on_function) + +/************************************************************************* +* Macros to encode abstract properties of values. Used by SALadt.h +*************************************************************************/ +#define __type_has_adt_prop(adt,prop) __allowed(on_typdecl) +#define __out_has_adt_prop(adt,prop) __allowed(on_parameter) +#define __out_not_has_adt_prop(adt,prop) __allowed(on_parameter) +#define __out_transfer_adt_prop(arg) __allowed(on_parameter) +#define __out_has_type_adt_props(typ) __allowed(on_parameter) + +/************************************************************************* +* Macros used by Prefast for Drivers +* +* __possibly_notnullterminated : +* +* Used for return values of parameters or functions that do not +* guarantee nulltermination in all cases. +* +*************************************************************************/ +#define __possibly_notnullterminated __allowed(on_parameter_or_return) + +/************************************************************************* +* Advanced macros +* +* __volatile +* The __volatile annotation identifies a global variable or +* structure field that: +* 1) is not declared volatile; +* 2) is accessed concurrently by multiple threads. +* +* The __deref_volatile annotation identifies a global variable +* or structure field that stores a pointer to some data that: +* 1) is not declared volatile; +* 2) is accessed concurrently by multiple threads. +* +* Prefast uses these annotations to find patterns of code that +* may result in unexpected re-fetching of the global variable +* into a local variable. +* +* We also provide two complimentary annotations __nonvolatile +* and __deref_nonvolatile that could be used to suppress Prefast +* +* re-fetching warnings on variables that are known either: +* 1) not to be in danger of being re-fetched or, +* 2) not to lead to incorrect results if they are re-fetched +* +*************************************************************************/ +#define __volatile __allowed(on_global_or_field) +#define __deref_volatile __allowed(on_global_or_field) +#define __nonvolatile __allowed(on_global_or_field) +#define __deref_nonvolatile __allowed(on_global_or_field) + +/************************************************************************* +* Macros deprecated with strict level greater then 1. 
+**************************************************************************/ +#if (__SPECSTRINGS_STRICT_LEVEL > 1) +/* Must come before macro defintions */ +#pragma deprecated(__in_nz) +#pragma deprecated(__in_ecount_nz) +#pragma deprecated(__in_bcount_nz) +#pragma deprecated(__out_nz) +#pragma deprecated(__out_nz_opt) +#pragma deprecated(__out_ecount_nz) +#pragma deprecated(__out_bcount_nz) +#pragma deprecated(__inout_nz) +#pragma deprecated(__inout_ecount_nz) +#pragma deprecated(__inout_bcount_nz) +#pragma deprecated(__in_nz_opt) +#pragma deprecated(__in_ecount_nz_opt) +#pragma deprecated(__in_bcount_nz_opt) +#pragma deprecated(__out_ecount_nz_opt) +#pragma deprecated(__out_bcount_nz_opt) +#pragma deprecated(__inout_nz_opt) +#pragma deprecated(__inout_ecount_nz_opt) +#pragma deprecated(__inout_bcount_nz_opt) +#pragma deprecated(__deref_out_nz) +#pragma deprecated(__deref_out_ecount_nz) +#pragma deprecated(__deref_out_bcount_nz) +#pragma deprecated(__deref_inout_nz) +#pragma deprecated(__deref_inout_ecount_nz) +#pragma deprecated(__deref_inout_bcount_nz) +#pragma deprecated(__deref_out_nz_opt) +#pragma deprecated(__deref_out_ecount_nz_opt) +#pragma deprecated(__deref_out_bcount_nz_opt) +#pragma deprecated(__deref_inout_nz_opt) +#pragma deprecated(__deref_inout_ecount_nz_opt) +#pragma deprecated(__deref_inout_bcount_nz_opt) +#pragma deprecated(__deref_opt_inout_nz) +#pragma deprecated(__deref_opt_inout_ecount_nz) +#pragma deprecated(__deref_opt_inout_bcount_nz) +#pragma deprecated(__deref_opt_out_nz_opt) +#pragma deprecated(__deref_opt_out_ecount_nz_opt) +#pragma deprecated(__deref_opt_out_bcount_nz_opt) +#pragma deprecated(__deref_opt_inout_nz_opt) +#pragma deprecated(__deref_opt_inout_ecount_nz_opt) +#pragma deprecated(__deref_opt_inout_bcount_nz_opt) +#pragma deprecated(__deref) +#pragma deprecated(__pre) +#pragma deprecated(__post) +#pragma deprecated(__readableTo) +#pragma deprecated(__writableTo) +#pragma deprecated(__maybevalid) +#pragma deprecated(__data_entrypoint) +#pragma deprecated(__inexpressible_readableTo) +#pragma deprecated(__readonly) +#pragma deprecated(__byte_writableTo) +#pragma deprecated(__byte_readableTo) +#pragma deprecated(__elem_readableTo) +#pragma deprecated(__elem_writableTo) +#pragma deprecated(__valid) +#pragma deprecated(__notvalid) +#pragma deprecated(__refparam) +#pragma deprecated(__precond) +#endif +/* Define soon to be deprecated macros to nops. 
*/ +#define __in_nz +#define __in_ecount_nz(size) +#define __in_bcount_nz(size) +#define __out_nz +#define __out_nz_opt +#define __out_ecount_nz(size) +#define __out_bcount_nz(size) +#define __inout_nz +#define __inout_ecount_nz(size) +#define __inout_bcount_nz(size) +#define __in_nz_opt +#define __in_ecount_nz_opt(size) +#define __in_bcount_nz_opt(size) +#define __out_ecount_nz_opt(size) +#define __out_bcount_nz_opt(size) +#define __inout_nz_opt +#define __inout_ecount_nz_opt(size) +#define __inout_bcount_nz_opt(size) +#define __deref_out_nz +#define __deref_out_ecount_nz(size) +#define __deref_out_bcount_nz(size) +#define __deref_inout_nz +#define __deref_inout_ecount_nz(size) +#define __deref_inout_bcount_nz(size) +#define __deref_out_nz_opt +#define __deref_out_ecount_nz_opt(size) +#define __deref_out_bcount_nz_opt(size) +#define __deref_inout_nz_opt +#define __deref_inout_ecount_nz_opt(size) +#define __deref_inout_bcount_nz_opt(size) +#define __deref_opt_inout_nz +#define __deref_opt_inout_ecount_nz(size) +#define __deref_opt_inout_bcount_nz(size) +#define __deref_opt_out_nz_opt +#define __deref_opt_out_ecount_nz_opt(size) +#define __deref_opt_out_bcount_nz_opt(size) +#define __deref_opt_inout_nz_opt +#define __deref_opt_inout_ecount_nz_opt(size) +#define __deref_opt_inout_bcount_nz_opt(size) +#define __deref +#define __pre +#define __post +#define __readableTo(count) +#define __writableTo(count) +#define __maybevalid +#define __inexpressible_readableTo(string) +#define __data_entrypoint(category) +#define __readonly +#define __byte_writableTo(count) +#define __byte_readableTo(count) +#define __elem_readableTo(count) +#define __elem_writableTo(count) +#define __valid +#define __notvalid +#define __refparam +#define __precond(condition) + +/************************************************************************* +* Definitions to force a compile error when macros are used improperly. +* Relies on VS 2005 source annotations. +*************************************************************************/ +#if !defined(_MSC_EXTENSIONS) && !defined(_PREFAST_) && !defined(OACR) +#define __allowed(p) /* nothing */ +#else +#define __allowed(p) __$allowed_##p +#define __$allowed_as_global_decl /* empty */ +#define __$allowed_as_statement_with_arg(x) \ + __pragma(warning(push)) __pragma(warning(disable : 4548)) \ + do {__noop(x);} while((0,0) __pragma(warning(pop)) ) +#define __$allowed_as_statement __$allowed_as_statement_with_arg(1) + +/************************************************************************** +* This should go away. It's only for __success which we should split into. +* __success and __typdecl_sucess +***************************************************************************/ +#define __$allowed_on_function_or_typedecl /* empty */ +#if (__SPECSTRINGS_STRICT_LEVEL == 1) || (__SPECSTRINGS_STRICT_LEVEL == 2) +#define __$allowed_on_typedecl /* empty */ +#define __$allowed_on_return /* empty */ +#define __$allowed_on_parameter /* empty */ +#define __$allowed_on_function /* empty */ +#define __$allowed_on_struct /* empty */ +#define __$allowed_on_field /* empty */ +#define __$allowed_on_parameter_or_return /* empty */ +#define __$allowed_on_global_or_field /* empty */ +#elif __SPECSTRINGS_STRICT_LEVEL == 3 +#define __$allowed_on_typedecl /* empty */ +/* Define dummy source attributes. 
Still needs more testing */ +#define __$allowed_on_return [returnvalue: OnReturnOnly] +#define __$allowed_on_parameter [OnParameterOnly] +#define __$allowed_on_function [method: OnFunctionOnly] +#define __$allowed_on_struct [OnStructOnly] +#define __$allowed_on_field [OnFieldOnly] +#define __$allowed_on_parameter_or_return [OnParameterOrReturnOnly] +#define __$allowed_on_global_or_field /* empty */ +#pragma push_macro( "DECL_SA" ) +#pragma push_macro( "SA" ) +#ifdef __cplusplus +#define SA(x) x +#define DECL_SA(name,loc) \ + [repeatable] \ + [source_annotation_attribute( loc )] \ + struct name##Attribute { name##Attribute(); const char* ignored; }; +#else +#define SA(x) SA_##x +#define DECL_SA(name,loc) \ + [source_annotation_attribute( loc )] \ + struct name { const char* ignored; };\ + typedef struct name name; +#endif /* #endif __cplusplus */ +DECL_SA(OnParameterOnly,SA(Parameter)); +DECL_SA(OnReturnOnly,SA(ReturnValue)); +DECL_SA(OnFunctionOnly,SA(Method)); +DECL_SA(OnStructOnly,SA(Struct)); +DECL_SA(OnFieldOnly,SA(Field)); +DECL_SA(OnParameterOrReturnOnly,SA(Parameter) | SA(ReturnValue)); +#pragma pop_macro( "SA" ) +#pragma pop_macro( "DECL_SA" ) +#endif +#endif +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacros.inc new file mode 100644 index 0000000000000..ef6d393fd248b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacros.inc @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#define INVALIDGCVALUE 0xCCCCCCCD + +#if defined(__APPLE__) +#define C_FUNC(name) _##name +#define EXTERNAL_C_FUNC(name) C_FUNC(name) +#define LOCAL_LABEL(name) L##name +#else +#define C_FUNC(name) name +#define EXTERNAL_C_FUNC(name) C_FUNC(name)@plt +#define LOCAL_LABEL(name) .L##name +#endif + +#if defined(__APPLE__) +#define C_PLTFUNC(name) _##name +#else +#define C_PLTFUNC(name) name@PLT +#endif + +.macro END_PROLOGUE +.endm + +.macro SETALIAS New, Old + .equiv \New, \Old +.endm + +#if defined(HOST_AMD64) +#include "unixasmmacrosamd64.inc" +#elif defined(HOST_ARM) +#include "unixasmmacrosarm.inc" +#elif defined(HOST_ARM64) +#include "unixasmmacrosarm64.inc" +#elif defined(HOST_X86) +#include "unixasmmacrosx86.inc" +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosamd64.inc new file mode 100644 index 0000000000000..b45c31007f0f1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -0,0 +1,351 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#define C_VAR(Name) rip + C_FUNC(Name) + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler +#if defined(__APPLE__) + .cfi_personality 0x9b, C_FUNC(\Handler) // 0x9b == DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 +#else + .cfi_personality 0, C_FUNC(\Handler) // 0 == DW_EH_PE_absptr +#endif + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +#if defined(__APPLE__) + .set LOCAL_LABEL(\Name\()_Size), . 
- C_FUNC(\Name) + .section __LD,__compact_unwind,regular,debug + .quad C_FUNC(\Name) + .long LOCAL_LABEL(\Name\()_Size) + .long 0x04000000 # DWARF + .quad 0 + .quad 0 +#endif +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) +#if defined(__APPLE__) + .text +#else + .global C_FUNC(_\Name) + .type \Name, %function +#endif +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro LEAF_END Name, Section +#if !defined(__APPLE__) + .size \Name, .-\Name +#endif + .cfi_endproc +.endm + +.macro push_nonvol_reg Register + push \Register + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset \Register, 0 +.endm + +.macro pop_nonvol_reg Register + pop \Register + .cfi_adjust_cfa_offset -8 + .cfi_restore \Register +.endm + +.macro alloc_stack Size +.att_syntax + lea -(\Size)(%rsp), %rsp +.intel_syntax noprefix + .cfi_adjust_cfa_offset (\Size) +.endm + +.macro free_stack Size +.att_syntax + lea (\Size)(%rsp), %rsp +.intel_syntax noprefix + .cfi_adjust_cfa_offset -(\Size) +.endm + +.macro set_cfa_register Reg, Offset + .cfi_def_cfa_register \Reg + .cfi_def_cfa_offset \Offset +.endm + +.macro save_reg_postrsp Reg, Offset + __Offset = \Offset + mov qword ptr [rsp + __Offset], \Reg + .cfi_rel_offset \Reg, __Offset +.endm + +.macro restore_reg Reg, Offset + __Offset = \Offset + mov \Reg, [rsp + __Offset] + .cfi_restore \Reg +.endm + +.macro save_xmm128_postrsp Reg, Offset + __Offset = \Offset + movdqa xmmword ptr [rsp + __Offset], \Reg + // NOTE: We cannot use ".cfi_rel_offset \Reg, __Offset" here, + // the xmm registers are not supported by the libunwind +.endm + +.macro restore_xmm128 Reg, ofs + __Offset = \ofs + movdqa \Reg, xmmword ptr [rsp + __Offset] + // NOTE: We cannot use ".cfi_restore \Reg" here, + // the xmm registers are not supported by the libunwind + +.endm + +.macro RESET_FRAME_WITH_RBP + + mov rsp, rbp + set_cfa_register rsp, 16 + pop_nonvol_reg rbp + .cfi_same_value rbp + +.endm + +.macro PUSH_CALLEE_SAVED_REGISTERS + + push_register rbp + push_register rbx + push_register r15 + push_register r14 + push_register r13 + push_register r12 + +.endm + +.macro POP_CALLEE_SAVED_REGISTERS + + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 + pop_nonvol_reg rbx + pop_nonvol_reg rbp + +.endm + +.macro push_register Reg + push \Reg + .cfi_adjust_cfa_offset 8 +.endm + +.macro push_imm imm +.att_syntax + push $\imm +.intel_syntax noprefix + .cfi_adjust_cfa_offset 8 +.endm + +.macro push_eflags + pushfq + .cfi_adjust_cfa_offset 8 +.endm + +.macro push_argument_register Reg + push_register \Reg +.endm + +.macro PUSH_ARGUMENT_REGISTERS + + push_argument_register r9 + push_argument_register r8 + push_argument_register rcx + push_argument_register rdx + push_argument_register rsi + push_argument_register rdi + +.endm + +.macro pop_register Reg + pop \Reg + .cfi_adjust_cfa_offset -8 +.endm + +.macro pop_eflags + popfq + .cfi_adjust_cfa_offset -8 +.endm + +.macro pop_argument_register Reg + pop_register \Reg +.endm + +.macro POP_ARGUMENT_REGISTERS + + pop_argument_register rdi + pop_argument_register rsi + pop_argument_register rdx + pop_argument_register rcx + pop_argument_register r8 + pop_argument_register r9 + +.endm + +#define SIZEOF_FP_REGS 0x80 + +.macro SAVE_FLOAT_ARGUMENT_REGISTERS ofs + + save_xmm128_postrsp xmm0, \ofs + save_xmm128_postrsp xmm1, \ofs + 0x10 + save_xmm128_postrsp xmm2, \ofs + 0x20 + save_xmm128_postrsp 
xmm3, \ofs + 0x30 + save_xmm128_postrsp xmm4, \ofs + 0x40 + save_xmm128_postrsp xmm5, \ofs + 0x50 + save_xmm128_postrsp xmm6, \ofs + 0x60 + save_xmm128_postrsp xmm7, \ofs + 0x70 + +.endm + +.macro RESTORE_FLOAT_ARGUMENT_REGISTERS ofs + + restore_xmm128 xmm0, \ofs + restore_xmm128 xmm1, \ofs + 0x10 + restore_xmm128 xmm2, \ofs + 0x20 + restore_xmm128 xmm3, \ofs + 0x30 + restore_xmm128 xmm4, \ofs + 0x40 + restore_xmm128 xmm5, \ofs + 0x50 + restore_xmm128 xmm6, \ofs + 0x60 + restore_xmm128 xmm7, \ofs + 0x70 + +.endm + +.macro EXPORT_POINTER_TO_ADDRESS Name + +// NOTE: The label is intentionally left as 2 - otherwise on OSX 0b or 1b will be incorrectly interpreted as binary integers + +2: + + .data + .align 8 +C_FUNC(\Name): + .quad 2b + .global C_FUNC(\Name) + .text + +.endm + +// +// CONSTANTS -- INTEGER +// +#define TSF_Attached 0x01 +#define TSF_SuppressGcStress 0x08 +#define TSF_DoNotTriggerGc 0x10 + +// +// Rename fields of nested structs +// +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +#define OFFSETOF__Thread__m_alloc_context__alloc_limit OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + +// GC type flags +#define GC_ALLOC_FINALIZE 1 + +// Note: these must match the defs in PInvokeTransitionFrameFlags +#define PTFF_SAVE_RBX 0x00000001 +#define PTFF_SAVE_R12 0x00000010 +#define PTFF_SAVE_R13 0x00000020 +#define PTFF_SAVE_R14 0x00000040 +#define PTFF_SAVE_R15 0x00000080 +#define PTFF_SAVE_ALL_PRESERVED 0x000000F1 // NOTE: RBP is not included in this set! +#define PTFF_SAVE_RSP 0x00008000 +#define PTFF_SAVE_RAX 0x00000100 // RAX is saved if it contains a GC ref and we're in hijack handler +#define PTFF_SAVE_ALL_SCRATCH 0x00007F00 +#define PTFF_RAX_IS_GCREF 0x00010000 // iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar +#define PTFF_RAX_IS_BYREF 0x00020000 // iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar +#define PTFF_THREAD_ABORT 0x00040000 // indicates that ThreadAbortException should be thrown when returning from the transition + +// These must match the TrapThreadsFlags enum +#define TrapThreadsFlags_None 0 +#define TrapThreadsFlags_AbortInProgress 1 +#define TrapThreadsFlags_TrapThreads 2 + +.macro INLINE_GET_TLS_VAR Var + .att_syntax +#if defined(__APPLE__) + movq _\Var@TLVP(%rip), %rdi + callq *(%rdi) +#else + leaq \Var@TLSLD(%rip), %rdi + callq __tls_get_addr@PLT + addq $\Var@DTPOFF, %rax +#endif + .intel_syntax noprefix +.endm + + +.macro INLINE_GETTHREAD + // Inlined version of call C_FUNC(RhpGetThread) + INLINE_GET_TLS_VAR tls_CurrentThread +.endm + +.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 + // + // Thread::Unhijack() + // + mov \trashReg1, [\threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress] + cmp \trashReg1, 0 + je 1f + + mov \trashReg2, [\threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + mov [\trashReg2], \trashReg1 + mov qword ptr [\threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov qword ptr [\threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + +1: +.endm + +DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP + +.macro PUSH_COOP_PINVOKE_FRAME trashReg + push_nonvol_reg rbp // push RBP frame + mov rbp, rsp + lea \trashReg, [rsp + 0x10] + push_register \trashReg // save caller's RSP + push_nonvol_reg r15 // save preserved registers + push_nonvol_reg r14 // .. + push_nonvol_reg r13 // .. + push_nonvol_reg r12 // .. 
+ push_nonvol_reg rbx // .. + push_imm DEFAULT_FRAME_SAVE_FLAGS // save the register bitmask + push_register \trashReg // Thread * (unused by stackwalker) + mov \trashReg, [rsp + 8*8] // Find and save the callers RBP + push_register \trashReg + mov \trashReg, [rsp + 10*8] // Find and save the return address + push_register \trashReg + lea \trashReg, [rsp] // trashReg == address of frame +.endm + +.macro POP_COOP_PINVOKE_FRAME + pop_register r10 // discard RIP + pop_nonvol_reg rbp // restore RBP + pop_register r10 // discard thread + pop_register r10 // discard bitmask + pop_nonvol_reg rbx + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 + pop_register r10 // discard caller RSP + pop_register r10 // discard RBP frame +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm.inc new file mode 100644 index 0000000000000..73a9968f3b8d6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -0,0 +1,306 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// CONSTANTS -- INTEGER +// + +// GC type flags +#define GC_ALLOC_FINALIZE 1 +#define GC_ALLOC_ALIGN8_BIAS 4 +#define GC_ALLOC_ALIGN8 8 + +#define TSF_Attached 0x01 +#define TSF_SuppressGcStress 0x08 +#define TSF_DoNotTriggerGc 0x10 + +#define PTFF_SAVE_ALL_PRESERVED 0x0000007F // NOTE: R11 is not included in this set! +#define PTFF_SAVE_SP 0x00000100 +#define DEFAULT_FRAME_SAVE_FLAGS (PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP) + +// These must match the TrapThreadsFlags enum +#define TrapThreadsFlags_None 0 +#define TrapThreadsFlags_AbortInProgress 1 +#define TrapThreadsFlags_TrapThreads 2 + +// Rename fields of nested structs +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) + +// GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). 
+#define SIZEOF__MinObject 12 + +// Maximum subsection number in .text section +#define MAX_NUMBER_SUBSECTION_TEXT 0x2000 + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .personality C_FUNC(\Handler) + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro PATCH_LABEL Name + .thumb_func + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .thumb_func + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .fnstart +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .fnend +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + ldr \HelperReg, [pc, #C_FUNC(\Name)@GOTPCREL] +.endm + +.macro push_nonvol_reg Register + push \Register + .save \Register +.endm + +.macro pop_nonvol_reg Register + pop \Register +.endm + +.macro vpush_nonvol_reg Register + vpush \Register + .vsave \Register +.endm + +.macro vpop_nonvol_reg Register + vpop \Register +.endm + +.macro alloc_stack Size + sub sp, sp, (\Size) + .pad #(\Size) +.endm + +.macro free_stack Size + add sp, sp, (\Size) + .pad #-(\Size) +.endm + +.macro POP_CALLEE_SAVED_REGISTERS + pop_nonvol_reg "{r4-r11, lr}" +.endm + +.macro PUSH_CALLEE_SAVED_REGISTERS + push_nonvol_reg "{r4-r11, lr}" +.endm + +.macro push_register Reg + push \Reg +.endm + +.macro push_argument_register Reg + push_register \Reg +.endm + +.macro PUSH_ARGUMENT_REGISTERS + push {r0-r3} +.endm + +.macro pop_register Reg + pop \Reg +.endm + +.macro pop_argument_register Reg + pop_register \Reg +.endm + +.macro POP_ARGUMENT_REGISTERS + pop {r0-r3} +.endm + +.macro EMIT_BREAKPOINT + .inst.w 0xde01 +.endm + +.macro PROLOG_PUSH RegList + push_nonvol_reg "\RegList" +.endm + +.macro PROLOG_VPUSH RegList + vpush_nonvol_reg "\RegList" +.endm + +.macro PROLOG_STACK_SAVE Register + .setfp \Register, sp + mov \Register, sp +.endm + +.macro EPILOG_STACK_RESTORE Register + mov sp, \Register +.endm + +.macro EPILOG_POP RegList + pop_nonvol_reg "\RegList" +.endm + +.macro EPILOG_VPOP RegList + vpop_nonvol_reg "\RegList" +.endm + +.macro PROLOG_STACK_ALLOC Size + sub sp, sp, #\Size + .pad #\Size +.endm + +.macro EPILOG_STACK_FREE Size + add sp, sp, #\Size + .pad #-\Size +.endm + +//----------------------------------------------------------------------------- +// Macro used to check (in debug builds only) whether the stack is 64-bit aligned (a requirement before calling +// out into C++/OS code). Invoke this directly after your prolog (if the stack frame size is fixed) or directly +// before a call (if you have a frame pointer and a dynamic stack). A breakpoint will be invoked if the stack +// is misaligned. +// +.macro CHECK_STACK_ALIGNMENT + +#ifdef _DEBUG + push {r0} + add r0, sp, #4 + tst r0, #7 + pop {r0} + beq 0f + EMIT_BREAKPOINT +0: +#endif +.endm + +// Loads a 32bit constant into destination register +.macro MOV32 DestReg, Constant + movw \DestReg, #((\Constant) & 0xFFFF) + movt \DestReg, #((\Constant) >> 16) +.endm + +.macro EXPORT_POINTER_TO_ADDRESS Name + +1: + + .data + .align 4 +C_FUNC(\Name): + .word 1b + 1 // Add 1 to indicate thumb code + .global C_FUNC(\Name) + .text + +.endm + +// +// Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +// into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +// case for helpers that meddle in GC state (e.g. 
allocation helpers) where the code must remain in +// cooperative mode since it handles object references and internal GC state directly but a garbage collection +// may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +// unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +// interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +// the helper's caller). +// +// This macro builds a frame describing the current state of managed code. +// +// INVARIANTS +// - The macro assumes it defines the method prolog, it should typically be the first code in a method and +// certainly appear before any attempt to alter the stack pointer. +// - This macro uses trashReg (after its initial value has been saved in the frame) and upon exit trashReg +// will contain the address of transition frame. +// +.macro PUSH_COOP_PINVOKE_FRAME trashReg + + PROLOG_STACK_ALLOC 4 // Save space for caller's SP + PROLOG_PUSH "{r4-r10}" // Save preserved registers + PROLOG_STACK_ALLOC 8 // Save space for flags and Thread* + PROLOG_PUSH "{r11}" // Save caller's FP + PROLOG_PUSH "{r11,lr}" // Save caller's frame-chain pointer and PC + + // Compute SP value at entry to this method and save it in the last slot of the frame (slot #12). + add \trashReg, sp, #(13 * 4) + str \trashReg, [sp, #(12 * 4)] + + // Record the bitmask of saved registers in the frame (slot #4). + mov \trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str \trashReg, [sp, #(4 * 4)] + + mov \trashReg, sp +.endm + +// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +.macro POP_COOP_PINVOKE_FRAME + EPILOG_POP "{r11,lr}" // Restore caller's frame-chain pointer and PC (return address) + EPILOG_POP "{r11}" // Restore caller's FP + EPILOG_STACK_FREE 8 // Discard flags and Thread* + EPILOG_POP "{r4-r10}" // Restore preserved registers + EPILOG_STACK_FREE 4 // Discard caller's SP +.endm + +#ifdef _DEBUG +.macro GEN_ASSERT message, fileName, funcName + ldr r0, =\message + ldr r1, =\fileName + ldr r2, =\funcName + bl C_FUNC(NYI_Assert) +.endm +#endif + +// thumb with PIC version +.macro INLINE_GET_TLS_VAR Var + ldr r0, 2f +1: + add r0, pc, r0 + bl __tls_get_addr(PLT) + // push data at the end of text section + .pushsection .text, MAX_NUMBER_SUBSECTION_TEXT, "aM", %progbits, 4 + .balign 4 +2: + .4byte \Var(TLSGD) + (. - 1b - 4) + .popsection +.endm + +.macro INLINE_GETTHREAD + // Inlined version of call C_FUNC(RhpGetThread) + INLINE_GET_TLS_VAR tls_CurrentThread +.endm + +.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 + // + // Thread::Unhijack() + // + ldr \trashReg1, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz \trashReg1, 1f + + ldr \trashReg2, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str \trashReg1, [\trashReg2] + mov \trashReg1, #0 + str \trashReg1, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str \trashReg1, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +1: +.endm + +.macro EPILOG_BRANCH_REG reg + + bx \reg + +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm64.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm64.inc new file mode 100644 index 0000000000000..d031a77085e2c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm64.inc @@ -0,0 +1,139 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .cfi_personality 0, C_FUNC(\Handler) // 0 == DW_EH_PE_absptr + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .cfi_endproc +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + ldr \HelperReg, [pc, #C_FUNC(\Name)@GOTPCREL] +.endm + +.macro PROLOG_STACK_ALLOC Size + sub sp, sp, \Size + .cfi_adjust_cfa_offset \Size +.endm + +.macro EPILOG_STACK_FREE Size + add sp, sp, \Size +.endm + +.macro EPILOG_STACK_RESTORE + mov sp, fp +.endm + +.macro PROLOG_SAVE_REG reg, ofs + str \reg, [sp, \ofs] +.endm + +.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs] + .ifc \reg1, fp + mov fp, sp + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs]! + .ifc \reg1, fp + mov fp, sp + .endif +.endm + +.macro EPILOG_RESTORE_REG reg, ofs + ldr \reg, [sp, \ofs] +.endm + +.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs + ldp \reg1, \reg2, [sp, \ofs] +.endm + +.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs + ldp \reg1, \reg2, [sp], \ofs +.endm + +.macro EPILOG_RETURN + ret +.endm + +.macro EMIT_BREAKPOINT + brk #0 +.endm + +//----------------------------------------------------------------------------- +// The Following sets of SAVE_*_REGISTERS expect the memory to be reserved and +// base address to be passed in $reg +// + +// Reserve 64 bytes of memory before calling SAVE_ARGUMENT_REGISTERS +.macro SAVE_ARGUMENT_REGISTERS reg, ofs + + stp x0, x1, [\reg, #(\ofs)] + stp x2, x3, [\reg, #(\ofs + 16)] + stp x4, x5, [\reg, #(\ofs + 32)] + stp x6, x7, [\reg, #(\ofs + 48)] + +.endm + +// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS +.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs + + stp d0, d1, [\reg, #(\ofs)] + stp d2, d3, [\reg, #(\ofs + 16)] + stp d4, d5, [\reg, #(\ofs + 32)] + stp d6, d7, [\reg, #(\ofs + 48)] + +.endm + +.macro RESTORE_ARGUMENT_REGISTERS reg, ofs + + ldp x0, x1, [\reg, #(\ofs)] + ldp x2, x3, [\reg, #(\ofs + 16)] + ldp x4, x5, [\reg, #(\ofs + 32)] + ldp x6, x7, [\reg, #(\ofs + 48)] + +.endm + +.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs + + ldp d0, d1, [\reg, #(\ofs)] + ldp d2, d3, [\reg, #(\ofs + 16)] + ldp d4, d5, [\reg, #(\ofs + 32)] + ldp d6, d7, [\reg, #(\ofs + 48)] + +.endm + +.macro EPILOG_BRANCH_REG reg + + br \reg + +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosx86.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosx86.inc new file mode 100644 index 0000000000000..08d57bb93be0c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosx86.inc @@ -0,0 +1,117 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
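+
+// The macros below mirror the prolog/epilog and CFI bookkeeping conventions of the other
+// unixasmmacros*.inc files, specialized for x86. Purely as an illustrative sketch (the
+// helper name is hypothetical and not part of this change), an EBP-framed assembly helper
+// would typically pair them as follows:
+//
+//     LEAF_ENTRY RhpExampleHelper, _TEXT
+//         PROLOG_BEG              // push ebp; mov ebp, esp
+//         PROLOG_PUSH ebx         // preserve a callee-saved register
+//         PROLOG_END              // CFA is now tracked relative to ebp
+//         ...                     // helper body
+//         EPILOG_BEG
+//         EPILOG_POP ebx
+//         EPILOG_END              // pop ebp
+//         ret
+//     LEAF_END RhpExampleHelper, _TEXT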
+ +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4 + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .cfi_endproc +.endm + +.macro LEAF_END_MARKED Name, Section +C_FUNC(\Name\()_End): + .global C_FUNC(\Name\()_End) + LEAF_END \Name, \Section +.endm + +.macro PROLOG_BEG + push ebp + .cfi_def_cfa_offset 8 + .cfi_offset ebp, -8 + mov ebp, esp +.endm + +.macro PROLOG_PUSH Reg + push \Reg + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset \Reg, 0 +.endm + +.macro PROLOG_END + .cfi_def_cfa_register ebp + .cfi_def_cfa_offset 8 +.endm + +.macro EPILOG_BEG +.endm + +.macro EPILOG_POP Reg + pop \Reg + .cfi_restore \Reg +.endm + +.macro EPILOG_END + pop ebp +.endm + +.macro ESP_PROLOG_BEG +.endm + +.macro ESP_PROLOG_PUSH Reg + PROLOG_PUSH \Reg +.endm + +.macro ESP_PROLOG_ALLOC Size + sub esp, \Size + .cfi_adjust_cfa_offset \Size +.endm + +.macro ESP_PROLOG_END + .cfi_def_cfa_register esp +.endm + +.macro ESP_EPILOG_BEG +.endm + +.macro ESP_EPILOG_POP Reg + EPILOG_POP \Reg +.endm + +.macro ESP_EPILOG_FREE Size + add esp, \Size + .cfi_adjust_cfa_offset -\Size +.endm + +.macro ESP_EPILOG_END +.endm + +.macro PREPARE_EXTERNAL_VAR Name, Reg +.att_syntax + call 0f +0: + popl %\Reg +1: + addl $_GLOBAL_OFFSET_TABLE_ + (1b - 0b), %\Reg + movl C_FUNC(\Name)@GOT(%\Reg), %\Reg +.intel_syntax noprefix +.endm + +.macro CHECK_STACK_ALIGNMENT +#ifdef _DEBUG + test esp, 0Fh + je 0f + int3 +0: +#endif // _DEBUG +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/wasm/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/wasm/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..23976f026fed4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/wasm/AsmOffsetsCpu.h @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
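+// A mismatched entry is intended to surface as a build-time failure rather than as silent
+// stack-walk corruption at runtime; the checking itself is driven by AsmOffsets.h.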
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(a4, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(4, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(8, ExInfo, m_exception) +PLAT_ASM_OFFSET(0c, ExInfo, m_kind) +PLAT_ASM_OFFSET(0d, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(14, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(a0, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_SIZEOF(8c, StackFrameIterator) +PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(0c, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(88, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(4, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_SIZEOF(0c, REGDISPLAY) +PLAT_ASM_OFFSET(0, REGDISPLAY, SP) diff --git a/src/coreclr/src/nativeaot/Runtime/windows/AsmOffsets.cpp b/src/coreclr/src/nativeaot/Runtime/windows/AsmOffsets.cpp new file mode 100644 index 0000000000000..8a852548d49eb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/AsmOffsets.cpp @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#if defined(HOST_ARM) || defined(HOST_ARM64) + +#define HASH_DEFINE #define +#define PLAT_ASM_OFFSET(offset, cls, member) HASH_DEFINE OFFSETOF__##cls##__##member 0x##offset +#define PLAT_ASM_SIZEOF(size, cls ) HASH_DEFINE SIZEOF__##cls 0x##size +#define PLAT_ASM_CONST(constant, expr) HASH_DEFINE expr 0x##constant + +#else + +#define PLAT_ASM_OFFSET(offset, cls, member) OFFSETOF__##cls##__##member equ 0##offset##h +#define PLAT_ASM_SIZEOF(size, cls ) SIZEOF__##cls equ 0##size##h +#define PLAT_ASM_CONST(constant, expr) expr equ 0##constant##h + +#endif + +#include "AsmOffsets.h" diff --git a/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp new file mode 100644 index 0000000000000..c42dff7b72c4a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -0,0 +1,850 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" + +#include + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "regdisplay.h" +#include "ICodeManager.h" +#include "CoffNativeCodeManager.h" +#include "varint.h" +#include "holder.h" + +#include "CommonMacros.inl" + +#define GCINFODECODER_NO_EE +#include "coreclr/gcinfodecoder.cpp" + +#define UBF_FUNC_KIND_MASK 0x03 +#define UBF_FUNC_KIND_ROOT 0x00 +#define UBF_FUNC_KIND_HANDLER 0x01 +#define UBF_FUNC_KIND_FILTER 0x02 + +#define UBF_FUNC_HAS_EHINFO 0x04 +#define UBF_FUNC_REVERSE_PINVOKE 0x08 +#define UBF_FUNC_HAS_ASSOCIATED_DATA 0x10 + +#ifdef TARGET_X86 +// +// x86 ABI does not define RUNTIME_FUNCTION. Define our own to allow unification between x86 and other platforms. 
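+// Note that the x86 UNWIND_INFO declared below carries only the function length; unlike
+// AMD64 there are no unwind codes, so GetUnwindDataBlob reports a fixed size for it.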
+// +typedef struct _RUNTIME_FUNCTION { + DWORD BeginAddress; + DWORD EndAddress; + DWORD UnwindData; +} RUNTIME_FUNCTION, *PRUNTIME_FUNCTION; + +typedef struct _KNONVOLATILE_CONTEXT_POINTERS { + + // The ordering of these fields should be aligned with that + // of corresponding fields in CONTEXT + // + // (See REGDISPLAY in Runtime/regdisp.h for details) + PDWORD Edi; + PDWORD Esi; + PDWORD Ebx; + PDWORD Edx; + PDWORD Ecx; + PDWORD Eax; + + PDWORD Ebp; + +} KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS; + +typedef struct _UNWIND_INFO { + ULONG FunctionLength; +} UNWIND_INFO, *PUNWIND_INFO; + +#elif defined(TARGET_AMD64) + +#define UNW_FLAG_NHANDLER 0x0 +#define UNW_FLAG_EHANDLER 0x1 +#define UNW_FLAG_UHANDLER 0x2 +#define UNW_FLAG_CHAININFO 0x4 + +// +// The following structures are defined in Windows x64 unwind info specification +// http://www.bing.com/search?q=msdn+Exception+Handling+x64 +// +typedef union _UNWIND_CODE { + struct { + uint8_t CodeOffset; + uint8_t UnwindOp : 4; + uint8_t OpInfo : 4; + }; + + uint16_t FrameOffset; +} UNWIND_CODE, *PUNWIND_CODE; + +typedef struct _UNWIND_INFO { + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t SizeOfProlog; + uint8_t CountOfUnwindCodes; + uint8_t FrameRegister : 4; + uint8_t FrameOffset : 4; + UNWIND_CODE UnwindCode[1]; +} UNWIND_INFO, *PUNWIND_INFO; + +#endif // TARGET_X86 + +typedef DPTR(struct _UNWIND_INFO) PTR_UNWIND_INFO; +typedef DPTR(union _UNWIND_CODE) PTR_UNWIND_CODE; + +static PTR_VOID GetUnwindDataBlob(TADDR moduleBase, PTR_RUNTIME_FUNCTION pRuntimeFunction, /* out */ size_t * pSize) +{ +#if defined(TARGET_AMD64) + PTR_UNWIND_INFO pUnwindInfo(dac_cast(moduleBase + pRuntimeFunction->UnwindInfoAddress)); + + size_t size = offsetof(UNWIND_INFO, UnwindCode) + sizeof(UNWIND_CODE) * pUnwindInfo->CountOfUnwindCodes; + + // Chained unwind info is not supported at this time + ASSERT((pUnwindInfo->Flags & UNW_FLAG_CHAININFO) == 0); + + if (pUnwindInfo->Flags & (UNW_FLAG_EHANDLER | UNW_FLAG_UHANDLER)) + { + // Personality routine + size = ALIGN_UP(size, sizeof(DWORD)) + sizeof(DWORD); + } + + *pSize = size; + + return pUnwindInfo; + +#elif defined(TARGET_X86) + + PTR_UNWIND_INFO pUnwindInfo(dac_cast(moduleBase + pRuntimeFunction->UnwindInfoAddress)); + + *pSize = sizeof(UNWIND_INFO); + + return pUnwindInfo; + +#elif defined(TARGET_ARM) + + // if this function uses packed unwind data then at least one of the two least significant bits + // will be non-zero. if this is the case then there will be no xdata record to enumerate. 
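+    // The first .xdata word packs the function length together with an epilog-scope count
+    // (bits 23-27) and an unwind-code word count (bits 28-31); when both counts are zero, an
+    // extension word holding wider counts follows. Bit 20 signals that a personality routine
+    // is present and bit 21 that epilog scopes are not listed individually, which is what the
+    // size computation below accounts for.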
+ ASSERT((pRuntimeFunction->UnwindData & 0x3) == 0); + + // compute the size of the unwind info + PTR_TADDR xdata = dac_cast(pRuntimeFunction->UnwindData + moduleBase); + + ULONG epilogScopes = 0; + ULONG unwindWords = 0; + ULONG size = 0; + + if ((xdata[0] >> 23) != 0) + { + size = 4; + epilogScopes = (xdata[0] >> 23) & 0x1f; + unwindWords = (xdata[0] >> 28) & 0x0f; + } + else + { + size = 8; + epilogScopes = xdata[1] & 0xffff; + unwindWords = (xdata[1] >> 16) & 0xff; + } + + if (!(xdata[0] & (1 << 21))) + size += 4 * epilogScopes; + + size += 4 * unwindWords; + + if ((xdata[0] & (1 << 20)) != 0) + { + // Personality routine + size += 4; + } + + *pSize = size; + return xdata; +#else + PORTABILITY_ASSERT("GetUnwindDataBlob"); + *pSize = 0; + return NULL; +#endif +} + + +CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, UInt32 nRuntimeFunctionTable, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions) + : m_moduleBase(moduleBase), + m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), + m_pRuntimeFunctionTable(pRuntimeFunctionTable), m_nRuntimeFunctionTable(nRuntimeFunctionTable), + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) +{ +} + +CoffNativeCodeManager::~CoffNativeCodeManager() +{ +} + +static int LookupUnwindInfoForMethod(UInt32 relativePc, + PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, + int low, + int high) +{ +#ifdef TARGET_ARM + relativePc |= THUMB_CODE; +#endif + + // Binary search the RUNTIME_FUNCTION table + // Use linear search once we get down to a small number of elements + // to avoid Binary search overhead. + while (high - low > 10) + { + int middle = low + (high - low) / 2; + + PTR_RUNTIME_FUNCTION pFunctionEntry = pRuntimeFunctionTable + middle; + if (relativePc < pFunctionEntry->BeginAddress) + { + high = middle - 1; + } + else + { + low = middle; + } + } + + for (int i = low; i < high; i++) + { + PTR_RUNTIME_FUNCTION pNextFunctionEntry = pRuntimeFunctionTable + (i + 1); + if (relativePc < pNextFunctionEntry->BeginAddress) + { + high = i; + break; + } + } + + PTR_RUNTIME_FUNCTION pFunctionEntry = pRuntimeFunctionTable + high; + if (relativePc >= pFunctionEntry->BeginAddress) + { + return high; + } + + ASSERT_UNCONDITIONALLY("Invalid code address"); + return -1; +} + +struct CoffNativeMethodInfo +{ + PTR_RUNTIME_FUNCTION mainRuntimeFunction; + PTR_RUNTIME_FUNCTION runtimeFunction; + bool executionAborted; +}; + +// Ensure that CoffNativeMethodInfo fits into the space reserved by MethodInfo +static_assert(sizeof(CoffNativeMethodInfo) <= sizeof(MethodInfo), "CoffNativeMethodInfo too big"); + +bool CoffNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut) +{ + // Stackwalker may call this with ControlPC that does not belong to this code manager + if (dac_cast(ControlPC) < dac_cast(m_pvManagedCodeStartRange) || + dac_cast(m_pvManagedCodeStartRange) + m_cbManagedCodeRange <= dac_cast(ControlPC)) + { + return false; + } + + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethodInfoOut; + + TADDR relativePC = dac_cast(ControlPC) - m_moduleBase; + + int MethodIndex = LookupUnwindInfoForMethod((UInt32)relativePC, m_pRuntimeFunctionTable, + 0, m_nRuntimeFunctionTable - 1); + if (MethodIndex < 0) + return false; + + PTR_RUNTIME_FUNCTION pRuntimeFunction = m_pRuntimeFunctionTable + MethodIndex; + + pMethodInfo->runtimeFunction = 
pRuntimeFunction; + + // The runtime function could correspond to a funclet. We need to get to the + // runtime function of the main method. + for (;;) + { + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pRuntimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) == UBF_FUNC_KIND_ROOT) + break; + + pRuntimeFunction--; + } + + pMethodInfo->mainRuntimeFunction = pRuntimeFunction; + + pMethodInfo->executionAborted = false; + + return true; +} + +bool CoffNativeCodeManager::IsFunclet(MethodInfo * pMethInfo) +{ + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pMethodInfo->runtimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + + // A funclet will have an entry in funclet to main method map + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT; +} + +bool CoffNativeCodeManager::IsFilter(MethodInfo * pMethInfo) +{ + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pMethodInfo->runtimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) == UBF_FUNC_KIND_FILTER; +} + +PTR_VOID CoffNativeCodeManager::GetFramePointer(MethodInfo * pMethInfo, + REGDISPLAY * pRegisterSet) +{ + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pMethodInfo->runtimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + + // Return frame pointer for methods with EH and funclets + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0 || (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + { + return (PTR_VOID)pRegisterSet->GetFP(); + } + + return NULL; +} + +void CoffNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback) +{ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->mainRuntimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + TADDR methodStartAddress = m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress; + UInt32 codeOffset = (UInt32)(dac_cast(safePointAddress) - methodStartAddress); + + GcInfoDecoder decoder( + GCInfoToken(p), + GcInfoDecoderFlags(DECODE_GC_LIFETIMES | DECODE_SECURITY_OBJECT | DECODE_VARARG), + codeOffset - 1 // TODO: Is this adjustment correct? 
+ ); + + ICodeManagerFlags flags = (ICodeManagerFlags)0; + if (pNativeMethodInfo->executionAborted) + flags = ICodeManagerFlags::ExecutionAborted; + if (IsFilter(pMethodInfo)) + flags = (ICodeManagerFlags)(flags | ICodeManagerFlags::NoReportUntracked); + + if (!decoder.EnumerateLiveSlots( + pRegisterSet, + false /* reportScratchSlots */, + flags, + hCallback->pCallback, + hCallback + )) + { + assert(false); + } +} + +UIntNative CoffNativeCodeManager::GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet) +{ +#if defined(TARGET_AMD64) + + // Return value + UIntNative upperBound; + CoffNativeMethodInfo* pNativeMethodInfo = (CoffNativeMethodInfo *) pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + TADDR basePointer = dac_cast(pRegisterSet->GetFP()); + + // Get the method's GC info + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + // Reverse PInvoke case. The embedded reverse PInvoke frame is guaranteed to reside above + // all outgoing arguments. + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + upperBound = (UIntNative) dac_cast(basePointer + slot); + } + else + { + // Check for a pushed RBP value + if (GetFramePointer(pMethodInfo, pRegisterSet) == NULL) + { + // Unwind the current method context to get the caller's stack pointer + // and obtain the upper bound of the callee is the value just below the caller's return address on the stack + SIZE_T EstablisherFrame; + PVOID HandlerData; + CONTEXT context; + context.Rsp = pRegisterSet->GetSP(); + context.Rbp = pRegisterSet->GetFP(); + context.Rip = pRegisterSet->GetIP(); + + RtlVirtualUnwind(NULL, + dac_cast(m_moduleBase), + pRegisterSet->IP, + (PRUNTIME_FUNCTION)pNativeMethodInfo->runtimeFunction, + &context, + &HandlerData, + &EstablisherFrame, + NULL); + + upperBound = dac_cast(context.Rsp - sizeof (PVOID)); + } + else + { + // In amd64, it is guaranteed that if there is a pushed RBP + // value at the top of the frame it resides above all outgoing arguments. Unlike x86, + // the frame pointer generally points to a location that is separated from the pushed RBP + // value by an offset that is recorded in the info header. Recover the address of the + // pushed RBP value by subtracting this offset. 
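+            // (The offset subtracted here is the FrameOffset field of the UNWIND_INFO header
+            // returned by GetUnwindDataBlob above.)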
+ upperBound = (UIntNative) dac_cast(pRegisterSet->GetFP() - ((PTR_UNWIND_INFO) pUnwindDataBlob)->FrameOffset); + } + } + return upperBound; +#else + assert(false); + return false; +#endif +} + +bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame) // out +{ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + // Reverse PInvoke transition should be on the main function body only + assert(pNativeMethodInfo->mainRuntimeFunction == pNativeMethodInfo->runtimeFunction); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + assert(slot != NO_REVERSE_PINVOKE_FRAME); + + TADDR basePointer = NULL; + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + *ppPreviousTransitionFrame = *(void**)(basePointer + slot); + return true; + } + + *ppPreviousTransitionFrame = NULL; + + CONTEXT context; + KNONVOLATILE_CONTEXT_POINTERS contextPointers; + +#ifdef _DEBUG + memset(&context, 0xDD, sizeof(context)); + memset(&contextPointers, 0xDD, sizeof(contextPointers)); +#endif + +#ifdef TARGET_X86 + #define FOR_EACH_NONVOLATILE_REGISTER(F) \ + F(E, ax) F(E, cx) F(E, dx) F(E, bx) F(E, bp) F(E, si) F(E, di) + #define WORDPTR PDWORD +#else + #define FOR_EACH_NONVOLATILE_REGISTER(F) \ + F(R, ax) F(R, cx) F(R, dx) F(R, bx) F(R, bp) F(R, si) F(R, di) \ + F(R, 8) F(R, 9) F(R, 10) F(R, 11) F(R, 12) F(R, 13) F(R, 14) F(R, 15) + #define WORDPTR PDWORD64 +#endif + +#define REGDISPLAY_TO_CONTEXT(prefix, reg) \ + contextPointers.prefix####reg = (WORDPTR) pRegisterSet->pR##reg; \ + if (pRegisterSet->pR##reg != NULL) context.prefix##reg = *(pRegisterSet->pR##reg); + +#define CONTEXT_TO_REGDISPLAY(prefix, reg) \ + pRegisterSet->pR##reg = (PTR_UIntNative) contextPointers.prefix####reg; + + FOR_EACH_NONVOLATILE_REGISTER(REGDISPLAY_TO_CONTEXT); + +#ifdef TARGET_X86 + PORTABILITY_ASSERT("CoffNativeCodeManager::UnwindStackFrame"); +#else // TARGET_X86 + memcpy(&context.Xmm6, pRegisterSet->Xmm, sizeof(pRegisterSet->Xmm)); + + context.Rsp = pRegisterSet->SP; + context.Rip = pRegisterSet->IP; + + SIZE_T EstablisherFrame; + PVOID HandlerData; + + RtlVirtualUnwind(NULL, + dac_cast(m_moduleBase), + pRegisterSet->IP, + (PRUNTIME_FUNCTION)pNativeMethodInfo->runtimeFunction, + &context, + &HandlerData, + &EstablisherFrame, + &contextPointers); + + pRegisterSet->SP = context.Rsp; + pRegisterSet->IP = context.Rip; + + pRegisterSet->pIP = PTR_PCODE(pRegisterSet->SP - sizeof(TADDR)); + + memcpy(pRegisterSet->Xmm, &context.Xmm6, sizeof(pRegisterSet->Xmm)); +#endif // TARGET_X86 + + FOR_EACH_NONVOLATILE_REGISTER(CONTEXT_TO_REGDISPLAY); + +#undef FOR_EACH_NONVOLATILE_REGISTER +#undef REGDISPLAY_TO_CONTEXT +#undef CONTEXT_TO_REGDISPLAY + + return true; +} + +// Convert the return kind that was 
encoded by RyuJIT to the +// value that CoreRT runtime can understand and support. +GCRefKind GetGcRefKind(ReturnKind returnKind) +{ + static_assert((GCRefKind)ReturnKind::RT_Scalar == GCRK_Scalar, "ReturnKind::RT_Scalar does not match GCRK_Scalar"); + static_assert((GCRefKind)ReturnKind::RT_Object == GCRK_Object, "ReturnKind::RT_Object does not match GCRK_Object"); + static_assert((GCRefKind)ReturnKind::RT_ByRef == GCRK_Byref, "ReturnKind::RT_ByRef does not match GCRK_Byref"); + ASSERT((returnKind == RT_Scalar) || (returnKind == GCRK_Object) || (returnKind == GCRK_Byref)); + + return (GCRefKind)returnKind; +} + +bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind) // out +{ +#if defined(TARGET_AMD64) + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + // Check whether this is a funclet + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + return false; + + // Skip hijacking a reverse-pinvoke method - it doesn't get us much because we already synchronize + // with the GC on the way back to native code. + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + return false; + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + // Decode the GC info for the current method to determine its return type + GcInfoDecoder decoder( + GCInfoToken(p), + GcInfoDecoderFlags(DECODE_RETURN_KIND), + 0 + ); + + GCRefKind gcRefKind = GetGcRefKind(decoder.GetReturnKind()); + + // Unwind the current method context to the caller's context to get its stack pointer + // and obtain the location of the return address on the stack + SIZE_T EstablisherFrame; + PVOID HandlerData; + CONTEXT context; + context.Rsp = pRegisterSet->GetSP(); + context.Rbp = pRegisterSet->GetFP(); + context.Rip = pRegisterSet->GetIP(); + + RtlVirtualUnwind(NULL, + dac_cast(m_moduleBase), + pRegisterSet->IP, + (PRUNTIME_FUNCTION)pNativeMethodInfo->runtimeFunction, + &context, + &HandlerData, + &EstablisherFrame, + NULL); + + *ppvRetAddrLocation = (PTR_PTR_VOID)(context.Rsp - sizeof (PVOID)); + *pRetValueKind = gcRefKind; + return true; +#else + return false; +#endif // defined(TARGET_AMD64) +} + +void CoffNativeCodeManager::UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo) +{ + // @TODO: CORERT: UnsynchronizedHijackMethodLoops +} + +PTR_VOID CoffNativeCodeManager::RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC) +{ + // GCInfo decoder needs to know whether execution of the method is aborted + // while querying for gc-info. But ICodeManager::EnumGCRef() doesn't receive any + // flags from mrt. Call to this method is used as a cue to mark the method info + // as execution aborted. Note - if pMethodInfo was cached, this scheme would not work. + // + // If the method has EH, then JIT will make sure the method is fully interruptible + // and we will have GC-info available at the faulting address as well. 
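+    // (EnumGcRefs consumes this flag and passes ICodeManagerFlags::ExecutionAborted to the
+    // GC info decoder when enumerating live slots.)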
+ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + pNativeMethodInfo->executionAborted = true; + + return controlPC; +} + +struct CoffEHEnumState +{ + PTR_UInt8 pMethodStartAddress; + PTR_UInt8 pEHInfo; + UInt32 uClause; + UInt32 nClauses; +}; + +// Ensure that CoffEHEnumState fits into the space reserved by EHEnumState +static_assert(sizeof(CoffEHEnumState) <= sizeof(EHEnumState), "CoffEHEnumState too big"); + +bool CoffNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumStateOut) +{ + assert(pMethodInfo != NULL); + assert(pMethodStartAddress != NULL); + assert(pEHEnumStateOut != NULL); + + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + CoffEHEnumState * pEnumState = (CoffEHEnumState *)pEHEnumStateOut; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->mainRuntimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + // return if there is no EH info associated with this method + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) == 0) + { + return false; + } + + *pMethodStartAddress = dac_cast(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress); + + pEnumState->pMethodStartAddress = dac_cast(*pMethodStartAddress); + pEnumState->pEHInfo = dac_cast(m_moduleBase + *dac_cast(p)); + pEnumState->uClause = 0; + pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + return true; +} + +bool CoffNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClauseOut) +{ + assert(pEHEnumState != NULL); + assert(pEHClauseOut != NULL); + + CoffEHEnumState * pEnumState = (CoffEHEnumState *)pEHEnumState; + if (pEnumState->uClause >= pEnumState->nClauses) + return false; + pEnumState->uClause++; + + pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + UInt32 tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3); + pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2); + + // For each clause, we have up to 4 integers: + // 1) try start offset + // 2) (try length << 2) | clauseKind + // 3) if (typed || fault || filter) { handler start offset } + // 4a) if (typed) { type RVA } + // 4b) if (filter) { filter start offset } + // + // The first two integers have already been decoded + + switch (pEHClauseOut->m_clauseKind) + { + case EH_CLAUSE_TYPED: + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + + // Read target type + { + // @TODO: CORERT: Compress EHInfo using type table index scheme + // https://github.com/dotnet/corert/issues/972 + UInt32 typeRVA = *((PTR_UInt32&)pEnumState->pEHInfo)++; + pEHClauseOut->m_pTargetType = dac_cast(m_moduleBase + typeRVA); + } + break; + case EH_CLAUSE_FAULT: + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + case EH_CLAUSE_FILTER: + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_filterAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + default: + 
UNREACHABLE_MSG("unexpected EHClauseKind"); + } + + return true; +} + +PTR_VOID CoffNativeCodeManager::GetOsModuleHandle() +{ + return dac_cast(m_moduleBase); +} + +PTR_VOID CoffNativeCodeManager::GetMethodStartAddress(MethodInfo * pMethodInfo) +{ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + return dac_cast(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress); +} + +void * CoffNativeCodeManager::GetClasslibFunction(ClasslibFunctionId functionId) +{ + uint32_t id = (uint32_t)functionId; + + if (id >= m_nClasslibFunctions) + return nullptr; + + return m_pClasslibFunctions[id]; +} + +PTR_VOID CoffNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) +{ + if (dac_cast(ControlPC) < dac_cast(m_pvManagedCodeStartRange) || + dac_cast(m_pvManagedCodeStartRange) + m_cbManagedCodeRange <= dac_cast(ControlPC)) + { + return NULL; + } + + TADDR relativePC = dac_cast(ControlPC) - m_moduleBase; + + int MethodIndex = LookupUnwindInfoForMethod((UInt32)relativePC, m_pRuntimeFunctionTable, 0, m_nRuntimeFunctionTable - 1); + if (MethodIndex < 0) + return NULL; + + PTR_RUNTIME_FUNCTION pRuntimeFunction = m_pRuntimeFunctionTable + MethodIndex; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pRuntimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) + return NULL; + + UInt32 dataRVA = *(UInt32*)p; + return dac_cast(m_moduleBase + dataRVA); +} + +extern "C" bool __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange); +extern "C" void __stdcall UnregisterCodeManager(ICodeManager * pCodeManager); +extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange); + +extern "C" +bool RhRegisterOSModule(void * pModule, + void * pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + void * pvUnboxingStubsStartRange, UInt32 cbUnboxingStubsRange, + void ** pClasslibFunctions, UInt32 nClasslibFunctions) +{ + PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)pModule; + PIMAGE_NT_HEADERS pNTHeaders = (PIMAGE_NT_HEADERS)((TADDR)pModule + pDosHeader->e_lfanew); + + IMAGE_DATA_DIRECTORY * pRuntimeFunctions = &(pNTHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXCEPTION]); + + NewHolder pCoffNativeCodeManager = new (nothrow) CoffNativeCodeManager((TADDR)pModule, + pvManagedCodeStartRange, cbManagedCodeRange, + dac_cast((TADDR)pModule + pRuntimeFunctions->VirtualAddress), + pRuntimeFunctions->Size / sizeof(RUNTIME_FUNCTION), + pClasslibFunctions, nClasslibFunctions); + + if (pCoffNativeCodeManager == nullptr) + return false; + + if (!RegisterCodeManager(pCoffNativeCodeManager, pvManagedCodeStartRange, cbManagedCodeRange)) + return false; + + if (!RegisterUnboxingStubs(pvUnboxingStubsStartRange, cbUnboxingStubsRange)) + { + UnregisterCodeManager(pCoffNativeCodeManager); + return false; + } + + pCoffNativeCodeManager.SuppressRelease(); + + return true; +} diff --git a/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.h new file mode 100644 index 0000000000000..8777bdfc7b750 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
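+
+// CoffNativeCodeManager implements ICodeManager on top of the PE/COFF RUNTIME_FUNCTION
+// table of an AOT-compiled module: method lookup, stack unwinding, GC reference enumeration
+// and EH clause enumeration all resolve through that table. One instance is created per
+// registered OS module by RhRegisterOSModule (see CoffNativeCodeManager.cpp).
+//
+// Illustrative call flow only (the local names are hypothetical):
+//
+//     MethodInfo mi;
+//     if (pCodeManager->FindMethodInfo(controlPC, &mi))
+//     {
+//         // e.g. report GC references live at a safe point in that method
+//         pCodeManager->EnumGcRefs(&mi, safePointAddress, pRegisterSet, &gcEnumContext);
+//     }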
+ +#pragma once + +#if defined(TARGET_AMD64) || defined(TARGET_X86) +struct T_RUNTIME_FUNCTION { + uint32_t BeginAddress; + uint32_t EndAddress; + uint32_t UnwindInfoAddress; +}; +#elif defined(TARGET_ARM) +struct T_RUNTIME_FUNCTION { + uint32_t BeginAddress; + uint32_t UnwindData; +}; +#elif defined(TARGET_ARM64) +struct T_RUNTIME_FUNCTION { + uint32_t BeginAddress; + union { + uint32_t UnwindData; + struct { + uint32_t Flag : 2; + uint32_t FunctionLength : 11; + uint32_t RegF : 3; + uint32_t RegI : 4; + uint32_t H : 1; + uint32_t CR : 2; + uint32_t FrameSize : 9; + } PackedUnwindData; + }; +}; +#else +#error unexpected target architecture +#endif + +typedef DPTR(T_RUNTIME_FUNCTION) PTR_RUNTIME_FUNCTION; + +class CoffNativeCodeManager : public ICodeManager +{ + TADDR m_moduleBase; + + PTR_VOID m_pvManagedCodeStartRange; + UInt32 m_cbManagedCodeRange; + + PTR_RUNTIME_FUNCTION m_pRuntimeFunctionTable; + UInt32 m_nRuntimeFunctionTable; + + PTR_PTR_VOID m_pClasslibFunctions; + UInt32 m_nClasslibFunctions; + +public: + CoffNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, UInt32 nRuntimeFunctionTable, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions); + ~CoffNativeCodeManager(); + + // + // Code manager methods + // + + bool FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut); + + bool IsFunclet(MethodInfo * pMethodInfo); + + bool IsFilter(MethodInfo * pMethodInfo); + + PTR_VOID GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + void EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback); + + bool UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame); // out + + UIntNative GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind); // out + + void UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo); + + PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC); + + bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState); + + bool EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClause); + + PTR_VOID GetMethodStartAddress(MethodInfo * pMethodInfo); + + void * GetClasslibFunction(ClasslibFunctionId functionId); + + PTR_VOID GetAssociatedData(PTR_VOID ControlPC); + + PTR_VOID GetOsModuleHandle(); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkCommon.cpp b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkCommon.cpp new file mode 100644 index 0000000000000..de7caaa9abe5e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkCommon.cpp @@ -0,0 +1,405 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementation of the portions of the Redhawk Platform Abstraction Layer (PAL) library that are common among +// multiple PAL variants. +// +// Note that in general we don't want to assume that Windows and Redhawk global definitions can co-exist. +// Since this code must include Windows headers to do its job we can't therefore safely include general +// Redhawk header files. 
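// The helpers defined below cover module bounds and PDB discovery (PalGetModuleBounds,
// PalGetPDBInfo), the process CPU count, whole-file reads, the maximum stack bounds of
// the current thread, module file-name lookup via the PEB, and the 64-bit tick count.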
+// + +#include +#include +#include +#include +#include "CommonTypes.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include +#include "CommonMacros.h" +#include "rhassert.h" + + +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI __stdcall + + +// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalGetModuleBounds(HANDLE hOsHandle, _Out_ UInt8 ** ppLowerBound, _Out_ UInt8 ** ppUpperBound) +{ + BYTE *pbModule = (BYTE*)hOsHandle; + DWORD cbModule; + + IMAGE_NT_HEADERS *pNtHeaders = (IMAGE_NT_HEADERS*)(pbModule + ((IMAGE_DOS_HEADER*)hOsHandle)->e_lfanew); + if (pNtHeaders->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) + cbModule = ((IMAGE_OPTIONAL_HEADER32*)&pNtHeaders->OptionalHeader)->SizeOfImage; + else + cbModule = ((IMAGE_OPTIONAL_HEADER64*)&pNtHeaders->OptionalHeader)->SizeOfImage; + + *ppLowerBound = pbModule; + *ppUpperBound = pbModule + cbModule - 1; +} + +// Reads through the PE header of the specified module, and returns +// the module's matching PDB's signature GUID, age, and build path by +// fishing them out of the last IMAGE_DEBUG_DIRECTORY of type +// IMAGE_DEBUG_TYPE_CODEVIEW. Used when sending the ModuleLoad event +// to help profilers find matching PDBs for loaded modules. +// +// Arguments: +// +// [in] hOsHandle - OS Handle for module from which to get PDB info +// [out] pGuidSignature - PDB's signature GUID to be placed here +// [out] pdwAge - PDB's age to be placed here +// [out] wszPath - PDB's build path to be placed here +// [in] cchPath - Number of wide characters allocated in wszPath, including NULL terminator +// +// This is a simplification of similar code in desktop CLR's GetCodeViewInfo +// in eventtrace.cpp. +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalGetPDBInfo(HANDLE hOsHandle, _Out_ GUID * pGuidSignature, _Out_ UInt32 * pdwAge, _Out_writes_z_(cchPath) WCHAR * wszPath, Int32 cchPath) +{ + // Zero-init [out]-params + ZeroMemory(pGuidSignature, sizeof(*pGuidSignature)); + *pdwAge = 0; + if (cchPath <= 0) + return; + wszPath[0] = L'\0'; + + BYTE *pbModule = (BYTE*)hOsHandle; + + IMAGE_NT_HEADERS const * pNtHeaders = (IMAGE_NT_HEADERS*)(pbModule + ((IMAGE_DOS_HEADER*)hOsHandle)->e_lfanew); + IMAGE_DATA_DIRECTORY const * rgDataDirectory = NULL; + if (pNtHeaders->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) + rgDataDirectory = ((IMAGE_OPTIONAL_HEADER32 const *)&pNtHeaders->OptionalHeader)->DataDirectory; + else + rgDataDirectory = ((IMAGE_OPTIONAL_HEADER64 const *)&pNtHeaders->OptionalHeader)->DataDirectory; + + IMAGE_DATA_DIRECTORY const * pDebugDataDirectory = &rgDataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG]; + + // In Redhawk, modules are loaded as MAPPED, so we don't have to worry about dealing + // with FLAT files (with padding missing), so header addresses can be used as is + IMAGE_DEBUG_DIRECTORY const *rgDebugEntries = (IMAGE_DEBUG_DIRECTORY const *) (pbModule + pDebugDataDirectory->VirtualAddress); + DWORD cbDebugEntries = pDebugDataDirectory->Size; + if (cbDebugEntries < sizeof(IMAGE_DEBUG_DIRECTORY)) + return; + + // Since rgDebugEntries is an array of IMAGE_DEBUG_DIRECTORYs, cbDebugEntries + // should be a multiple of sizeof(IMAGE_DEBUG_DIRECTORY). 
+ if (cbDebugEntries % sizeof(IMAGE_DEBUG_DIRECTORY) != 0) + return; + + // CodeView RSDS debug information -> PDB 7.00 + struct CV_INFO_PDB70 + { + DWORD magic; + GUID signature; // unique identifier + DWORD age; // an always-incrementing value + _Field_z_ char path[MAX_PATH]; // zero terminated string with the name of the PDB file + }; + + // Temporary storage for a CV_INFO_PDB70 and its size (which could be less than + // sizeof(CV_INFO_PDB70); see below). + struct PdbInfo + { + CV_INFO_PDB70 * m_pPdb70; + ULONG m_cbPdb70; + }; + + // Grab module bounds so we can do some rough sanity checking before we follow any + // RVAs + UInt8 * pbModuleLowerBound = NULL; + UInt8 * pbModuleUpperBound = NULL; + PalGetModuleBounds(hOsHandle, &pbModuleLowerBound, &pbModuleUpperBound); + + // Iterate through all debug directory entries. The convention is that debuggers & + // profilers typically just use the very last IMAGE_DEBUG_TYPE_CODEVIEW entry. Treat raw + // bytes we read as untrusted. + PdbInfo pdbInfoLast = {0}; + int cEntries = cbDebugEntries / sizeof(IMAGE_DEBUG_DIRECTORY); + for (int i = 0; i < cEntries; i++) + { + if ((UInt8 *)(&rgDebugEntries[i]) + sizeof(rgDebugEntries[i]) >= pbModuleUpperBound) + { + // Bogus pointer + return; + } + + if (rgDebugEntries[i].Type != IMAGE_DEBUG_TYPE_CODEVIEW) + continue; + + // Get raw data pointed to by this IMAGE_DEBUG_DIRECTORY + + // AddressOfRawData is generally set properly for Redhawk modules, so we don't + // have to worry about using PointerToRawData and converting it to an RVA + if (rgDebugEntries[i].AddressOfRawData == NULL) + continue; + + DWORD rvaOfRawData = rgDebugEntries[i].AddressOfRawData; + ULONG cbDebugData = rgDebugEntries[i].SizeOfData; + if (cbDebugData < size_t(&((CV_INFO_PDB70*)0)->magic) + sizeof(((CV_INFO_PDB70*)0)->magic)) + { + // raw data too small to contain magic number at expected spot, so its format + // is not recognizable. Skip + continue; + } + + // Verify the magic number is as expected + const DWORD CV_SIGNATURE_RSDS = 0x53445352; + CV_INFO_PDB70 * pPdb70 = (CV_INFO_PDB70 *) (pbModule + rvaOfRawData); + if ((UInt8 *)(pPdb70) + cbDebugData >= pbModuleUpperBound) + { + // Bogus pointer + return; + } + + if (pPdb70->magic != CV_SIGNATURE_RSDS) + { + // Unrecognized magic number. Skip + continue; + } + + // From this point forward, the format should adhere to the expected layout of + // CV_INFO_PDB70. If we find otherwise, then assume the IMAGE_DEBUG_DIRECTORY is + // outright corrupt. + + // Verify sane size of raw data + if (cbDebugData > sizeof(CV_INFO_PDB70)) + return; + + // cbDebugData actually can be < sizeof(CV_INFO_PDB70), since the "path" field + // can be truncated to its actual data length (i.e., fewer than MAX_PATH chars + // may be present in the PE file). In some cases, though, cbDebugData will + // include all MAX_PATH chars even though path gets null-terminated well before + // the MAX_PATH limit. + + // Gotta have at least one byte of the path + if (cbDebugData < offsetof(CV_INFO_PDB70, path) + sizeof(char)) + return; + + // How much space is available for the path? 
+ size_t cchPathMaxIncludingNullTerminator = (cbDebugData - offsetof(CV_INFO_PDB70, path)) / sizeof(char); + ASSERT(cchPathMaxIncludingNullTerminator >= 1); // Guaranteed above + + // Verify path string fits inside the declared size + size_t cchPathActualExcludingNullTerminator = strnlen_s(pPdb70->path, cchPathMaxIncludingNullTerminator); + if (cchPathActualExcludingNullTerminator == cchPathMaxIncludingNullTerminator) + { + // This is how strnlen indicates failure--it couldn't find the null + // terminator within the buffer size specified + return; + } + + // Looks valid. Remember it. + pdbInfoLast.m_pPdb70 = pPdb70; + pdbInfoLast.m_cbPdb70 = cbDebugData; + } + + // Take the last IMAGE_DEBUG_TYPE_CODEVIEW entry we saw, and return it to the caller + if (pdbInfoLast.m_pPdb70 != NULL) + { + memcpy(pGuidSignature, &pdbInfoLast.m_pPdb70->signature, sizeof(GUID)); + *pdwAge = pdbInfoLast.m_pPdb70->age; + + // Convert build path from ANSI to UNICODE + errno_t ret; + size_t cchConverted; + ret = mbstowcs_s( + &cchConverted, + wszPath, + cchPath, + pdbInfoLast.m_pPdb70->path, + _countof(pdbInfoLast.m_pPdb70->path) - 1); + if ((ret != 0) && (ret != STRUNCATE)) + { + // PDB path isn't essential. An empty string will do if we hit an error. + ASSERT(cchPath > 0); // Guaranteed at top of function + wszPath[0] = L'\0'; + } + } +} + +REDHAWK_PALEXPORT Int32 REDHAWK_PALAPI PalGetProcessCpuCount() +{ + static int CpuCount = 0; + + if (CpuCount != 0) + return CpuCount; + else + { + // The concept of process CPU affinity is going away and so CoreSystem obsoletes the APIs used to + // fetch this information. Instead we'll just return total cpu count. + SYSTEM_INFO sysInfo; +#ifndef APP_LOCAL_RUNTIME + ::GetSystemInfo(&sysInfo); +#else + ::GetNativeSystemInfo(&sysInfo); +#endif + CpuCount = sysInfo.dwNumberOfProcessors; + return sysInfo.dwNumberOfProcessors; + } +} + +//Reads the entire contents of the file into the specified buffer, buff +//returns the number of bytes read if the file is successfully read +//returns 0 if the file is not found, size is greater than maxBytesToRead or the file couldn't be opened or read +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalReadFileContents(_In_z_ const TCHAR* fileName, _Out_writes_all_(maxBytesToRead) char* buff, _In_ UInt32 maxBytesToRead) +{ + WIN32_FILE_ATTRIBUTE_DATA attrData; + + BOOL getAttrSuccess = GetFileAttributesExW(fileName, GetFileExInfoStandard, &attrData); + + //if we weren't able to get the file attributes, or the file is larger than maxBytesToRead, or the file size is zero + if ((!getAttrSuccess) || (attrData.nFileSizeHigh != 0) || (attrData.nFileSizeLow > (DWORD)maxBytesToRead) || (attrData.nFileSizeLow == 0)) + { + return 0; + } + + HANDLE hFile = PalCreateFileW(fileName, GENERIC_READ, FILE_SHARE_DELETE | FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) + { + return 0; + } + + UInt32 bytesRead; + + BOOL readSuccess = ReadFile(hFile, buff, (DWORD)maxBytesToRead, (DWORD*)&bytesRead, NULL); + + CloseHandle(hFile); + + if (!readSuccess) + { + return 0; + } + + return bytesRead; +} + + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. 
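// Note that the committed portion of the stack grows on demand (downward, via guard
// pages) from the high address returned here toward the low address, so only part of
// this range is typically mapped at any given time.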
+REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) +{ + // VirtualQuery on the address of a local variable to get the allocation + // base of the stack. Then use the StackBase field in the TEB to give + // the highest address of the stack region. + MEMORY_BASIC_INFORMATION mbi = { 0 }; + SIZE_T cb = VirtualQuery(&mbi, &mbi, sizeof(mbi)); + if (cb != sizeof(mbi)) + return false; + + NT_TIB* pTib = (NT_TIB*)NtCurrentTeb(); + *ppStackHighOut = pTib->StackBase; // stack base is the highest address + *ppStackLowOut = mbi.AllocationBase; // allocation base is the lowest address + return true; +} + +#if !defined(_INC_WINDOWS) || defined(APP_LOCAL_RUNTIME) + +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWSTR Buffer; +} UNICODE_STRING; +typedef UNICODE_STRING *PUNICODE_STRING; +typedef const UNICODE_STRING *PCUNICODE_STRING; + +typedef struct _PEB_LDR_DATA { + BYTE Reserved1[8]; + PVOID Reserved2[3]; + LIST_ENTRY InMemoryOrderModuleList; +} PEB_LDR_DATA, *PPEB_LDR_DATA; + +typedef struct _LDR_DATA_TABLE_ENTRY { + PVOID Reserved1[2]; + LIST_ENTRY InMemoryOrderLinks; + PVOID Reserved2[2]; + PVOID DllBase; + PVOID Reserved3[2]; + UNICODE_STRING FullDllName; + BYTE Reserved4[8]; + PVOID Reserved5[3]; + union { + ULONG CheckSum; + PVOID Reserved6; + } DUMMYUNIONNAME; + ULONG TimeDateStamp; +} LDR_DATA_TABLE_ENTRY, *PLDR_DATA_TABLE_ENTRY; + +typedef struct _PEB { + BYTE Reserved1[2]; + BYTE BeingDebugged; + BYTE Reserved2[1]; + PVOID Reserved3[2]; + PPEB_LDR_DATA Ldr; + PVOID /*PRTL_USER_PROCESS_PARAMETERS*/ ProcessParameters; + PVOID Reserved4[3]; + PVOID AtlThunkSListPtr; + PVOID Reserved5; + ULONG Reserved6; + PVOID Reserved7; + ULONG Reserved8; + ULONG AtlThunkSListPtr32; + PVOID Reserved9[45]; + BYTE Reserved10[96]; + PVOID /*PPS_POST_PROCESS_INIT_ROUTINE*/ PostProcessInitRoutine; + BYTE Reserved11[128]; + PVOID Reserved12[1]; + ULONG SessionId; +} PEB, *PPEB; + +typedef struct _TEB { + PVOID Reserved1[12]; + PPEB ProcessEnvironmentBlock; + PVOID Reserved2[399]; + BYTE Reserved3[1952]; + PVOID TlsSlots[64]; + BYTE Reserved4[8]; + PVOID Reserved5[26]; + PVOID ReservedForOle; // Windows 2000 only + PVOID Reserved6[4]; + PVOID TlsExpansionSlots; +} TEB, *PTEB; + +#endif // !defined(_INC_WINDOWS) || defined(APP_LOCAL_RUNTIME) + +// retrieves the full path to the specified module, if moduleBase is NULL retreieves the full path to the +// executable module of the current process. +// +// Return value: number of characters in name string +// +//NOTE: This implementation exists because calling GetModuleFileName is not wack compliant. if we later decide +// that the framework package containing mrt100_app no longer needs to be wack compliant, this should be +// removed and the windows implementation of GetModuleFileName should be substitued on windows. 
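// The implementation below walks the loader's InMemoryOrderModuleList directly off the
// TEB/PEB. UNICODE_STRING::Length is a byte count, so the character count returned is
// Length divided by sizeof(WCHAR), i.e. by 2.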
+REDHAWK_PALEXPORT Int32 PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) +{ + TEB* pTEB = NtCurrentTeb(); + LIST_ENTRY* pStartLink = &(pTEB->ProcessEnvironmentBlock->Ldr->InMemoryOrderModuleList); + LIST_ENTRY* pCurLink = pStartLink->Flink; + + do + { + LDR_DATA_TABLE_ENTRY* pEntry = CONTAINING_RECORD(pCurLink, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks); + + //null moduleBase will result in the first module being returned + //since the module list is ordered this is the executable module of the current process + if ((pEntry->DllBase == moduleBase) || (moduleBase == NULL)) + { + *pModuleNameOut = pEntry->FullDllName.Buffer; + return pEntry->FullDllName.Length / 2; + } + pCurLink = pCurLink->Flink; + } + while (pCurLink != pStartLink); + + *pModuleNameOut = NULL; + return 0; +} + +REDHAWK_PALEXPORT UInt64 __cdecl PalGetTickCount64() +{ + return GetTickCount64(); +} diff --git a/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkInline.h b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkInline.h new file mode 100644 index 0000000000000..cd9535f4731d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkInline.h @@ -0,0 +1,157 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Implementation of Redhawk PAL inline functions + +EXTERN_C long __cdecl _InterlockedIncrement(long volatile *); +#pragma intrinsic(_InterlockedIncrement) +FORCEINLINE Int32 PalInterlockedIncrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return _InterlockedIncrement((long volatile *)pDst); +} + +EXTERN_C long __cdecl _InterlockedDecrement(long volatile *); +#pragma intrinsic(_InterlockedDecrement) +FORCEINLINE Int32 PalInterlockedDecrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return _InterlockedDecrement((long volatile *)pDst); +} + +EXTERN_C long _InterlockedOr(long volatile *, long); +#pragma intrinsic(_InterlockedOr) +FORCEINLINE UInt32 PalInterlockedOr(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return _InterlockedOr((long volatile *)pDst, iValue); +} + +EXTERN_C long _InterlockedAnd(long volatile *, long); +#pragma intrinsic(_InterlockedAnd) +FORCEINLINE UInt32 PalInterlockedAnd(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return _InterlockedAnd((long volatile *)pDst, iValue); +} + +EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedExchange(long volatile *, long); +#pragma intrinsic(_InterlockedExchange) +FORCEINLINE Int32 PalInterlockedExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue) +{ + return _InterlockedExchange((long volatile *)pDst, iValue); +} + +EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedCompareExchange(long volatile *, long, long); +#pragma intrinsic(_InterlockedCompareExchange) +FORCEINLINE Int32 PalInterlockedCompareExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue, Int32 iComparand) +{ + return _InterlockedCompareExchange((long volatile *)pDst, iValue, iComparand); +} + +EXTERN_C Int64 _InterlockedCompareExchange64(Int64 volatile *, Int64, Int64); +#pragma intrinsic(_InterlockedCompareExchange64) +FORCEINLINE Int64 PalInterlockedCompareExchange64(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValue, Int64 iComparand) +{ + return _InterlockedCompareExchange64(pDst, iValue, iComparand); +} + +#if defined(HOST_AMD64) || defined(HOST_ARM64) +EXTERN_C UInt8 
_InterlockedCompareExchange128(Int64 volatile *, Int64, Int64, Int64 *); +#pragma intrinsic(_InterlockedCompareExchange128) +FORCEINLINE UInt8 PalInterlockedCompareExchange128(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValueHigh, Int64 iValueLow, Int64 *pComparandAndResult) +{ + return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); +} +#endif // HOST_AMD64 + +#ifdef HOST_64BIT + +EXTERN_C void * _InterlockedExchangePointer(void * volatile *, void *); +#pragma intrinsic(_InterlockedExchangePointer) +FORCEINLINE void * PalInterlockedExchangePointer(_Inout_ _Interlocked_operand_ void * volatile *pDst, _In_ void *pValue) +{ + return _InterlockedExchangePointer((void * volatile *)pDst, pValue); +} + +EXTERN_C void * _InterlockedCompareExchangePointer(void * volatile *, void *, void *); +#pragma intrinsic(_InterlockedCompareExchangePointer) +FORCEINLINE void * PalInterlockedCompareExchangePointer(_Inout_ _Interlocked_operand_ void * volatile *pDst, _In_ void *pValue, _In_ void *pComparand) +{ + return _InterlockedCompareExchangePointer((void * volatile *)pDst, pValue, pComparand); +} + +#else // HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)_InterlockedExchange((long volatile *)(_pDst), (long)(size_t)(_pValue))) + +#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)_InterlockedCompareExchange((long volatile *)(_pDst), (long)(size_t)(_pValue), (long)(size_t)(_pComparand))) + +#endif // HOST_64BIT + +EXTERN_C __declspec(dllimport) unsigned long __stdcall GetLastError(); +FORCEINLINE int PalGetLastError() +{ + return (int)GetLastError(); +} + +EXTERN_C __declspec(dllimport) void __stdcall SetLastError(unsigned long error); +FORCEINLINE void PalSetLastError(int error) +{ + SetLastError((unsigned long)error); +} + +#if defined(HOST_X86) + +EXTERN_C void _mm_pause(); +#pragma intrinsic(_mm_pause) +#define PalYieldProcessor() _mm_pause() + +FORCEINLINE void PalMemoryBarrier() +{ + long Barrier; + _InterlockedOr(&Barrier, 0); +} + +#elif defined(HOST_AMD64) + +EXTERN_C void _mm_pause(); +#pragma intrinsic(_mm_pause) +#define PalYieldProcessor() _mm_pause() + +EXTERN_C void __faststorefence(); +#pragma intrinsic(__faststorefence) +#define PalMemoryBarrier() __faststorefence() + + +#elif defined(HOST_ARM) + +EXTERN_C void __yield(void); +#pragma intrinsic(__yield) +EXTERN_C void __dmb(unsigned int _Type); +#pragma intrinsic(__dmb) +FORCEINLINE void PalYieldProcessor() +{ + __dmb(0xA /* _ARM_BARRIER_ISHST */); + __yield(); +} + +#define PalMemoryBarrier() __dmb(0xF /* _ARM_BARRIER_SY */) + +#elif defined(HOST_ARM64) + +EXTERN_C void __yield(void); +#pragma intrinsic(__yield) +EXTERN_C void __dmb(unsigned int _Type); +#pragma intrinsic(__dmb) +FORCEINLINE void PalYieldProcessor() +{ + __dmb(0xA /* _ARM64_BARRIER_ISHST */); + __yield(); +} + +#define PalMemoryBarrier() __dmb(0xF /* _ARM64_BARRIER_SY */) + +#else +#error Unsupported architecture +#endif + +#define PalDebugBreak() __debugbreak() diff --git a/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp new file mode 100644 index 0000000000000..e9fd0e8d72fc6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -0,0 +1,496 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
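// Editorial sketch (not part of this change): the inline primitives declared in
// PalRedhawkInline.h above compose into the usual spin-wait patterns. A minimal
// test-and-test-and-set lock, assuming only PalInterlockedCompareExchange,
// PalInterlockedExchange and PalYieldProcessor from that header (SketchSpinLock is a
// hypothetical name used only for illustration), would look roughly like this:
//
// struct SketchSpinLock
// {
//     Int32 volatile m_state; // 0 = free, 1 = held
//
//     void Enter()
//     {
//         for (;;)
//         {
//             // Plain read first so contended waiters do not keep bouncing the cache
//             // line with interlocked operations.
//             if (m_state == 0 && PalInterlockedCompareExchange(&m_state, 1, 0) == 0)
//                 return;
//             PalYieldProcessor();
//         }
//     }
//
//     void Leave()
//     {
//         // The interlocked exchange provides the full fence that publishes writes
//         // made while the lock was held.
//         PalInterlockedExchange(&m_state, 0);
//     }
// };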
+ +// +// Implementation of the Redhawk Platform Abstraction Layer (PAL) library when MinWin is the platform. In this +// case most or all of the import requirements which Redhawk has can be satisfied via a forwarding export to +// some native MinWin library. Therefore most of the work is done in the .def file and there is very little +// code here. +// +// Note that in general we don't want to assume that Windows and Redhawk global definitions can co-exist. +// Since this code must include Windows headers to do its job we can't therefore safely include general +// Redhawk header files. +// +#include "common.h" +#include +#include +#include +#include + +#include "holder.h" + +#define PalRaiseFailFastException RaiseFailFastException + +uint32_t PalEventWrite(REGHANDLE arg1, const EVENT_DESCRIPTOR * arg2, uint32_t arg3, EVENT_DATA_DESCRIPTOR * arg4) +{ + return EventWrite(arg1, arg2, arg3, arg4); +} + +#include "gcenv.h" + + +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI __stdcall + +// Index for the fiber local storage of the attached thread pointer +static UInt32 g_flsIndex = FLS_OUT_OF_INDEXES; + +static DWORD g_dwPALCapabilities; + +GCSystemInfo g_RhSystemInfo; + +bool InitializeSystemInfo() +{ + SYSTEM_INFO systemInfo; + GetSystemInfo(&systemInfo); + + g_RhSystemInfo.dwNumberOfProcessors = systemInfo.dwNumberOfProcessors; + g_RhSystemInfo.dwPageSize = systemInfo.dwPageSize; + g_RhSystemInfo.dwAllocationGranularity = systemInfo.dwAllocationGranularity; + + return true; +} + +// This is called when each *fiber* is destroyed. When the home fiber of a thread is destroyed, +// it means that the thread itself is destroyed. +// Since we receive that notification outside of the Loader Lock, it allows us to safely acquire +// the ThreadStore lock in the RuntimeThreadShutdown. +void __stdcall FiberDetachCallback(void* lpFlsData) +{ + ASSERT(g_flsIndex != FLS_OUT_OF_INDEXES); + ASSERT(lpFlsData == FlsGetValue(g_flsIndex)); + + if (lpFlsData != NULL) + { + // The current fiber is the home fiber of a thread, so the thread is shutting down + RuntimeThreadShutdown(lpFlsData); + } +} + +// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalInit() +{ + g_dwPALCapabilities = WriteWatchCapability | LowMemoryNotificationCapability; + + // We use fiber detach callbacks to run our thread shutdown code because the fiber detach + // callback is made without the OS loader lock + g_flsIndex = FlsAlloc(FiberDetachCallback); + if (g_flsIndex == FLS_OUT_OF_INDEXES) + { + return false; + } + + return true; +} + +// Given a mask of capabilities return true if all of them are supported by the current PAL. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalHasCapability(PalCapability capability) +{ + return (g_dwPALCapabilities & (DWORD)capability) == (DWORD)capability; +} + +// Attach thread to PAL. +// It can be called multiple times for the same thread. +// It fails fast if a different thread was already registered with the current fiber +// or if the thread was already registered with a different fiber. 
+// Parameters: +// thread - thread to attach +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalAttachThread(void* thread) +{ + void* threadFromCurrentFiber = FlsGetValue(g_flsIndex); + + if (threadFromCurrentFiber != NULL) + { + ASSERT_UNCONDITIONALLY("Multiple threads encountered from a single fiber"); + RhFailFast(); + } + + // Associate the current fiber with the current thread. This makes the current fiber the thread's "home" + // fiber. This fiber is the only fiber allowed to execute managed code on this thread. When this fiber + // is destroyed, we consider the thread to be destroyed. + FlsSetValue(g_flsIndex, thread); +} + +// Detach thread from PAL. +// It fails fast if some other thread value was attached to PAL. +// Parameters: +// thread - thread to detach +// Return: +// true if the thread was detached, false if there was no attached thread +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalDetachThread(void* thread) +{ + ASSERT(g_flsIndex != FLS_OUT_OF_INDEXES); + void* threadFromCurrentFiber = FlsGetValue(g_flsIndex); + + if (threadFromCurrentFiber == NULL) + { + // we've seen this thread, but not this fiber. It must be a "foreign" fiber that was + // borrowing this thread. + return false; + } + + if (threadFromCurrentFiber != thread) + { + ASSERT_UNCONDITIONALLY("Detaching a thread from the wrong fiber"); + RhFailFast(); + } + + FlsSetValue(g_flsIndex, NULL); + return true; +} + +extern "C" UInt64 PalGetCurrentThreadIdForLogging() +{ + return GetCurrentThreadId(); +} + +#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, UInt32 templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut) +{ +#ifdef XBOX_ONE + return E_NOTIMPL; +#else + BOOL success = FALSE; + HANDLE hMap = NULL, hFile = INVALID_HANDLE_VALUE; + + const WCHAR * wszModuleFileName = NULL; + if (PalGetModuleFileName(&wszModuleFileName, hTemplateModule) == 0 || wszModuleFileName == NULL) + return FALSE; + + hFile = CreateFileW(wszModuleFileName, GENERIC_READ | GENERIC_EXECUTE, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) + goto cleanup; + + hMap = CreateFileMapping(hFile, NULL, SEC_IMAGE | PAGE_READONLY, 0, 0, NULL); + if (hMap == NULL) + goto cleanup; + + *newThunksOut = MapViewOfFile(hMap, 0, 0, templateRva, templateSize); + success = ((*newThunksOut) != NULL); + +cleanup: + CloseHandle(hMap); + CloseHandle(hFile); + + return success; +#endif +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(_In_ void *pBaseAddress) +{ +#ifdef XBOX_ONE + return TRUE; +#else + return UnmapViewOfFile(pBaseAddress); +#endif +} +#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping) +{ + // For CoreRT we are using RWX pages so there is no need for this API for now. 
+ // Once we have a scenario for non-RWX pages we should be able to put the implementation here + return TRUE; +} + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, UInt32 timeout, UInt32 handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) +{ + if (!allowReentrantWait) + { + return WaitForMultipleObjectsEx(handleCount, pHandles, FALSE, timeout, alertable); + } + else + { + DWORD index; + SetLastError(ERROR_SUCCESS); // recommended by MSDN. + HRESULT hr = CoWaitForMultipleHandles(alertable ? COWAIT_ALERTABLE : 0, timeout, handleCount, pHandles, &index); + + switch (hr) + { + case S_OK: + return index; + + case RPC_S_CALLPENDING: + return WAIT_TIMEOUT; + + default: + SetLastError(HRESULT_CODE(hr)); + return WAIT_FAILED; + } + } +} + +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSleep(UInt32 milliseconds) +{ + return Sleep(milliseconds); +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread() +{ + return SwitchToThread(); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName) +{ + return CreateEventW(pEventAttributes, manualReset, initialState, pName); +} + +REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetThreadContext(HANDLE hThread, _Out_ PAL_LIMITED_CONTEXT * pCtx) +{ + CONTEXT win32ctx; + + win32ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_EXCEPTION_REQUEST; + + if (!GetThreadContext(hThread, &win32ctx)) + return false; + + // The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread + // at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in + // this case (which should force our caller to resume the thread and try again -- since this is a fairly + // narrow window we're highly likely to succeed next time). + // Note: in some cases (x86 WOW64, ARM32 on ARM64) the OS will not set the CONTEXT_EXCEPTION_REPORTING flag + // if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling). + // Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that + // it is not safe to manipulate with the current state of the thread context. 
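    // Accordingly, the check below only trusts the captured context when
    // CONTEXT_EXCEPTION_REPORTING came back set and neither CONTEXT_SERVICE_ACTIVE nor
    // CONTEXT_EXCEPTION_ACTIVE is set.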
+ if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0 || + (win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE))) + return false; + +#ifdef HOST_X86 + pCtx->IP = win32ctx.Eip; + pCtx->Rsp = win32ctx.Esp; + pCtx->Rbp = win32ctx.Ebp; + pCtx->Rdi = win32ctx.Edi; + pCtx->Rsi = win32ctx.Esi; + pCtx->Rax = win32ctx.Eax; + pCtx->Rbx = win32ctx.Ebx; +#elif defined(HOST_AMD64) + pCtx->IP = win32ctx.Rip; + pCtx->Rsp = win32ctx.Rsp; + pCtx->Rbp = win32ctx.Rbp; + pCtx->Rdi = win32ctx.Rdi; + pCtx->Rsi = win32ctx.Rsi; + pCtx->Rax = win32ctx.Rax; + pCtx->Rbx = win32ctx.Rbx; + pCtx->R12 = win32ctx.R12; + pCtx->R13 = win32ctx.R13; + pCtx->R14 = win32ctx.R14; + pCtx->R15 = win32ctx.R15; +#elif defined(HOST_ARM) + pCtx->IP = win32ctx.Pc; + pCtx->R0 = win32ctx.R0; + pCtx->R4 = win32ctx.R4; + pCtx->R5 = win32ctx.R5; + pCtx->R6 = win32ctx.R6; + pCtx->R7 = win32ctx.R7; + pCtx->R8 = win32ctx.R8; + pCtx->R9 = win32ctx.R9; + pCtx->R10 = win32ctx.R10; + pCtx->R11 = win32ctx.R11; + pCtx->SP = win32ctx.Sp; + pCtx->LR = win32ctx.Lr; +#elif defined(HOST_ARM64) + pCtx->IP = win32ctx.Pc; + pCtx->X0 = win32ctx.X0; + pCtx->X1 = win32ctx.X1; + // TODO: Copy X2-X7 when we start supporting HVA's + pCtx->X19 = win32ctx.X19; + pCtx->X20 = win32ctx.X20; + pCtx->X21 = win32ctx.X21; + pCtx->X22 = win32ctx.X22; + pCtx->X23 = win32ctx.X23; + pCtx->X24 = win32ctx.X24; + pCtx->X25 = win32ctx.X25; + pCtx->X26 = win32ctx.X26; + pCtx->X27 = win32ctx.X27; + pCtx->X28 = win32ctx.X28; + pCtx->SP = win32ctx.Sp; + pCtx->LR = win32ctx.Lr; + pCtx->FP = win32ctx.Fp; +#else +#error Unsupported platform +#endif + return true; +} + + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ PalHijackCallback callback, _In_opt_ void* pCallbackContext) +{ + if (hThread == INVALID_HANDLE_VALUE) + { + return (UInt32)E_INVALIDARG; + } + + if (SuspendThread(hThread) == (DWORD)-1) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + PAL_LIMITED_CONTEXT ctx; + HRESULT result; + if (!PalGetThreadContext(hThread, &ctx)) + { + result = HRESULT_FROM_WIN32(GetLastError()); + } + else + { + result = callback(hThread, &ctx, pCallbackContext) ? S_OK : E_FAIL; + } + + ResumeThread(hThread); + + return result; +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, BOOL highPriority) +{ + HANDLE hThread = CreateThread( + NULL, + 0, + (LPTHREAD_START_ROUTINE)callback, + pCallbackContext, + highPriority ? 
CREATE_SUSPENDED : 0, + NULL); + + if (hThread == NULL) + return NULL; + + if (highPriority) + { + SetThreadPriority(hThread, THREAD_PRIORITY_HIGHEST); + ResumeThread(hThread); + } + + return hThread; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, FALSE) != NULL; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, TRUE) != NULL; +} + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalGetTickCount() +{ +#pragma warning(push) +#pragma warning(disable: 28159) // Consider GetTickCount64 instead + return GetTickCount(); +#pragma warning(pop) +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalEventEnabled(REGHANDLE regHandle, _In_ const EVENT_DESCRIPTOR* eventDescriptor) +{ + return !!EventEnabled(regHandle, eventDescriptor); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateFileW( + _In_z_ LPCWSTR pFileName, + uint32_t desiredAccess, + uint32_t shareMode, + _In_opt_ void* pSecurityAttributes, + uint32_t creationDisposition, + uint32_t flagsAndAttributes, + HANDLE hTemplateFile) +{ + return CreateFileW(pFileName, desiredAccess, shareMode, (LPSECURITY_ATTRIBUTES)pSecurityAttributes, + creationDisposition, flagsAndAttributes, hTemplateFile); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateLowMemoryNotification() +{ + return CreateMemoryResourceNotification(LowMemoryResourceNotification); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer) +{ + // CoreRT is not designed to be unloadable today. Use GET_MODULE_HANDLE_EX_FLAG_PIN to prevent + // the module from ever unloading. + + HMODULE module; + if (!GetModuleHandleExW( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_PIN, + (LPCWSTR)pointer, + &module)) + { + return NULL; + } + + return (HANDLE)module; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() +{ + typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); + PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; + + HMODULE hMod = LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + if (hMod == NULL) + return FALSE; + + pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); + + if (pfnGetEnabledXStateFeatures == NULL) + { + return FALSE; + } + + DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); + if ((FeatureMask & XSTATE_MASK_AVX) == 0) + { + return FALSE; + } + + return TRUE; +} + +REDHAWK_PALEXPORT void* REDHAWK_PALAPI PalAddVectoredExceptionHandler(UInt32 firstHandler, _In_ PVECTORED_EXCEPTION_HANDLER vectoredHandler) +{ + return AddVectoredExceptionHandler(firstHandler, vectoredHandler); +} + +REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) +{ + // Write the message using lowest-level OS API available. This is used to print the stack overflow + // message, so there is not much that can be done here. 
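    // The return value of WriteFile is intentionally ignored; if even this write fails
    // there is nothing further we can do.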
+ DWORD dwBytesWritten; + WriteFile(GetStdHandle(STD_ERROR_HANDLE), message, (DWORD)strlen(message), &dwBytesWritten, NULL); +} + +REDHAWK_PALEXPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(_In_opt_ void* pAddress, UIntNative size, UInt32 allocationType, UInt32 protect) +{ + return VirtualAlloc(pAddress, size, allocationType, protect); +} + +#pragma warning (push) +#pragma warning (disable:28160) // warnings about invalid potential parameter combinations that would cause VirtualFree to fail - those are asserted for below +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, UIntNative size, UInt32 freeType) +{ + assert(((freeType & MEM_RELEASE) != MEM_RELEASE) || size == 0); + assert((freeType & (MEM_RELEASE | MEM_DECOMMIT)) != (MEM_RELEASE | MEM_DECOMMIT)); + assert(freeType != 0); + + return VirtualFree(pAddress, size, freeType); +} +#pragma warning (pop) + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, UIntNative size, UInt32 protect) +{ + DWORD oldProtect; + return VirtualProtect(pAddress, size, protect, &oldProtect); +} + +REDHAWK_PALEXPORT _Ret_maybenull_ void* REDHAWK_PALAPI PalSetWerDataBuffer(_In_ void* pNewBuffer) +{ + static void* pBuffer; + return InterlockedExchangePointer(&pBuffer, pNewBuffer); +} diff --git a/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.cpp b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.cpp new file mode 100644 index 0000000000000..83fd70bd49dcb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.cpp @@ -0,0 +1,120 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "volatile.h" +#include "yieldprocessornormalized.h" + +#define ULONGLONG int64_t + +static Volatile s_isYieldProcessorNormalizedInitialized = false; +static CrstStatic s_initializeYieldProcessorNormalizedCrst; + +// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are +// tuned for Skylake processors +unsigned int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this is expected to be ~8 for pre-Skylake +unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration = 7; + +void InitializeYieldProcessorNormalizedCrst() +{ + WRAPPER_NO_CONTRACT; + s_initializeYieldProcessorNormalizedCrst.Init(CrstYieldProcessorNormalized); +} + +static void InitializeYieldProcessorNormalized() +{ + WRAPPER_NO_CONTRACT; + + CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst); + + if (s_isYieldProcessorNormalizedInitialized) + { + return; + } + + // Intel pre-Skylake processor: measured typically 14-17 cycles per yield + // Intel post-Skylake processor: measured typically 125-150 cycles per yield + const int MeasureDurationMs = 10; + const int NsPerSecond = 1000 * 1000 * 1000; + + LARGE_INTEGER li; + if (!PalQueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs) + { + // High precision clock not available or clock resolution is too low, resort to defaults + s_isYieldProcessorNormalizedInitialized = true; + return; + } + ULONGLONG ticksPerSecond = li.QuadPart; + 
+ // Measure the nanosecond delay per yield + ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs); + unsigned int yieldCount = 0; + PalQueryPerformanceCounter(&li); + ULONGLONG startTicks = li.QuadPart; + ULONGLONG elapsedTicks; + do + { + // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask + // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the + // low microsecond range. + for (int i = 0; i < 1000; ++i) + { + System_YieldProcessor(); + } + yieldCount += 1000; + + PalQueryPerformanceCounter(&li); + ULONGLONG nowTicks = li.QuadPart; + elapsedTicks = nowTicks - startTicks; + } while (elapsedTicks < measureDurationTicks); + double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond); + if (nsPerYield < 1) + { + nsPerYield = 1; + } + + // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this + // value is naturally limited to MinNsPerNormalizedYield. + int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5); + if (yieldsPerNormalizedYield < 1) + { + yieldsPerNormalizedYield = 1; + } + _ASSERTE(yieldsPerNormalizedYield <= (int)MinNsPerNormalizedYield); + + // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to + // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a + // better job of allowing other work to run. + int optimalMaxNormalizedYieldsPerSpinIteration = + (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5); + if (optimalMaxNormalizedYieldsPerSpinIteration < 1) + { + optimalMaxNormalizedYieldsPerSpinIteration = 1; + } + + g_yieldsPerNormalizedYield = yieldsPerNormalizedYield; + g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration; + s_isYieldProcessorNormalizedInitialized = true; + + GCHeapUtilities::GetGCHeap()->SetYieldProcessorScalingFactor((float)yieldsPerNormalizedYield); +} + +void EnsureYieldProcessorNormalizedInitialized() +{ + WRAPPER_NO_CONTRACT; + + if (!s_isYieldProcessorNormalizedInitialized) + { + InitializeYieldProcessorNormalized(); + } +} diff --git a/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.h b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.h new file mode 100644 index 0000000000000..405e991de07ad --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.h @@ -0,0 +1,229 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#pragma once + +#include + +// Undefine YieldProcessor to encourage using the normalized versions below instead. System_YieldProcessor() can be used where +// the intention is to use the system-default implementation of YieldProcessor(). 
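// Editorial sketch (not part of this change): the measurement done in
// yieldprocessornormalized.cpp effectively rounds MinNsPerNormalizedYield by the
// measured cost of one yield. SketchYieldsPerNormalizedYield is a hypothetical helper,
// and the clock frequencies below are assumptions used only to show why the defaults
// land at 1 (post-Skylake) and ~8 (pre-Skylake):
//
// unsigned int SketchYieldsPerNormalizedYield(double nsPerYield)
// {
//     if (nsPerYield < 1)
//         nsPerYield = 1;
//     unsigned int yields = (unsigned int)(37.0 /* MinNsPerNormalizedYield */ / nsPerYield + 0.5);
//     return yields < 1 ? 1 : yields;
// }
//
// Post-Skylake: ~125-150 cycles per pause at ~3.5-4 GHz is roughly 31-43 ns  => 1 yield per normalized yield.
// Pre-Skylake:  ~14-17 cycles per pause at ~3.5 GHz is roughly 4-5 ns        => ~8 yields per normalized yield.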
+#define HAS_SYSTEM_YIELDPROCESSOR +FORCEINLINE void System_YieldProcessor() { PalYieldProcessor(); } +#ifdef YieldProcessor +#undef YieldProcessor +#endif +#define YieldProcessor Dont_Use_YieldProcessor +#ifdef PalYieldProcessor +#undef PalYieldProcessor +#endif +#define PalYieldProcessor Dont_Use_PalYieldProcessor + +#define SIZE_T UIntNative + +const unsigned int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake +const unsigned int NsPerOptimalMaxSpinIterationDuration = 272; // approx. 900 cycles, measured 281 on pre-Skylake, 263 on post-Skylake + +extern unsigned int g_yieldsPerNormalizedYield; +extern unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration; + +void InitializeYieldProcessorNormalizedCrst(); +void EnsureYieldProcessorNormalizedInitialized(); + +class YieldProcessorNormalizationInfo +{ +private: + unsigned int yieldsPerNormalizedYield; + unsigned int optimalMaxNormalizedYieldsPerSpinIteration; + unsigned int optimalMaxYieldsPerSpinIteration; + +public: + YieldProcessorNormalizationInfo() + : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield), + optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration), + optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration) + { + } + + friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &); + friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &, unsigned int); + friend void YieldProcessorNormalizedForPreSkylakeCount(const YieldProcessorNormalizationInfo &, unsigned int); + friend void YieldProcessorWithBackOffNormalized(const YieldProcessorNormalizationInfo &, unsigned int); +}; + +// See YieldProcessorNormalized() for preliminary info. Typical usage: +// if (!condition) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorNormalized(normalizationInfo); +// } while (!condition); +// } +FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo) +{ + unsigned int n = normalizationInfo.yieldsPerNormalizedYield; + _ASSERTE(n != 0); + do + { + System_YieldProcessor(); + } while (--n != 0); +} + +// Delays execution of the current thread for a short duration. Unlike YieldProcessor(), an effort is made to normalize the +// delay across processors. The actual delay may be meaningful in several ways, including but not limited to the following: +// - The delay should be long enough that a tiny spin-wait like the following has a decent likelihood of observing a new value +// for the condition (when changed by a different thread) on each iteration, otherwise it may unnecessary increase CPU usage +// and decrease scalability of the operation. +// while(!condition) +// { +// YieldProcessorNormalized(); +// } +// - The delay should be short enough that a tiny spin-wait like above would not miss multiple cross-thread changes to the +// condition, otherwise it may unnecessarily increase latency of the operation +// - In reasonably short spin-waits, the actual delay may not matter much. In unreasonably long spin-waits that progress in +// yield count per iteration for each failed check of the condition, the progression can significantly magnify the second +// issue above on later iterations. +// - This function and variants are intended to provide a decent balance between the above issues, as ideal solutions to each +// issue have trade-offs between them. 
If latency of the operation is far more important in the scenario, consider using +// System_YieldProcessor() instead, which would issue a delay that is typically <= the delay issued by this method. +FORCEINLINE void YieldProcessorNormalized() +{ + YieldProcessorNormalized(YieldProcessorNormalizationInfo()); +} + +// See YieldProcessorNormalized(count) for preliminary info. Typical usage: +// if (!moreExpensiveCondition) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorNormalized(normalizationInfo, 2); +// } while (!moreExpensiveCondition); +// } +FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int count) +{ + _ASSERTE(count != 0); + + if (sizeof(SIZE_T) <= sizeof(unsigned int)) + { + // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield + // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized(). + const unsigned int MaxCount = UINT_MAX / MinNsPerNormalizedYield; + if (count > MaxCount) + { + count = MaxCount; + } + } + + SIZE_T n = (SIZE_T)count * normalizationInfo.yieldsPerNormalizedYield; + _ASSERTE(n != 0); + do + { + System_YieldProcessor(); + } while (--n != 0); +} + +// See YieldProcessorNormalized() for preliminary info. This function repeats the delay 'count' times. This overload is +// preferred over the single-count overload when multiple yields are desired per spin-wait iteration. Typical usage: +// while(!moreExpensiveCondition) +// { +// YieldProcessorNormalized(2); +// } +FORCEINLINE void YieldProcessorNormalized(unsigned int count) +{ + YieldProcessorNormalized(YieldProcessorNormalizationInfo(), count); +} + +// Please DO NOT use this function in new code! See YieldProcessorNormalizedForPreSkylakeCount(preSkylakeCount) for preliminary +// info. Typical usage: +// if (!condition) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, 100); +// } while (!condition); +// } +FORCEINLINE void YieldProcessorNormalizedForPreSkylakeCount( + const YieldProcessorNormalizationInfo &normalizationInfo, + unsigned int preSkylakeCount) +{ + _ASSERTE(preSkylakeCount != 0); + + if (sizeof(SIZE_T) <= sizeof(unsigned int)) + { + // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield + // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized(). + const unsigned int MaxCount = UINT_MAX / MinNsPerNormalizedYield; + if (preSkylakeCount > MaxCount) + { + preSkylakeCount = MaxCount; + } + } + + const unsigned int PreSkylakeCountToSkylakeCountDivisor = 8; + SIZE_T n = (SIZE_T)preSkylakeCount * normalizationInfo.yieldsPerNormalizedYield / PreSkylakeCountToSkylakeCountDivisor; + if (n == 0) + { + n = 1; + } + do + { + System_YieldProcessor(); + } while (--n != 0); +} + +// Please DO NOT use this function in new code! This function is to be used for old spin-wait loops that have not been retuned +// for recent processors, and especially where the yield count may be unreasonably high. The function scales the yield count in +// an attempt to normalize the total delay across processors, to approximately the total delay that would be issued on a +// pre-Skylake processor. New code should be tuned with YieldProcessorNormalized() or variants instead. 
Typical usage: +// while(!condition) +// { +// YieldProcessorNormalizedForPreSkylakeCount(100); +// } +FORCEINLINE void YieldProcessorNormalizedForPreSkylakeCount(unsigned int preSkylakeCount) +{ + YieldProcessorNormalizedForPreSkylakeCount(YieldProcessorNormalizationInfo(), preSkylakeCount); +} + +// See YieldProcessorNormalized() for preliminary info. This function is to be used when there is a decent possibility that the +// condition would not be satisfied within a short duration. The current implementation increases the delay per spin-wait +// iteration exponentially up to a limit. Typical usage: +// if (!conditionThatMayNotBeSatisfiedSoon) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorWithBackOffNormalized(normalizationInfo); // maybe Sleep(0) occasionally +// } while (!conditionThatMayNotBeSatisfiedSoon); +// } +FORCEINLINE void YieldProcessorWithBackOffNormalized( + const YieldProcessorNormalizationInfo &normalizationInfo, + unsigned int spinIteration) +{ + // normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration cannot exceed the value below based on calculations done in + // InitializeYieldProcessorNormalized() + const unsigned int MaxOptimalMaxNormalizedYieldsPerSpinIteration = + NsPerOptimalMaxSpinIterationDuration * 3 / (MinNsPerNormalizedYield * 2) + 1; + _ASSERTE(normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration <= MaxOptimalMaxNormalizedYieldsPerSpinIteration); + + // This shift value should be adjusted based on the asserted condition below + const uint8_t MaxShift = 3; + static_assert(((unsigned int)1 << (MaxShift + 1)) >= MaxOptimalMaxNormalizedYieldsPerSpinIteration, ""); + + unsigned int n; + if (spinIteration <= MaxShift && + ((unsigned int)1 << spinIteration) < normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration) + { + n = ((unsigned int)1 << spinIteration) * normalizationInfo.yieldsPerNormalizedYield; + } + else + { + n = normalizationInfo.optimalMaxYieldsPerSpinIteration; + } + _ASSERTE(n != 0); + do + { + System_YieldProcessor(); + } while (--n != 0); +} diff --git a/src/coreclr/src/nativeaot/libunwind/.arcconfig b/src/coreclr/src/nativeaot/libunwind/.arcconfig new file mode 100644 index 0000000000000..78ee8d358cded --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/.arcconfig @@ -0,0 +1,4 @@ +{ + "repository.callsign" : "UNW", + "conduit_uri" : "https://reviews.llvm.org/" +} diff --git a/src/coreclr/src/nativeaot/libunwind/.clang-format b/src/coreclr/src/nativeaot/libunwind/.clang-format new file mode 100644 index 0000000000000..5bead5f39dd3c --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM + diff --git a/src/coreclr/src/nativeaot/libunwind/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/CMakeLists.txt new file mode 100644 index 0000000000000..b51922a48fe28 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/CMakeLists.txt @@ -0,0 +1,383 @@ +#=============================================================================== +# Setup Project +#=============================================================================== + +cmake_minimum_required(VERSION 3.4.3) + +if (POLICY CMP0042) + cmake_policy(SET CMP0042 NEW) # Set MACOSX_RPATH=YES by default +endif() + +# Add path for custom modules +set(CMAKE_MODULE_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules" + ${CMAKE_MODULE_PATH} + ) + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD) + 
project(libunwind) + + # Rely on llvm-config. + set(CONFIG_OUTPUT) + if(NOT LLVM_CONFIG_PATH) + find_program(LLVM_CONFIG_PATH "llvm-config") + endif() + if (DEFINED LLVM_PATH) + set(LLVM_INCLUDE_DIR ${LLVM_INCLUDE_DIR} CACHE PATH "Path to llvm/include") + set(LLVM_PATH ${LLVM_PATH} CACHE PATH "Path to LLVM source tree") + set(LLVM_MAIN_SRC_DIR ${LLVM_PATH}) + set(LLVM_CMAKE_PATH "${LLVM_PATH}/cmake/modules") + elseif(LLVM_CONFIG_PATH) + message(STATUS "Found LLVM_CONFIG_PATH as ${LLVM_CONFIG_PATH}") + set(CONFIG_COMMAND ${LLVM_CONFIG_PATH} "--includedir" "--prefix" "--src-root") + execute_process(COMMAND ${CONFIG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE CONFIG_OUTPUT) + if (NOT HAD_ERROR) + string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" + CONFIG_OUTPUT ${CONFIG_OUTPUT}) + else() + string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}") + message(STATUS "${CONFIG_COMMAND_STR}") + message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") + endif() + + list(GET CONFIG_OUTPUT 0 INCLUDE_DIR) + list(GET CONFIG_OUTPUT 1 LLVM_OBJ_ROOT) + list(GET CONFIG_OUTPUT 2 MAIN_SRC_DIR) + + set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include") + set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree") + set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") + set(LLVM_LIT_PATH "${LLVM_PATH}/utils/lit/lit.py") + + # --cmakedir is supported since llvm r291218 (4.0 release) + execute_process( + COMMAND ${LLVM_CONFIG_PATH} --cmakedir + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE CONFIG_OUTPUT + ERROR_QUIET) + if(NOT HAD_ERROR) + string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG) + file(TO_CMAKE_PATH "${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG}" LLVM_CMAKE_PATH) + else() + file(TO_CMAKE_PATH "${LLVM_BINARY_DIR}" LLVM_BINARY_DIR_CMAKE_STYLE) + set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") + endif() + else() + message(WARNING "UNSUPPORTED LIBUNWIND CONFIGURATION DETECTED: " + "llvm-config not found and LLVM_MAIN_SRC_DIR not defined. " + "Reconfigure with -DLLVM_CONFIG=path/to/llvm-config " + "or -DLLVM_PATH=path/to/llvm-source-root.") + endif() + + if (EXISTS ${LLVM_CMAKE_PATH}) + # Enable warnings, otherwise -w gets added to the cflags by HandleLLVMOptions. + set(LLVM_ENABLE_WARNINGS ON) + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") + include("${LLVM_CMAKE_PATH}/AddLLVM.cmake") + include("${LLVM_CMAKE_PATH}/HandleLLVMOptions.cmake") + else() + message(WARNING "Not found: ${LLVM_CMAKE_PATH}") + endif() + + set(PACKAGE_NAME libunwind) + set(PACKAGE_VERSION 9.0.0svn) + set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") + set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") + + if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) + set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) + else() + # Seek installed Lit. + find_program(LLVM_LIT "lit.py" ${LLVM_MAIN_SRC_DIR}/utils/lit + DOC "Path to lit.py") + endif() + + if (LLVM_LIT) + # Define the default arguments to use with 'lit', and an option for the user + # to override. + set(LIT_ARGS_DEFAULT "-sv") + if (MSVC OR XCODE) + set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") + endif() + set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") + + # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. 
+ if (WIN32 AND NOT CYGWIN) + set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") + endif() + else() + set(LLVM_INCLUDE_TESTS OFF) + endif() + + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) +else() + set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py") +endif() + +#=============================================================================== +# Setup CMake Options +#=============================================================================== +include(CMakeDependentOption) +include(HandleCompilerRT) + +# Define options. +option(LIBUNWIND_BUILD_32_BITS "Build 32 bit libunwind" ${LLVM_BUILD_32_BITS}) +option(LIBUNWIND_ENABLE_ASSERTIONS "Enable assertions independent of build mode." ON) +option(LIBUNWIND_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) +option(LIBUNWIND_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF) +option(LIBUNWIND_ENABLE_SHARED "Build libunwind as a shared library." ON) +option(LIBUNWIND_ENABLE_STATIC "Build libunwind as a static library." ON) +option(LIBUNWIND_ENABLE_CROSS_UNWINDING "Enable cross-platform unwinding support." OFF) +option(LIBUNWIND_ENABLE_ARM_WMMX "Enable unwinding support for ARM WMMX registers." OFF) +option(LIBUNWIND_ENABLE_THREADS "Build libunwind with threading support." ON) +option(LIBUNWIND_WEAK_PTHREAD_LIB "Use weak references to refer to pthread functions." OFF) +option(LIBUNWIND_USE_COMPILER_RT "Use compiler-rt instead of libgcc" OFF) +option(LIBUNWIND_INCLUDE_DOCS "Build the libunwind documentation." ${LLVM_INCLUDE_DOCS}) + +set(LIBUNWIND_LIBDIR_SUFFIX "${LLVM_LIBDIR_SUFFIX}" CACHE STRING + "Define suffix of library directory name (32/64)") +option(LIBUNWIND_INSTALL_LIBRARY "Install the libunwind library." ON) +cmake_dependent_option(LIBUNWIND_INSTALL_STATIC_LIBRARY + "Install the static libunwind library." ON + "LIBUNWIND_ENABLE_STATIC;LIBUNWIND_INSTALL_LIBRARY" OFF) +cmake_dependent_option(LIBUNWIND_INSTALL_SHARED_LIBRARY + "Install the shared libunwind library." ON + "LIBUNWIND_ENABLE_SHARED;LIBUNWIND_INSTALL_LIBRARY" OFF) +set(LIBUNWIND_TARGET_TRIPLE "" CACHE STRING "Target triple for cross compiling.") +set(LIBUNWIND_GCC_TOOLCHAIN "" CACHE PATH "GCC toolchain for cross compiling.") +set(LIBUNWIND_SYSROOT "" CACHE PATH "Sysroot for cross compiling.") +set(LIBUNWIND_TEST_LINKER_FLAGS "" CACHE STRING + "Additional linker flags for test programs.") +set(LIBUNWIND_TEST_COMPILER_FLAGS "" CACHE STRING + "Additional compiler flags for test programs.") + +if (NOT LIBUNWIND_ENABLE_SHARED AND NOT LIBUNWIND_ENABLE_STATIC) + message(FATAL_ERROR "libunwind must be built as either a shared or static library.") +endif() + +# Check that we can build with 32 bits if requested. +if (CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32) + if (LIBUNWIND_BUILD_32_BITS AND NOT LLVM_BUILD_32_BITS) # Don't duplicate the output from LLVM + message(STATUS "Building 32 bits executables and libraries.") + endif() +elseif(LIBUNWIND_BUILD_32_BITS) + message(FATAL_ERROR "LIBUNWIND_BUILD_32_BITS=ON is not supported on this platform.") +endif() + +option(LIBUNWIND_HERMETIC_STATIC_LIBRARY + "Do not export any symbols from the static library." 
OFF) + +#=============================================================================== +# Configure System +#=============================================================================== + +# Add path for custom modules +set(CMAKE_MODULE_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" + ${CMAKE_MODULE_PATH}) + +set(LIBUNWIND_COMPILER ${CMAKE_CXX_COMPILER}) +set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LIBUNWIND_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION + ${PACKAGE_VERSION}) + +if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) + set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) + set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) + if(LIBCXX_LIBDIR_SUBDIR) + string(APPEND LIBUNWIND_LIBRARY_DIR /${LIBUNWIND_LIBDIR_SUBDIR}) + string(APPEND LIBUNWIND_INSTALL_LIBRARY_DIR /${LIBUNWIND_LIBDIR_SUBDIR}) + endif() +elseif(LLVM_LIBRARY_OUTPUT_INTDIR) + set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LIBUNWIND_LIBDIR_SUFFIX}) +else() + set(LIBUNWIND_LIBRARY_DIR ${CMAKE_BINARY_DIR}/lib${LIBUNWIND_LIBDIR_SUFFIX}) + set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LIBUNWIND_LIBDIR_SUFFIX}) +endif() + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LIBUNWIND_LIBRARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LIBUNWIND_LIBRARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LIBUNWIND_LIBRARY_DIR}) + +set(LIBUNWIND_INSTALL_PREFIX "" CACHE STRING "Define libunwind destination prefix.") + +set(LIBUNWIND_C_FLAGS "") +set(LIBUNWIND_CXX_FLAGS "") +set(LIBUNWIND_COMPILE_FLAGS "") +set(LIBUNWIND_LINK_FLAGS "") + +# Get required flags. +macro(unwind_append_if list condition var) + if (${condition}) + list(APPEND ${list} ${var}) + endif() +endmacro() + +macro(add_target_flags) + foreach(value ${ARGN}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${value}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${value}") + list(APPEND LIBUNWIND_COMPILE_FLAGS ${value}) + list(APPEND LIBUNWIND_LINK_FLAGS ${value}) + endforeach() +endmacro() + +macro(add_target_flags_if condition) + if (${condition}) + add_target_flags(${ARGN}) + endif() +endmacro() + +add_target_flags_if(LIBUNWIND_BUILD_32_BITS "-m32") + +if(LIBUNWIND_TARGET_TRIPLE) + add_target_flags("--target=${LIBUNWIND_TARGET_TRIPLE}") +elseif(CMAKE_CXX_COMPILER_TARGET) + set(LIBUNWIND_TARGET_TRIPLE "${CMAKE_CXX_COMPILER_TARGET}") +endif() +if(LIBUNWIND_GCC_TOOLCHAIN) + add_target_flags("--gcc-toolchain=${LIBUNWIND_GCC_TOOLCHAIN}") +elseif(CMAKE_CXX_COMPILER_EXTERNAL_TOOLCHAIN) + set(LIBUNWIND_GCC_TOOLCHAIN "${CMAKE_CXX_COMPILER_EXTERNAL_TOOLCHAIN}") +endif() +if(LIBUNWIND_SYSROOT) + add_target_flags("--sysroot=${LIBUNWIND_SYSROOT}") +elseif(CMAKE_SYSROOT) + set(LIBUNWIND_SYSROOT "${CMAKE_SYSROOT}") +endif() + +if (LIBUNWIND_TARGET_TRIPLE) + set(TARGET_TRIPLE "${LIBUNWIND_TARGET_TRIPLE}") +endif() + +# Configure compiler. 
+include(config-ix) + +if (LIBUNWIND_USE_COMPILER_RT AND NOT LIBUNWIND_HAS_NODEFAULTLIBS_FLAG) + list(APPEND LIBUNWIND_LINK_FLAGS "-rtlib=compiler-rt") +endif() + +#=============================================================================== +# Setup Compiler Flags +#=============================================================================== + +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WERROR_FLAG -Werror=return-type) + +# Get warning flags +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_W_FLAG -W) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WALL_FLAG -Wall) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WCHAR_SUBSCRIPTS_FLAG -Wchar-subscripts) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WCONVERSION_FLAG -Wconversion) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WMISMATCHED_TAGS_FLAG -Wmismatched-tags) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WMISSING_BRACES_FLAG -Wmissing-braces) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WNEWLINE_EOF_FLAG -Wnewline-eof) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WNO_UNUSED_FUNCTION_FLAG -Wno-unused-function) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSHADOW_FLAG -Wshadow) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSHORTEN_64_TO_32_FLAG -Wshorten-64-to-32) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSIGN_COMPARE_FLAG -Wsign-compare) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSIGN_CONVERSION_FLAG -Wsign-conversion) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSTRICT_ALIASING_FLAG -Wstrict-aliasing=2) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSTRICT_OVERFLOW_FLAG -Wstrict-overflow=4) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WUNUSED_PARAMETER_FLAG -Wunused-parameter) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WUNUSED_VARIABLE_FLAG -Wunused-variable) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WWRITE_STRINGS_FLAG -Wwrite-strings) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WUNDEF_FLAG -Wundef) + +if (LIBUNWIND_ENABLE_WERROR) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WERROR_FLAG -Werror) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WX_FLAG -WX) +else() + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WNO_ERROR_FLAG -Wno-error) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_NO_WX_FLAG -WX-) +endif() + +if (LIBUNWIND_ENABLE_PEDANTIC) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_PEDANTIC_FLAG -pedantic) +endif() + +# Get feature flags. +# Exceptions +# Catches C++ exceptions only and tells the compiler to assume that extern C +# functions never throw a C++ exception. +unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_FSTRICT_ALIASING_FLAG -fstrict-aliasing) +unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_EHSC_FLAG -EHsc) + +unwind_append_if(LIBUNWIND_C_FLAGS LIBUNWIND_HAS_FUNWIND_TABLES -funwind-tables) + +# Ensure that we don't depend on C++ standard library. +unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_NOSTDINCXX_FLAG -nostdinc++) + +# Assert +string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) +if (LIBUNWIND_ENABLE_ASSERTIONS) + # MSVC doesn't like _DEBUG on release builds. See PR 4379. 
+ if (NOT MSVC) + list(APPEND LIBUNWIND_COMPILE_FLAGS -D_DEBUG) + endif() + + # On Release builds cmake automatically defines NDEBUG, so we + # explicitly undefine it: + if (uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE") + list(APPEND LIBUNWIND_COMPILE_FLAGS -UNDEBUG) + endif() +else() + if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE") + list(APPEND LIBUNWIND_COMPILE_FLAGS -DNDEBUG) + endif() +endif() + +# Cross-unwinding +if (NOT LIBUNWIND_ENABLE_CROSS_UNWINDING) + list(APPEND LIBUNWIND_COMPILE_FLAGS -D_LIBUNWIND_IS_NATIVE_ONLY) +endif() + +# Threading-support +if (NOT LIBUNWIND_ENABLE_THREADS) + list(APPEND LIBUNWIND_COMPILE_FLAGS -D_LIBUNWIND_HAS_NO_THREADS) +endif() + +# ARM WMMX register support +if (LIBUNWIND_ENABLE_ARM_WMMX) + # __ARM_WMMX is a compiler pre-define (as per the ACLE 2.0). Clang does not + # define this macro for any supported target at present. Therefore, here we + # provide the option to explicitly enable support for WMMX registers in the + # unwinder. + list(APPEND LIBUNWIND_COMPILE_FLAGS -D__ARM_WMMX) +endif() + +# This is the _ONLY_ place where add_definitions is called. +if (MSVC) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) +endif() + +# Disable DLL annotations on Windows for static builds. +if (WIN32 AND LIBUNWIND_ENABLE_STATIC AND NOT LIBUNWIND_ENABLE_SHARED) + add_definitions(-D_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) +endif() + +if (LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) + add_definitions(-D_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +endif() + +#=============================================================================== +# Setup Source Code +#=============================================================================== + +include_directories(include) + +add_subdirectory(src) + +if (LIBUNWIND_INCLUDE_DOCS) + add_subdirectory(docs) +endif() + +if (EXISTS ${LLVM_CMAKE_PATH}) + add_subdirectory(test) +endif() diff --git a/src/coreclr/src/nativeaot/libunwind/cmake/Modules/HandleCompilerRT.cmake b/src/coreclr/src/nativeaot/libunwind/cmake/Modules/HandleCompilerRT.cmake new file mode 100644 index 0000000000000..77168e599466e --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/cmake/Modules/HandleCompilerRT.cmake @@ -0,0 +1,64 @@ +function(find_compiler_rt_library name dest) + if (NOT DEFINED LIBUNWIND_COMPILE_FLAGS) + message(FATAL_ERROR "LIBUNWIND_COMPILE_FLAGS must be defined when using this function") + endif() + set(dest "" PARENT_SCOPE) + set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${LIBUNWIND_COMPILE_FLAGS} + "--rtlib=compiler-rt" "--print-libgcc-file-name") + if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_CXX_COMPILER_TARGET) + list(APPEND CLANG_COMMAND "--target=${CMAKE_CXX_COMPILER_TARGET}") + endif() + get_property(LIBUNWIND_CXX_FLAGS CACHE CMAKE_CXX_FLAGS PROPERTY VALUE) + string(REPLACE " " ";" LIBUNWIND_CXX_FLAGS "${LIBUNWIND_CXX_FLAGS}") + list(APPEND CLANG_COMMAND ${LIBUNWIND_CXX_FLAGS}) + execute_process( + COMMAND ${CLANG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LIBRARY_FILE + ) + string(STRIP "${LIBRARY_FILE}" LIBRARY_FILE) + file(TO_CMAKE_PATH "${LIBRARY_FILE}" LIBRARY_FILE) + string(REPLACE "builtins" "${name}" LIBRARY_FILE "${LIBRARY_FILE}") + if (NOT HAD_ERROR AND EXISTS "${LIBRARY_FILE}") + message(STATUS "Found compiler-rt library: ${LIBRARY_FILE}") + set(${dest} "${LIBRARY_FILE}" PARENT_SCOPE) + else() + message(STATUS "Failed to find compiler-rt library") + endif() +endfunction() + +function(find_compiler_rt_dir dest) + if (NOT DEFINED LIBUNWIND_COMPILE_FLAGS) + message(FATAL_ERROR "LIBUNWIND_COMPILE_FLAGS must be defined 
when using this function") + endif() + set(dest "" PARENT_SCOPE) + if (APPLE) + set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${LIBUNWIND_COMPILE_FLAGS} + "-print-file-name=lib") + execute_process( + COMMAND ${CLANG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LIBRARY_DIR + ) + string(STRIP "${LIBRARY_DIR}" LIBRARY_DIR) + file(TO_CMAKE_PATH "${LIBRARY_DIR}" LIBRARY_DIR) + set(LIBRARY_DIR "${LIBRARY_DIR}/darwin") + else() + set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${LIBUNWIND_COMPILE_FLAGS} + "--rtlib=compiler-rt" "--print-libgcc-file-name") + execute_process( + COMMAND ${CLANG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LIBRARY_FILE + ) + string(STRIP "${LIBRARY_FILE}" LIBRARY_FILE) + file(TO_CMAKE_PATH "${LIBRARY_FILE}" LIBRARY_FILE) + get_filename_component(LIBRARY_DIR "${LIBRARY_FILE}" DIRECTORY) + endif() + if (NOT HAD_ERROR AND EXISTS "${LIBRARY_DIR}") + message(STATUS "Found compiler-rt directory: ${LIBRARY_DIR}") + set(${dest} "${LIBRARY_DIR}" PARENT_SCOPE) + else() + message(STATUS "Failed to find compiler-rt directory") + endif() +endfunction() diff --git a/src/coreclr/src/nativeaot/libunwind/cmake/config-ix.cmake b/src/coreclr/src/nativeaot/libunwind/cmake/config-ix.cmake new file mode 100644 index 0000000000000..07a95ce1a46a8 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/cmake/config-ix.cmake @@ -0,0 +1,110 @@ +include(CMakePushCheckState) +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) +include(CheckLibraryExists) +include(CheckCSourceCompiles) + +check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB) + +if (NOT LIBUNWIND_USE_COMPILER_RT) + check_library_exists(gcc_s __gcc_personality_v0 "" LIBUNWIND_HAS_GCC_S_LIB) + check_library_exists(gcc __absvdi2 "" LIBUNWIND_HAS_GCC_LIB) +endif() + +# libunwind is built with -nodefaultlibs, so we want all our checks to also +# use this option, otherwise we may end up with an inconsistency between +# the flags we think we require during configuration (if the checks are +# performed without -nodefaultlibs) and the flags that are actually +# required during compilation (which has the -nodefaultlibs). libc is +# required for the link to go through. We remove sanitizers from the +# configuration checks to avoid spurious link errors. +check_c_compiler_flag(-nodefaultlibs LIBUNWIND_HAS_NODEFAULTLIBS_FLAG) +if (LIBUNWIND_HAS_NODEFAULTLIBS_FLAG) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs") + if (LIBUNWIND_HAS_C_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES c) + endif () + if (LIBUNWIND_USE_COMPILER_RT) + find_compiler_rt_library(builtins LIBUNWIND_BUILTINS_LIBRARY) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${LIBUNWIND_BUILTINS_LIBRARY}") + else () + if (LIBUNWIND_HAS_GCC_S_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES gcc_s) + endif () + if (LIBUNWIND_HAS_GCC_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES gcc) + endif () + endif () + if (MINGW) + # Mingw64 requires quite a few "C" runtime libraries in order for basic + # programs to link successfully with -nodefaultlibs. 
+ if (LIBUNWIND_USE_COMPILER_RT) + set(MINGW_RUNTIME ${LIBUNWIND_BUILTINS_LIBRARY}) + else () + set(MINGW_RUNTIME gcc_s gcc) + endif() + set(MINGW_LIBRARIES mingw32 ${MINGW_RUNTIME} moldname mingwex msvcrt advapi32 + shell32 user32 kernel32 mingw32 ${MINGW_RUNTIME} + moldname mingwex msvcrt) + list(APPEND CMAKE_REQUIRED_LIBRARIES ${MINGW_LIBRARIES}) + endif() + if (CMAKE_C_FLAGS MATCHES -fsanitize OR CMAKE_CXX_FLAGS MATCHES -fsanitize) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fno-sanitize=all") + endif () + if (CMAKE_C_FLAGS MATCHES -fsanitize-coverage OR CMAKE_CXX_FLAGS MATCHES -fsanitize-coverage) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters") + endif () +endif () + +# Check compiler pragmas +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + cmake_push_check_state() + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") + check_c_source_compiles(" +#pragma comment(lib, \"c\") +int main() { return 0; } +" LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) + cmake_pop_check_state() +endif() + +# Check compiler flags +check_c_compiler_flag(-funwind-tables LIBUNWIND_HAS_FUNWIND_TABLES) +check_cxx_compiler_flag(-fno-exceptions LIBUNWIND_HAS_NO_EXCEPTIONS_FLAG) +check_cxx_compiler_flag(-fno-rtti LIBUNWIND_HAS_NO_RTTI_FLAG) +check_cxx_compiler_flag(-fstrict-aliasing LIBUNWIND_HAS_FSTRICT_ALIASING_FLAG) +check_cxx_compiler_flag(-nostdinc++ LIBUNWIND_HAS_NOSTDINCXX_FLAG) +check_cxx_compiler_flag(-Wall LIBUNWIND_HAS_WALL_FLAG) +check_cxx_compiler_flag(-W LIBUNWIND_HAS_W_FLAG) +check_cxx_compiler_flag(-Wno-unused-function LIBUNWIND_HAS_WNO_UNUSED_FUNCTION_FLAG) +check_cxx_compiler_flag(-Wunused-variable LIBUNWIND_HAS_WUNUSED_VARIABLE_FLAG) +check_cxx_compiler_flag(-Wunused-parameter LIBUNWIND_HAS_WUNUSED_PARAMETER_FLAG) +check_cxx_compiler_flag(-Wstrict-aliasing LIBUNWIND_HAS_WSTRICT_ALIASING_FLAG) +check_cxx_compiler_flag(-Wstrict-overflow LIBUNWIND_HAS_WSTRICT_OVERFLOW_FLAG) +check_cxx_compiler_flag(-Wwrite-strings LIBUNWIND_HAS_WWRITE_STRINGS_FLAG) +check_cxx_compiler_flag(-Wchar-subscripts LIBUNWIND_HAS_WCHAR_SUBSCRIPTS_FLAG) +check_cxx_compiler_flag(-Wmismatched-tags LIBUNWIND_HAS_WMISMATCHED_TAGS_FLAG) +check_cxx_compiler_flag(-Wmissing-braces LIBUNWIND_HAS_WMISSING_BRACES_FLAG) +check_cxx_compiler_flag(-Wshorten-64-to-32 LIBUNWIND_HAS_WSHORTEN_64_TO_32_FLAG) +check_cxx_compiler_flag(-Wsign-conversion LIBUNWIND_HAS_WSIGN_CONVERSION_FLAG) +check_cxx_compiler_flag(-Wsign-compare LIBUNWIND_HAS_WSIGN_COMPARE_FLAG) +check_cxx_compiler_flag(-Wshadow LIBUNWIND_HAS_WSHADOW_FLAG) +check_cxx_compiler_flag(-Wconversion LIBUNWIND_HAS_WCONVERSION_FLAG) +check_cxx_compiler_flag(-Wnewline-eof LIBUNWIND_HAS_WNEWLINE_EOF_FLAG) +check_cxx_compiler_flag(-Wundef LIBUNWIND_HAS_WUNDEF_FLAG) +check_cxx_compiler_flag(-pedantic LIBUNWIND_HAS_PEDANTIC_FLAG) +check_cxx_compiler_flag(-Werror LIBUNWIND_HAS_WERROR_FLAG) +check_cxx_compiler_flag(-Wno-error LIBUNWIND_HAS_WNO_ERROR_FLAG) +check_cxx_compiler_flag(/WX LIBUNWIND_HAS_WX_FLAG) +check_cxx_compiler_flag(/WX- LIBUNWIND_HAS_NO_WX_FLAG) +check_cxx_compiler_flag(/EHsc LIBUNWIND_HAS_EHSC_FLAG) +check_cxx_compiler_flag(/EHs- LIBUNWIND_HAS_NO_EHS_FLAG) +check_cxx_compiler_flag(/EHa- LIBUNWIND_HAS_NO_EHA_FLAG) +check_cxx_compiler_flag(/GR- LIBUNWIND_HAS_NO_GR_FLAG) +check_cxx_compiler_flag(-std=c++11 LIBUNWIND_HAS_STD_CXX11) + +if(LIBUNWIND_HAS_STD_CXX11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +endif() + +check_library_exists(dl dladdr "" LIBUNWIND_HAS_DL_LIB) 
+check_library_exists(pthread pthread_once "" LIBUNWIND_HAS_PTHREAD_LIB) diff --git a/src/coreclr/src/nativeaot/libunwind/docs/BuildingLibunwind.rst b/src/coreclr/src/nativeaot/libunwind/docs/BuildingLibunwind.rst new file mode 100644 index 0000000000000..7f42133a8a50e --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/BuildingLibunwind.rst @@ -0,0 +1,161 @@ +.. _BuildingLibunwind: + +================== +Building libunwind +================== + +.. contents:: + :local: + +.. _build instructions: + +Getting Started +=============== + +On Mac OS, the easiest way to get this library is to link with -lSystem. +However if you want to build tip-of-trunk from here (getting the bleeding +edge), read on. + +The basic steps needed to build libc++ are: + +#. Checkout LLVM, libunwind, and related projects: + + * ``cd where-you-want-llvm-to-live`` + * ``git clone https://github.com/llvm/llvm-project.git`` + +#. Configure and build libunwind: + + CMake is the only supported configuration system. + + Clang is the preferred compiler when building and using libunwind. + + * ``cd where you want to build llvm`` + * ``mkdir build`` + * ``cd build`` + * ``cmake -G -DLLVM_ENABLE_PROJECTS=libunwind [options] `` + + For more information about configuring libunwind see :ref:`CMake Options`. + + * ``make unwind`` --- will build libunwind. + * ``make check-unwind`` --- will run the test suite. + + Shared and static libraries for libunwind should now be present in llvm/build/lib. + +#. **Optional**: Install libunwind + + If your system already provides an unwinder, it is important to be careful + not to replace it. Remember Use the CMake option ``CMAKE_INSTALL_PREFIX`` to + select a safe place to install libunwind. + + * ``make install-unwind`` --- Will install the libraries and the headers + + +It is sometimes beneficial to build outside of the LLVM tree. An out-of-tree +build would look like this: + +.. code-block:: bash + + $ cd where-you-want-libunwind-to-live + $ # Check out llvm, and libunwind + $ ``svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm`` + $ ``svn co http://llvm.org/svn/llvm-project/libunwind/trunk libunwind`` + $ cd where-you-want-to-build + $ mkdir build && cd build + $ export CC=clang CXX=clang++ + $ cmake -DLLVM_PATH=path/to/llvm \ + path/to/libunwind + $ make + + +.. _CMake Options: + +CMake Options +============= + +Here are some of the CMake variables that are used often, along with a +brief explanation and LLVM-specific notes. For full documentation, check the +CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. + +**CMAKE_BUILD_TYPE**:STRING + Sets the build type for ``make`` based generators. Possible values are + Release, Debug, RelWithDebInfo and MinSizeRel. On systems like Visual Studio + the user sets the build type with the IDE settings. + +**CMAKE_INSTALL_PREFIX**:PATH + Path where LLVM will be installed if "make install" is invoked or the + "INSTALL" target is built. + +**CMAKE_CXX_COMPILER**:STRING + The C++ compiler to use when building and testing libunwind. + + +.. _libunwind-specific options: + +libunwind specific options +-------------------------- + +.. option:: LIBUNWIND_BUILD_32_BITS:BOOL + + **Default**: Same as LLVM_BUILD_32_BITS + + Toggle whether libunwind should be built with -m32. + +.. option:: LIBUNWIND_ENABLE_ASSERTIONS:BOOL + + **Default**: ``ON`` + + Toggle assertions independent of the build mode. + +.. option:: LIBUNWIND_ENABLE_PEDANTIC:BOOL + + **Default**: ``ON`` + + Compile with -Wpedantic. + +.. 
option:: LIBUNWIND_ENABLE_WERROR:BOOL + + **Default**: ``ON`` + + Compile with -Werror + +.. option:: LIBUNWIND_ENABLE_SHARED:BOOL + + **Default**: ``ON`` + + Build libunwind as a shared library. + +.. option:: LIBUNWIND_ENABLE_STATIC:BOOL + + **Default**: ``ON`` + + Build libunwind as a static archive. + +.. option:: LIBUNWIND_ENABLE_CROSS_UNWINDING:BOOL + + **Default**: ``OFF`` + + Enable cross-platform unwinding support. + +.. option:: LIBUNWIND_ENABLE_ARM_WMMX:BOOL + + **Default**: ``OFF`` + + Enable unwinding support for ARM WMMX registers. + +.. option:: LIBUNWIND_ENABLE_THREADS:BOOL + + **Default**: ``ON`` + + Build libunwind with threading support. + +.. option:: LIBUNWIND_TARGET_TRIPLE:STRING + + Target triple for cross compiling + +.. option:: LIBUNWIND_GCC_TOOLCHAIN:PATH + + GCC toolchain for cross compiling + +.. option:: LIBUNWIND_SYSROOT + + Sysroot for cross compiling diff --git a/src/coreclr/src/nativeaot/libunwind/docs/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/docs/CMakeLists.txt new file mode 100644 index 0000000000000..c226f2f5b8e8d --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/CMakeLists.txt @@ -0,0 +1,7 @@ +include(FindSphinx) +if (SPHINX_FOUND) + include(AddSphinxTarget) + if (${SPHINX_OUTPUT_HTML}) + add_sphinx_target(html libunwind) + endif() +endif() diff --git a/src/coreclr/src/nativeaot/libunwind/docs/README.txt b/src/coreclr/src/nativeaot/libunwind/docs/README.txt new file mode 100644 index 0000000000000..968982fce5e07 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/README.txt @@ -0,0 +1,13 @@ +libunwind Documentation +==================== + +The libunwind documentation is written using the Sphinx documentation generator. It is +currently tested with Sphinx 1.1.3. + +To build the documents into html configure libunwind with the following cmake options: + + * -DLLVM_ENABLE_SPHINX=ON + * -DLIBUNWIND_INCLUDE_DOCS=ON + +After configuring libunwind with these options the make rule `docs-libunwind-html` +should be available. diff --git a/src/coreclr/src/nativeaot/libunwind/docs/conf.py b/src/coreclr/src/nativeaot/libunwind/docs/conf.py new file mode 100644 index 0000000000000..704a1d0a12da4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/conf.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# +# libunwind documentation build configuration file. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os +from datetime import date + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.todo'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. 
+source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'libunwind' +copyright = u'2011-%d, LLVM Project' % date.today().year + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '9.0' +# The full version, including alpha/beta/rc tags. +release = '9.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%Y-%m-%d' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +show_authors = True + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'friendly' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'haiku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. 
+#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'libunwinddoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('contents', 'libunwind.tex', u'libunwind Documentation', + u'LLVM project', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('contents', 'libunwind', u'libunwind Documentation', + [u'LLVM project'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('contents', 'libunwind', u'libunwind Documentation', + u'LLVM project', 'libunwind', 'LLVM Unwinder', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + + +# FIXME: Define intersphinx configration. +intersphinx_mapping = {} + + +# -- Options for extensions ---------------------------------------------------- + +# Enable this if you want TODOs to show up in the generated documentation. 
+todo_include_todos = True diff --git a/src/coreclr/src/nativeaot/libunwind/docs/index.rst b/src/coreclr/src/nativeaot/libunwind/docs/index.rst new file mode 100644 index 0000000000000..a4e21bb3c336c --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/index.rst @@ -0,0 +1,104 @@ +.. _index: + +======================= +libunwind LLVM Unwinder +======================= + +Overview +======== + +libunwind is an implementation of the interface defined by the HP libunwind +project. It was contributed by Apple as a way to enable clang++ to port to +platforms that do not have a system unwinder. It is intended to be a small and +fast implementation of the ABI, leaving off some features of HP's libunwind +that never materialized (e.g. remote unwinding). + +The unwinder has two levels of API. The high level APIs are the `_Unwind_*` +functions which implement functionality required by `__cxa_*` exception +functions. The low level APIs are the `unw_*` functions which are an interface +defined by the old HP libunwind project. + +Getting Started with libunwind +------------------------------ + +.. toctree:: + :maxdepth: 2 + + BuildingLibunwind + +Current Status +-------------- + +libunwind is a production-quality unwinder, with platform support for DWARF +unwind info, SjLj, and ARM EHABI. + +The low level libunwind API was designed to work either in-process (aka local) +or to operate on another process (aka remote), but only the local path has been +implemented. Remote unwinding remains as future work. + +Platform and Compiler Support +----------------------------- + +libunwind is known to work on the following platforms: + +============ ======================== ============ ======================== +OS Arch Compilers Unwind Info +============ ======================== ============ ======================== +Any i386, x86_64, ARM Clang SjLj +Bare Metal ARM Clang, GCC EHABI +FreeBSD i386, x86_64, ARM64 Clang DWARF CFI +iOS ARM Clang SjLj +Linux ARM Clang, GCC EHABI +Linux i386, x86_64, ARM64 Clang, GCC DWARF CFI +macOS i386, x86_64 Clang, GCC DWARF CFI +NetBSD x86_64 Clang, GCC DWARF CFI +Windows i386, x86_64, ARM, ARM64 Clang DWARF CFI +============ ======================== ============ ======================== + +The following minimum compiler versions are strongly recommended. + +* Clang 3.5 and above +* GCC 4.7 and above. + +Anything older *may* work. + +Notes and Known Issues +---------------------- + +* TODO + + +Getting Involved +================ + +First please review our `Developer's Policy `__ +and `Getting started with LLVM `__. + +**Bug Reports** + +If you think you've found a bug in libunwind, please report it using +the `LLVM Bugzilla`_. If you're not sure, you +can post a message to the `cfe-dev mailing list`_ or on IRC. +Please include "libunwind" in your subject. + +**Patches** + +If you want to contribute a patch to libunwind, the best place for that is +`Phabricator `_. Please include [libunwind] in the subject and +add `cfe-commits` as a subscriber. Also make sure you are subscribed to the +`cfe-commits mailing list `_. + +**Discussion and Questions** + +Send discussions and questions to the +`cfe-dev mailing list `_. +Please include [libunwind] in the subject. 
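**Example: walking the local stack**

The overview above mentions the low-level ``unw_*`` interface declared in
``include/libunwind.h``. As a quick orientation (this sketch is editorial and is
not part of the imported sources), a local backtrace using only the entry
points declared in that header looks roughly like this, assuming the program is
linked against the unwinder and ``libunwind.h`` is on the include path:

.. code-block:: c

    #include <libunwind.h>
    #include <stdio.h>

    /* Minimal local unwind: capture the current context, then step
       through caller frames, printing the IP and symbol of each one. */
    static void print_backtrace(void) {
      unw_context_t context;
      unw_cursor_t cursor;

      unw_getcontext(&context);           /* snapshot current registers    */
      unw_init_local(&cursor, &context);  /* in-process ("local") unwind   */

      while (unw_step(&cursor) > 0) {     /* returns 0 at the oldest frame */
        unw_word_t ip = 0, offset = 0;
        char name[256];

        unw_get_reg(&cursor, UNW_REG_IP, &ip);
        if (unw_get_proc_name(&cursor, name, sizeof(name), &offset) == UNW_ESUCCESS)
          printf("ip=%p  %s+0x%lx\n", (void *)ip, name, (unsigned long)offset);
        else
          printf("ip=%p  <unknown>\n", (void *)ip);
      }
    }

The functions and constants used here (``unw_getcontext``, ``unw_init_local``,
``unw_step``, ``unw_get_reg``, ``unw_get_proc_name``, ``UNW_REG_IP``,
``UNW_ESUCCESS``) are the ones declared in ``include/libunwind.h`` in this
change; everything else (the helper name, buffer size, and output format) is an
arbitrary choice for illustration.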
+ + +Quick Links +=========== +* `LLVM Homepage `_ +* `LLVM Bugzilla `_ +* `cfe-commits Mailing List`_ +* `cfe-dev Mailing List`_ +* `Browse libunwind Sources `_ diff --git a/src/coreclr/src/nativeaot/libunwind/include/__libunwind_config.h b/src/coreclr/src/nativeaot/libunwind/include/__libunwind_config.h new file mode 100644 index 0000000000000..753085c7fe0c0 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/__libunwind_config.h @@ -0,0 +1,145 @@ +//===------------------------- __libunwind_config.h -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef ____LIBUNWIND_CONFIG_H__ +#define ____LIBUNWIND_CONFIG_H__ + +#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ + !defined(__ARM_DWARF_EH__) +#define _LIBUNWIND_ARM_EHABI +#endif + +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86 8 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64 32 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC 112 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64 116 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64 95 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM 287 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K 32 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS 65 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_SPARC 31 + +#if defined(_LIBUNWIND_IS_NATIVE_ONLY) +# if defined(__i386__) +# define _LIBUNWIND_TARGET_I386 +# define _LIBUNWIND_CONTEXT_SIZE 13 +# define _LIBUNWIND_CURSOR_SIZE 19 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86 +# elif defined(__x86_64__) +# define _LIBUNWIND_TARGET_X86_64 1 +# if defined(_WIN64) +# define _LIBUNWIND_CONTEXT_SIZE 54 +# ifdef __SEH__ +# define _LIBUNWIND_CURSOR_SIZE 204 +# else +# define _LIBUNWIND_CURSOR_SIZE 66 +# endif +# else +# define _LIBUNWIND_CONTEXT_SIZE 38 +# define _LIBUNWIND_CURSOR_SIZE 50 +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64 +# elif defined(__powerpc64__) +# define _LIBUNWIND_TARGET_PPC64 1 +# define _LIBUNWIND_CONTEXT_SIZE 167 +# define _LIBUNWIND_CURSOR_SIZE 179 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64 +# elif defined(__ppc__) +# define _LIBUNWIND_TARGET_PPC 1 +# define _LIBUNWIND_CONTEXT_SIZE 117 +# define _LIBUNWIND_CURSOR_SIZE 124 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC +# elif defined(__aarch64__) +# define _LIBUNWIND_TARGET_AARCH64 1 +# define _LIBUNWIND_CONTEXT_SIZE 100 +# if defined(__SEH__) +# define _LIBUNWIND_CURSOR_SIZE 198 +# else +# define _LIBUNWIND_CURSOR_SIZE 112 +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64 +# elif defined(__arm__) +# define _LIBUNWIND_TARGET_ARM 1 +# if defined(__SEH__) +# define _LIBUNWIND_CONTEXT_SIZE 42 +# define _LIBUNWIND_CURSOR_SIZE 80 +# elif defined(__ARM_WMMX) +# define _LIBUNWIND_CONTEXT_SIZE 61 +# define _LIBUNWIND_CURSOR_SIZE 68 +# else +# define _LIBUNWIND_CONTEXT_SIZE 50 +# define _LIBUNWIND_CURSOR_SIZE 57 +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM +# elif defined(__or1k__) +# define _LIBUNWIND_TARGET_OR1K 1 +# define _LIBUNWIND_CONTEXT_SIZE 16 +# define _LIBUNWIND_CURSOR_SIZE 24 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER 
_LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K +# elif defined(__mips__) +# if defined(_ABIO32) && _MIPS_SIM == _ABIO32 +# define _LIBUNWIND_TARGET_MIPS_O32 1 +# if defined(__mips_hard_float) +# define _LIBUNWIND_CONTEXT_SIZE 50 +# define _LIBUNWIND_CURSOR_SIZE 57 +# else +# define _LIBUNWIND_CONTEXT_SIZE 18 +# define _LIBUNWIND_CURSOR_SIZE 24 +# endif +# elif defined(_ABIN32) && _MIPS_SIM == _ABIN32 +# define _LIBUNWIND_TARGET_MIPS_NEWABI 1 +# if defined(__mips_hard_float) +# define _LIBUNWIND_CONTEXT_SIZE 67 +# define _LIBUNWIND_CURSOR_SIZE 74 +# else +# define _LIBUNWIND_CONTEXT_SIZE 35 +# define _LIBUNWIND_CURSOR_SIZE 42 +# endif +# elif defined(_ABI64) && _MIPS_SIM == _ABI64 +# define _LIBUNWIND_TARGET_MIPS_NEWABI 1 +# if defined(__mips_hard_float) +# define _LIBUNWIND_CONTEXT_SIZE 67 +# define _LIBUNWIND_CURSOR_SIZE 79 +# else +# define _LIBUNWIND_CONTEXT_SIZE 35 +# define _LIBUNWIND_CURSOR_SIZE 47 +# endif +# else +# error "Unsupported MIPS ABI and/or environment" +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS +# elif defined(__sparc__) + #define _LIBUNWIND_TARGET_SPARC 1 + #define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_SPARC + #define _LIBUNWIND_CONTEXT_SIZE 16 + #define _LIBUNWIND_CURSOR_SIZE 23 +#elif defined(HOST_WASM) +#define _LIBUNWIND_TARGET_WASM 1 +// TODO: Determine the right values +#define _LIBUNWIND_CONTEXT_SIZE 0xbadf00d +#define _LIBUNWIND_CURSOR_SIZE 0xbadf00d +#else +# error "Unsupported architecture." +# endif +#else // !_LIBUNWIND_IS_NATIVE_ONLY +# define _LIBUNWIND_TARGET_I386 +# define _LIBUNWIND_TARGET_X86_64 1 +# define _LIBUNWIND_TARGET_PPC 1 +# define _LIBUNWIND_TARGET_PPC64 1 +# define _LIBUNWIND_TARGET_AARCH64 1 +# define _LIBUNWIND_TARGET_ARM 1 +# define _LIBUNWIND_TARGET_OR1K 1 +# define _LIBUNWIND_TARGET_MIPS_O32 1 +# define _LIBUNWIND_TARGET_MIPS_NEWABI 1 +# define _LIBUNWIND_TARGET_SPARC 1 +# define _LIBUNWIND_CONTEXT_SIZE 167 +# define _LIBUNWIND_CURSOR_SIZE 179 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287 +#endif // _LIBUNWIND_IS_NATIVE_ONLY + +#endif // ____LIBUNWIND_CONFIG_H__ diff --git a/src/coreclr/src/nativeaot/libunwind/include/libunwind.h b/src/coreclr/src/nativeaot/libunwind/include/libunwind.h new file mode 100644 index 0000000000000..6e70f264f9f36 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/libunwind.h @@ -0,0 +1,855 @@ +//===---------------------------- libunwind.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Compatible with libunwind API documented at: +// http://www.nongnu.org/libunwind/man/libunwind(3).html +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBUNWIND__ +#define __LIBUNWIND__ + +#include <__libunwind_config.h> + +#include +#include + +#ifdef __APPLE__ + #if __clang__ + #if __has_include() + #include + #endif + #elif __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 + #include + #endif + + #ifdef __arm__ + #define LIBUNWIND_AVAIL __attribute__((unavailable)) + #elif defined(__OSX_AVAILABLE_STARTING) + #define LIBUNWIND_AVAIL __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_5_0) + #else + #include + #ifdef AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define LIBUNWIND_AVAIL AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #else + #define LIBUNWIND_AVAIL __attribute__((unavailable)) + #endif + #endif +#else + #define LIBUNWIND_AVAIL +#endif + +/* error codes */ +enum { + UNW_ESUCCESS = 0, /* no error */ + UNW_EUNSPEC = -6540, /* unspecified (general) error */ + UNW_ENOMEM = -6541, /* out of memory */ + UNW_EBADREG = -6542, /* bad register number */ + UNW_EREADONLYREG = -6543, /* attempt to write read-only register */ + UNW_ESTOPUNWIND = -6544, /* stop unwinding */ + UNW_EINVALIDIP = -6545, /* invalid IP */ + UNW_EBADFRAME = -6546, /* bad frame */ + UNW_EINVAL = -6547, /* unsupported operation or bad value */ + UNW_EBADVERSION = -6548, /* unwind info has unsupported version */ + UNW_ENOINFO = -6549 /* no unwind info found */ +#if defined(_LIBUNWIND_TARGET_AARCH64) && !defined(_LIBUNWIND_IS_NATIVE_ONLY) + , UNW_ECROSSRASIGNING = -6550 /* cross unwind with return address signing */ +#endif +}; + +struct unw_context_t { + uint64_t data[_LIBUNWIND_CONTEXT_SIZE]; +}; +typedef struct unw_context_t unw_context_t; + +struct unw_cursor_t { + uint64_t data[_LIBUNWIND_CURSOR_SIZE]; +}; +typedef struct unw_cursor_t unw_cursor_t; + +typedef struct unw_addr_space *unw_addr_space_t; + +typedef int unw_regnum_t; +typedef uintptr_t unw_word_t; +#if defined(__arm__) && !defined(__ARM_DWARF_EH__) +typedef uint64_t unw_fpreg_t; +#else +typedef double unw_fpreg_t; +#endif + +struct unw_proc_info_t { + unw_word_t start_ip; /* start address of function */ + unw_word_t end_ip; /* address after end of function */ + unw_word_t lsda; /* address of language specific data area, */ + /* or zero if not used */ + unw_word_t handler; /* personality routine, or zero if not used */ + unw_word_t gp; /* not used */ + unw_word_t flags; /* not used */ + uint32_t format; /* compact unwind encoding, or zero if none */ + uint32_t unwind_info_size; /* size of DWARF unwind info, or zero if none */ + unw_word_t unwind_info; /* address of DWARF unwind info, or zero */ + unw_word_t extra; /* mach_header of mach-o image containing func */ +}; +typedef struct unw_proc_info_t unw_proc_info_t; + +enum unw_save_loc_type_t +{ + UNW_SLT_NONE, /* register is not saved ("not an l-value") */ + UNW_SLT_MEMORY, /* register has been saved in memory */ + UNW_SLT_REG /* register has been saved in (another) register */ +}; +typedef enum unw_save_loc_type_t unw_save_loc_type_t; + +struct unw_save_loc_t +{ + unw_save_loc_type_t type; + union + { + unw_word_t addr; /* valid if type==UNW_SLT_MEMORY */ + unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */ + } + u; +}; +typedef struct unw_save_loc_t unw_save_loc_t; + +#ifdef __cplusplus +extern "C" { +#endif + +extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL; +extern int 
unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL; +extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL; +extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL; +extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL; +extern int unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t, unw_word_t *) LIBUNWIND_AVAIL; +extern int unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t) LIBUNWIND_AVAIL; +extern int unw_resume(unw_cursor_t *) LIBUNWIND_AVAIL; + +#ifdef __arm__ +/* Save VFP registers in FSTMX format (instead of FSTMD). */ +extern void unw_save_vfp_as_X(unw_cursor_t *) LIBUNWIND_AVAIL; +#endif + + +extern const char *unw_regname(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; +extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *) LIBUNWIND_AVAIL; +extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; +extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL; +extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL; +extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) LIBUNWIND_AVAIL; + +extern unw_addr_space_t unw_local_addr_space; + +#ifdef __cplusplus +} +#endif + +// architecture independent register numbers +enum { + UNW_REG_IP = -1, // instruction pointer + UNW_REG_SP = -2, // stack pointer +}; + +// 32-bit x86 registers +enum { + UNW_X86_EAX = 0, + UNW_X86_ECX = 1, + UNW_X86_EDX = 2, + UNW_X86_EBX = 3, + UNW_X86_EBP = 4, + UNW_X86_ESP = 5, + UNW_X86_ESI = 6, + UNW_X86_EDI = 7 +}; + +// 64-bit x86_64 registers +enum { + UNW_X86_64_RAX = 0, + UNW_X86_64_RDX = 1, + UNW_X86_64_RCX = 2, + UNW_X86_64_RBX = 3, + UNW_X86_64_RSI = 4, + UNW_X86_64_RDI = 5, + UNW_X86_64_RBP = 6, + UNW_X86_64_RSP = 7, + UNW_X86_64_R8 = 8, + UNW_X86_64_R9 = 9, + UNW_X86_64_R10 = 10, + UNW_X86_64_R11 = 11, + UNW_X86_64_R12 = 12, + UNW_X86_64_R13 = 13, + UNW_X86_64_R14 = 14, + UNW_X86_64_R15 = 15, + UNW_X86_64_RIP = 16, + UNW_X86_64_XMM0 = 17, + UNW_X86_64_XMM1 = 18, + UNW_X86_64_XMM2 = 19, + UNW_X86_64_XMM3 = 20, + UNW_X86_64_XMM4 = 21, + UNW_X86_64_XMM5 = 22, + UNW_X86_64_XMM6 = 23, + UNW_X86_64_XMM7 = 24, + UNW_X86_64_XMM8 = 25, + UNW_X86_64_XMM9 = 26, + UNW_X86_64_XMM10 = 27, + UNW_X86_64_XMM11 = 28, + UNW_X86_64_XMM12 = 29, + UNW_X86_64_XMM13 = 30, + UNW_X86_64_XMM14 = 31, + UNW_X86_64_XMM15 = 32, +}; + + +// 32-bit ppc register numbers +enum { + UNW_PPC_R0 = 0, + UNW_PPC_R1 = 1, + UNW_PPC_R2 = 2, + UNW_PPC_R3 = 3, + UNW_PPC_R4 = 4, + UNW_PPC_R5 = 5, + UNW_PPC_R6 = 6, + UNW_PPC_R7 = 7, + UNW_PPC_R8 = 8, + UNW_PPC_R9 = 9, + UNW_PPC_R10 = 10, + UNW_PPC_R11 = 11, + UNW_PPC_R12 = 12, + UNW_PPC_R13 = 13, + UNW_PPC_R14 = 14, + UNW_PPC_R15 = 15, + UNW_PPC_R16 = 16, + UNW_PPC_R17 = 17, + UNW_PPC_R18 = 18, + UNW_PPC_R19 = 19, + UNW_PPC_R20 = 20, + UNW_PPC_R21 = 21, + UNW_PPC_R22 = 22, + UNW_PPC_R23 = 23, + UNW_PPC_R24 = 24, + UNW_PPC_R25 = 25, + UNW_PPC_R26 = 26, + UNW_PPC_R27 = 27, + UNW_PPC_R28 = 28, + UNW_PPC_R29 = 29, + UNW_PPC_R30 = 30, + UNW_PPC_R31 = 31, + UNW_PPC_F0 = 32, + UNW_PPC_F1 = 33, + UNW_PPC_F2 = 34, + UNW_PPC_F3 = 35, + UNW_PPC_F4 = 36, + UNW_PPC_F5 = 37, + UNW_PPC_F6 = 38, + UNW_PPC_F7 = 39, + UNW_PPC_F8 = 40, + UNW_PPC_F9 = 41, + UNW_PPC_F10 = 42, + UNW_PPC_F11 = 43, + UNW_PPC_F12 = 44, + UNW_PPC_F13 = 45, + UNW_PPC_F14 = 46, + UNW_PPC_F15 = 47, + UNW_PPC_F16 = 48, + UNW_PPC_F17 = 49, + UNW_PPC_F18 = 50, + UNW_PPC_F19 = 51, + UNW_PPC_F20 = 52, + UNW_PPC_F21 = 53, + UNW_PPC_F22 = 54, + UNW_PPC_F23 = 55, + UNW_PPC_F24 = 56, + UNW_PPC_F25 = 
57, + UNW_PPC_F26 = 58, + UNW_PPC_F27 = 59, + UNW_PPC_F28 = 60, + UNW_PPC_F29 = 61, + UNW_PPC_F30 = 62, + UNW_PPC_F31 = 63, + UNW_PPC_MQ = 64, + UNW_PPC_LR = 65, + UNW_PPC_CTR = 66, + UNW_PPC_AP = 67, + UNW_PPC_CR0 = 68, + UNW_PPC_CR1 = 69, + UNW_PPC_CR2 = 70, + UNW_PPC_CR3 = 71, + UNW_PPC_CR4 = 72, + UNW_PPC_CR5 = 73, + UNW_PPC_CR6 = 74, + UNW_PPC_CR7 = 75, + UNW_PPC_XER = 76, + UNW_PPC_V0 = 77, + UNW_PPC_V1 = 78, + UNW_PPC_V2 = 79, + UNW_PPC_V3 = 80, + UNW_PPC_V4 = 81, + UNW_PPC_V5 = 82, + UNW_PPC_V6 = 83, + UNW_PPC_V7 = 84, + UNW_PPC_V8 = 85, + UNW_PPC_V9 = 86, + UNW_PPC_V10 = 87, + UNW_PPC_V11 = 88, + UNW_PPC_V12 = 89, + UNW_PPC_V13 = 90, + UNW_PPC_V14 = 91, + UNW_PPC_V15 = 92, + UNW_PPC_V16 = 93, + UNW_PPC_V17 = 94, + UNW_PPC_V18 = 95, + UNW_PPC_V19 = 96, + UNW_PPC_V20 = 97, + UNW_PPC_V21 = 98, + UNW_PPC_V22 = 99, + UNW_PPC_V23 = 100, + UNW_PPC_V24 = 101, + UNW_PPC_V25 = 102, + UNW_PPC_V26 = 103, + UNW_PPC_V27 = 104, + UNW_PPC_V28 = 105, + UNW_PPC_V29 = 106, + UNW_PPC_V30 = 107, + UNW_PPC_V31 = 108, + UNW_PPC_VRSAVE = 109, + UNW_PPC_VSCR = 110, + UNW_PPC_SPE_ACC = 111, + UNW_PPC_SPEFSCR = 112 +}; + +// 64-bit ppc register numbers +enum { + UNW_PPC64_R0 = 0, + UNW_PPC64_R1 = 1, + UNW_PPC64_R2 = 2, + UNW_PPC64_R3 = 3, + UNW_PPC64_R4 = 4, + UNW_PPC64_R5 = 5, + UNW_PPC64_R6 = 6, + UNW_PPC64_R7 = 7, + UNW_PPC64_R8 = 8, + UNW_PPC64_R9 = 9, + UNW_PPC64_R10 = 10, + UNW_PPC64_R11 = 11, + UNW_PPC64_R12 = 12, + UNW_PPC64_R13 = 13, + UNW_PPC64_R14 = 14, + UNW_PPC64_R15 = 15, + UNW_PPC64_R16 = 16, + UNW_PPC64_R17 = 17, + UNW_PPC64_R18 = 18, + UNW_PPC64_R19 = 19, + UNW_PPC64_R20 = 20, + UNW_PPC64_R21 = 21, + UNW_PPC64_R22 = 22, + UNW_PPC64_R23 = 23, + UNW_PPC64_R24 = 24, + UNW_PPC64_R25 = 25, + UNW_PPC64_R26 = 26, + UNW_PPC64_R27 = 27, + UNW_PPC64_R28 = 28, + UNW_PPC64_R29 = 29, + UNW_PPC64_R30 = 30, + UNW_PPC64_R31 = 31, + UNW_PPC64_F0 = 32, + UNW_PPC64_F1 = 33, + UNW_PPC64_F2 = 34, + UNW_PPC64_F3 = 35, + UNW_PPC64_F4 = 36, + UNW_PPC64_F5 = 37, + UNW_PPC64_F6 = 38, + UNW_PPC64_F7 = 39, + UNW_PPC64_F8 = 40, + UNW_PPC64_F9 = 41, + UNW_PPC64_F10 = 42, + UNW_PPC64_F11 = 43, + UNW_PPC64_F12 = 44, + UNW_PPC64_F13 = 45, + UNW_PPC64_F14 = 46, + UNW_PPC64_F15 = 47, + UNW_PPC64_F16 = 48, + UNW_PPC64_F17 = 49, + UNW_PPC64_F18 = 50, + UNW_PPC64_F19 = 51, + UNW_PPC64_F20 = 52, + UNW_PPC64_F21 = 53, + UNW_PPC64_F22 = 54, + UNW_PPC64_F23 = 55, + UNW_PPC64_F24 = 56, + UNW_PPC64_F25 = 57, + UNW_PPC64_F26 = 58, + UNW_PPC64_F27 = 59, + UNW_PPC64_F28 = 60, + UNW_PPC64_F29 = 61, + UNW_PPC64_F30 = 62, + UNW_PPC64_F31 = 63, + // 64: reserved + UNW_PPC64_LR = 65, + UNW_PPC64_CTR = 66, + // 67: reserved + UNW_PPC64_CR0 = 68, + UNW_PPC64_CR1 = 69, + UNW_PPC64_CR2 = 70, + UNW_PPC64_CR3 = 71, + UNW_PPC64_CR4 = 72, + UNW_PPC64_CR5 = 73, + UNW_PPC64_CR6 = 74, + UNW_PPC64_CR7 = 75, + UNW_PPC64_XER = 76, + UNW_PPC64_V0 = 77, + UNW_PPC64_V1 = 78, + UNW_PPC64_V2 = 79, + UNW_PPC64_V3 = 80, + UNW_PPC64_V4 = 81, + UNW_PPC64_V5 = 82, + UNW_PPC64_V6 = 83, + UNW_PPC64_V7 = 84, + UNW_PPC64_V8 = 85, + UNW_PPC64_V9 = 86, + UNW_PPC64_V10 = 87, + UNW_PPC64_V11 = 88, + UNW_PPC64_V12 = 89, + UNW_PPC64_V13 = 90, + UNW_PPC64_V14 = 91, + UNW_PPC64_V15 = 92, + UNW_PPC64_V16 = 93, + UNW_PPC64_V17 = 94, + UNW_PPC64_V18 = 95, + UNW_PPC64_V19 = 96, + UNW_PPC64_V20 = 97, + UNW_PPC64_V21 = 98, + UNW_PPC64_V22 = 99, + UNW_PPC64_V23 = 100, + UNW_PPC64_V24 = 101, + UNW_PPC64_V25 = 102, + UNW_PPC64_V26 = 103, + UNW_PPC64_V27 = 104, + UNW_PPC64_V28 = 105, + UNW_PPC64_V29 = 106, + UNW_PPC64_V30 = 107, + UNW_PPC64_V31 = 108, + // 109, 111-113: OpenPOWER ELF V2 
ABI: reserved + // Borrowing VRSAVE number from PPC32. + UNW_PPC64_VRSAVE = 109, + UNW_PPC64_VSCR = 110, + UNW_PPC64_TFHAR = 114, + UNW_PPC64_TFIAR = 115, + UNW_PPC64_TEXASR = 116, + UNW_PPC64_VS0 = UNW_PPC64_F0, + UNW_PPC64_VS1 = UNW_PPC64_F1, + UNW_PPC64_VS2 = UNW_PPC64_F2, + UNW_PPC64_VS3 = UNW_PPC64_F3, + UNW_PPC64_VS4 = UNW_PPC64_F4, + UNW_PPC64_VS5 = UNW_PPC64_F5, + UNW_PPC64_VS6 = UNW_PPC64_F6, + UNW_PPC64_VS7 = UNW_PPC64_F7, + UNW_PPC64_VS8 = UNW_PPC64_F8, + UNW_PPC64_VS9 = UNW_PPC64_F9, + UNW_PPC64_VS10 = UNW_PPC64_F10, + UNW_PPC64_VS11 = UNW_PPC64_F11, + UNW_PPC64_VS12 = UNW_PPC64_F12, + UNW_PPC64_VS13 = UNW_PPC64_F13, + UNW_PPC64_VS14 = UNW_PPC64_F14, + UNW_PPC64_VS15 = UNW_PPC64_F15, + UNW_PPC64_VS16 = UNW_PPC64_F16, + UNW_PPC64_VS17 = UNW_PPC64_F17, + UNW_PPC64_VS18 = UNW_PPC64_F18, + UNW_PPC64_VS19 = UNW_PPC64_F19, + UNW_PPC64_VS20 = UNW_PPC64_F20, + UNW_PPC64_VS21 = UNW_PPC64_F21, + UNW_PPC64_VS22 = UNW_PPC64_F22, + UNW_PPC64_VS23 = UNW_PPC64_F23, + UNW_PPC64_VS24 = UNW_PPC64_F24, + UNW_PPC64_VS25 = UNW_PPC64_F25, + UNW_PPC64_VS26 = UNW_PPC64_F26, + UNW_PPC64_VS27 = UNW_PPC64_F27, + UNW_PPC64_VS28 = UNW_PPC64_F28, + UNW_PPC64_VS29 = UNW_PPC64_F29, + UNW_PPC64_VS30 = UNW_PPC64_F30, + UNW_PPC64_VS31 = UNW_PPC64_F31, + UNW_PPC64_VS32 = UNW_PPC64_V0, + UNW_PPC64_VS33 = UNW_PPC64_V1, + UNW_PPC64_VS34 = UNW_PPC64_V2, + UNW_PPC64_VS35 = UNW_PPC64_V3, + UNW_PPC64_VS36 = UNW_PPC64_V4, + UNW_PPC64_VS37 = UNW_PPC64_V5, + UNW_PPC64_VS38 = UNW_PPC64_V6, + UNW_PPC64_VS39 = UNW_PPC64_V7, + UNW_PPC64_VS40 = UNW_PPC64_V8, + UNW_PPC64_VS41 = UNW_PPC64_V9, + UNW_PPC64_VS42 = UNW_PPC64_V10, + UNW_PPC64_VS43 = UNW_PPC64_V11, + UNW_PPC64_VS44 = UNW_PPC64_V12, + UNW_PPC64_VS45 = UNW_PPC64_V13, + UNW_PPC64_VS46 = UNW_PPC64_V14, + UNW_PPC64_VS47 = UNW_PPC64_V15, + UNW_PPC64_VS48 = UNW_PPC64_V16, + UNW_PPC64_VS49 = UNW_PPC64_V17, + UNW_PPC64_VS50 = UNW_PPC64_V18, + UNW_PPC64_VS51 = UNW_PPC64_V19, + UNW_PPC64_VS52 = UNW_PPC64_V20, + UNW_PPC64_VS53 = UNW_PPC64_V21, + UNW_PPC64_VS54 = UNW_PPC64_V22, + UNW_PPC64_VS55 = UNW_PPC64_V23, + UNW_PPC64_VS56 = UNW_PPC64_V24, + UNW_PPC64_VS57 = UNW_PPC64_V25, + UNW_PPC64_VS58 = UNW_PPC64_V26, + UNW_PPC64_VS59 = UNW_PPC64_V27, + UNW_PPC64_VS60 = UNW_PPC64_V28, + UNW_PPC64_VS61 = UNW_PPC64_V29, + UNW_PPC64_VS62 = UNW_PPC64_V30, + UNW_PPC64_VS63 = UNW_PPC64_V31 +}; + +// 64-bit ARM64 registers +enum { + UNW_ARM64_X0 = 0, + UNW_ARM64_X1 = 1, + UNW_ARM64_X2 = 2, + UNW_ARM64_X3 = 3, + UNW_ARM64_X4 = 4, + UNW_ARM64_X5 = 5, + UNW_ARM64_X6 = 6, + UNW_ARM64_X7 = 7, + UNW_ARM64_X8 = 8, + UNW_ARM64_X9 = 9, + UNW_ARM64_X10 = 10, + UNW_ARM64_X11 = 11, + UNW_ARM64_X12 = 12, + UNW_ARM64_X13 = 13, + UNW_ARM64_X14 = 14, + UNW_ARM64_X15 = 15, + UNW_ARM64_X16 = 16, + UNW_ARM64_X17 = 17, + UNW_ARM64_X18 = 18, + UNW_ARM64_X19 = 19, + UNW_ARM64_X20 = 20, + UNW_ARM64_X21 = 21, + UNW_ARM64_X22 = 22, + UNW_ARM64_X23 = 23, + UNW_ARM64_X24 = 24, + UNW_ARM64_X25 = 25, + UNW_ARM64_X26 = 26, + UNW_ARM64_X27 = 27, + UNW_ARM64_X28 = 28, + UNW_ARM64_X29 = 29, + UNW_ARM64_FP = 29, + UNW_ARM64_X30 = 30, + UNW_ARM64_LR = 30, + UNW_ARM64_X31 = 31, + UNW_ARM64_SP = 31, + // reserved block + UNW_ARM64_RA_SIGN_STATE = 34, + // reserved block + UNW_ARM64_D0 = 64, + UNW_ARM64_D1 = 65, + UNW_ARM64_D2 = 66, + UNW_ARM64_D3 = 67, + UNW_ARM64_D4 = 68, + UNW_ARM64_D5 = 69, + UNW_ARM64_D6 = 70, + UNW_ARM64_D7 = 71, + UNW_ARM64_D8 = 72, + UNW_ARM64_D9 = 73, + UNW_ARM64_D10 = 74, + UNW_ARM64_D11 = 75, + UNW_ARM64_D12 = 76, + UNW_ARM64_D13 = 77, + UNW_ARM64_D14 = 78, + UNW_ARM64_D15 = 79, + UNW_ARM64_D16 = 80, + 
UNW_ARM64_D17 = 81, + UNW_ARM64_D18 = 82, + UNW_ARM64_D19 = 83, + UNW_ARM64_D20 = 84, + UNW_ARM64_D21 = 85, + UNW_ARM64_D22 = 86, + UNW_ARM64_D23 = 87, + UNW_ARM64_D24 = 88, + UNW_ARM64_D25 = 89, + UNW_ARM64_D26 = 90, + UNW_ARM64_D27 = 91, + UNW_ARM64_D28 = 92, + UNW_ARM64_D29 = 93, + UNW_ARM64_D30 = 94, + UNW_ARM64_D31 = 95, +}; + +// 32-bit ARM registers. Numbers match DWARF for ARM spec #3.1 Table 1. +// Naming scheme uses recommendations given in Note 4 for VFP-v2 and VFP-v3. +// In this scheme, even though the 64-bit floating point registers D0-D31 +// overlap physically with the 32-bit floating pointer registers S0-S31, +// they are given a non-overlapping range of register numbers. +// +// Commented out ranges are not preserved during unwinding. +enum { + UNW_ARM_R0 = 0, + UNW_ARM_R1 = 1, + UNW_ARM_R2 = 2, + UNW_ARM_R3 = 3, + UNW_ARM_R4 = 4, + UNW_ARM_R5 = 5, + UNW_ARM_R6 = 6, + UNW_ARM_R7 = 7, + UNW_ARM_R8 = 8, + UNW_ARM_R9 = 9, + UNW_ARM_R10 = 10, + UNW_ARM_R11 = 11, + UNW_ARM_R12 = 12, + UNW_ARM_SP = 13, // Logical alias for UNW_REG_SP + UNW_ARM_R13 = 13, + UNW_ARM_LR = 14, + UNW_ARM_R14 = 14, + UNW_ARM_IP = 15, // Logical alias for UNW_REG_IP + UNW_ARM_R15 = 15, + // 16-63 -- OBSOLETE. Used in VFP1 to represent both S0-S31 and D0-D31. + UNW_ARM_S0 = 64, + UNW_ARM_S1 = 65, + UNW_ARM_S2 = 66, + UNW_ARM_S3 = 67, + UNW_ARM_S4 = 68, + UNW_ARM_S5 = 69, + UNW_ARM_S6 = 70, + UNW_ARM_S7 = 71, + UNW_ARM_S8 = 72, + UNW_ARM_S9 = 73, + UNW_ARM_S10 = 74, + UNW_ARM_S11 = 75, + UNW_ARM_S12 = 76, + UNW_ARM_S13 = 77, + UNW_ARM_S14 = 78, + UNW_ARM_S15 = 79, + UNW_ARM_S16 = 80, + UNW_ARM_S17 = 81, + UNW_ARM_S18 = 82, + UNW_ARM_S19 = 83, + UNW_ARM_S20 = 84, + UNW_ARM_S21 = 85, + UNW_ARM_S22 = 86, + UNW_ARM_S23 = 87, + UNW_ARM_S24 = 88, + UNW_ARM_S25 = 89, + UNW_ARM_S26 = 90, + UNW_ARM_S27 = 91, + UNW_ARM_S28 = 92, + UNW_ARM_S29 = 93, + UNW_ARM_S30 = 94, + UNW_ARM_S31 = 95, + // 96-103 -- OBSOLETE. F0-F7. Used by the FPA system. Superseded by VFP. 
+ // 104-111 -- wCGR0-wCGR7, ACC0-ACC7 (Intel wireless MMX) + UNW_ARM_WR0 = 112, + UNW_ARM_WR1 = 113, + UNW_ARM_WR2 = 114, + UNW_ARM_WR3 = 115, + UNW_ARM_WR4 = 116, + UNW_ARM_WR5 = 117, + UNW_ARM_WR6 = 118, + UNW_ARM_WR7 = 119, + UNW_ARM_WR8 = 120, + UNW_ARM_WR9 = 121, + UNW_ARM_WR10 = 122, + UNW_ARM_WR11 = 123, + UNW_ARM_WR12 = 124, + UNW_ARM_WR13 = 125, + UNW_ARM_WR14 = 126, + UNW_ARM_WR15 = 127, + // 128-133 -- SPSR, SPSR_{FIQ|IRQ|ABT|UND|SVC} + // 134-143 -- Reserved + // 144-150 -- R8_USR-R14_USR + // 151-157 -- R8_FIQ-R14_FIQ + // 158-159 -- R13_IRQ-R14_IRQ + // 160-161 -- R13_ABT-R14_ABT + // 162-163 -- R13_UND-R14_UND + // 164-165 -- R13_SVC-R14_SVC + // 166-191 -- Reserved + UNW_ARM_WC0 = 192, + UNW_ARM_WC1 = 193, + UNW_ARM_WC2 = 194, + UNW_ARM_WC3 = 195, + // 196-199 -- wC4-wC7 (Intel wireless MMX control) + // 200-255 -- Reserved + UNW_ARM_D0 = 256, + UNW_ARM_D1 = 257, + UNW_ARM_D2 = 258, + UNW_ARM_D3 = 259, + UNW_ARM_D4 = 260, + UNW_ARM_D5 = 261, + UNW_ARM_D6 = 262, + UNW_ARM_D7 = 263, + UNW_ARM_D8 = 264, + UNW_ARM_D9 = 265, + UNW_ARM_D10 = 266, + UNW_ARM_D11 = 267, + UNW_ARM_D12 = 268, + UNW_ARM_D13 = 269, + UNW_ARM_D14 = 270, + UNW_ARM_D15 = 271, + UNW_ARM_D16 = 272, + UNW_ARM_D17 = 273, + UNW_ARM_D18 = 274, + UNW_ARM_D19 = 275, + UNW_ARM_D20 = 276, + UNW_ARM_D21 = 277, + UNW_ARM_D22 = 278, + UNW_ARM_D23 = 279, + UNW_ARM_D24 = 280, + UNW_ARM_D25 = 281, + UNW_ARM_D26 = 282, + UNW_ARM_D27 = 283, + UNW_ARM_D28 = 284, + UNW_ARM_D29 = 285, + UNW_ARM_D30 = 286, + UNW_ARM_D31 = 287, + // 288-319 -- Reserved for VFP/Neon + // 320-8191 -- Reserved + // 8192-16383 -- Unspecified vendor co-processor register. +}; + +// OpenRISC1000 register numbers +enum { + UNW_OR1K_R0 = 0, + UNW_OR1K_R1 = 1, + UNW_OR1K_R2 = 2, + UNW_OR1K_R3 = 3, + UNW_OR1K_R4 = 4, + UNW_OR1K_R5 = 5, + UNW_OR1K_R6 = 6, + UNW_OR1K_R7 = 7, + UNW_OR1K_R8 = 8, + UNW_OR1K_R9 = 9, + UNW_OR1K_R10 = 10, + UNW_OR1K_R11 = 11, + UNW_OR1K_R12 = 12, + UNW_OR1K_R13 = 13, + UNW_OR1K_R14 = 14, + UNW_OR1K_R15 = 15, + UNW_OR1K_R16 = 16, + UNW_OR1K_R17 = 17, + UNW_OR1K_R18 = 18, + UNW_OR1K_R19 = 19, + UNW_OR1K_R20 = 20, + UNW_OR1K_R21 = 21, + UNW_OR1K_R22 = 22, + UNW_OR1K_R23 = 23, + UNW_OR1K_R24 = 24, + UNW_OR1K_R25 = 25, + UNW_OR1K_R26 = 26, + UNW_OR1K_R27 = 27, + UNW_OR1K_R28 = 28, + UNW_OR1K_R29 = 29, + UNW_OR1K_R30 = 30, + UNW_OR1K_R31 = 31, + UNW_OR1K_EPCR = 32, +}; + +// MIPS registers +enum { + UNW_MIPS_R0 = 0, + UNW_MIPS_R1 = 1, + UNW_MIPS_R2 = 2, + UNW_MIPS_R3 = 3, + UNW_MIPS_R4 = 4, + UNW_MIPS_R5 = 5, + UNW_MIPS_R6 = 6, + UNW_MIPS_R7 = 7, + UNW_MIPS_R8 = 8, + UNW_MIPS_R9 = 9, + UNW_MIPS_R10 = 10, + UNW_MIPS_R11 = 11, + UNW_MIPS_R12 = 12, + UNW_MIPS_R13 = 13, + UNW_MIPS_R14 = 14, + UNW_MIPS_R15 = 15, + UNW_MIPS_R16 = 16, + UNW_MIPS_R17 = 17, + UNW_MIPS_R18 = 18, + UNW_MIPS_R19 = 19, + UNW_MIPS_R20 = 20, + UNW_MIPS_R21 = 21, + UNW_MIPS_R22 = 22, + UNW_MIPS_R23 = 23, + UNW_MIPS_R24 = 24, + UNW_MIPS_R25 = 25, + UNW_MIPS_R26 = 26, + UNW_MIPS_R27 = 27, + UNW_MIPS_R28 = 28, + UNW_MIPS_R29 = 29, + UNW_MIPS_R30 = 30, + UNW_MIPS_R31 = 31, + UNW_MIPS_F0 = 32, + UNW_MIPS_F1 = 33, + UNW_MIPS_F2 = 34, + UNW_MIPS_F3 = 35, + UNW_MIPS_F4 = 36, + UNW_MIPS_F5 = 37, + UNW_MIPS_F6 = 38, + UNW_MIPS_F7 = 39, + UNW_MIPS_F8 = 40, + UNW_MIPS_F9 = 41, + UNW_MIPS_F10 = 42, + UNW_MIPS_F11 = 43, + UNW_MIPS_F12 = 44, + UNW_MIPS_F13 = 45, + UNW_MIPS_F14 = 46, + UNW_MIPS_F15 = 47, + UNW_MIPS_F16 = 48, + UNW_MIPS_F17 = 49, + UNW_MIPS_F18 = 50, + UNW_MIPS_F19 = 51, + UNW_MIPS_F20 = 52, + UNW_MIPS_F21 = 53, + UNW_MIPS_F22 = 54, + UNW_MIPS_F23 = 55, + 
UNW_MIPS_F24 = 56, + UNW_MIPS_F25 = 57, + UNW_MIPS_F26 = 58, + UNW_MIPS_F27 = 59, + UNW_MIPS_F28 = 60, + UNW_MIPS_F29 = 61, + UNW_MIPS_F30 = 62, + UNW_MIPS_F31 = 63, + UNW_MIPS_HI = 64, + UNW_MIPS_LO = 65, +}; + +// SPARC registers +enum { + UNW_SPARC_G0 = 0, + UNW_SPARC_G1 = 1, + UNW_SPARC_G2 = 2, + UNW_SPARC_G3 = 3, + UNW_SPARC_G4 = 4, + UNW_SPARC_G5 = 5, + UNW_SPARC_G6 = 6, + UNW_SPARC_G7 = 7, + UNW_SPARC_O0 = 8, + UNW_SPARC_O1 = 9, + UNW_SPARC_O2 = 10, + UNW_SPARC_O3 = 11, + UNW_SPARC_O4 = 12, + UNW_SPARC_O5 = 13, + UNW_SPARC_O6 = 14, + UNW_SPARC_O7 = 15, + UNW_SPARC_L0 = 16, + UNW_SPARC_L1 = 17, + UNW_SPARC_L2 = 18, + UNW_SPARC_L3 = 19, + UNW_SPARC_L4 = 20, + UNW_SPARC_L5 = 21, + UNW_SPARC_L6 = 22, + UNW_SPARC_L7 = 23, + UNW_SPARC_I0 = 24, + UNW_SPARC_I1 = 25, + UNW_SPARC_I2 = 26, + UNW_SPARC_I3 = 27, + UNW_SPARC_I4 = 28, + UNW_SPARC_I5 = 29, + UNW_SPARC_I6 = 30, + UNW_SPARC_I7 = 31, +}; + +#endif diff --git a/src/coreclr/src/nativeaot/libunwind/include/mach-o/compact_unwind_encoding.h b/src/coreclr/src/nativeaot/libunwind/include/mach-o/compact_unwind_encoding.h new file mode 100644 index 0000000000000..5301b1055ef93 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/mach-o/compact_unwind_encoding.h @@ -0,0 +1,477 @@ +//===------------------ mach-o/compact_unwind_encoding.h ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Darwin's alternative to DWARF based unwind encodings. +// +//===----------------------------------------------------------------------===// + + +#ifndef __COMPACT_UNWIND_ENCODING__ +#define __COMPACT_UNWIND_ENCODING__ + +#include + +// +// Compilers can emit standard DWARF FDEs in the __TEXT,__eh_frame section +// of object files. Or compilers can emit compact unwind information in +// the __LD,__compact_unwind section. +// +// When the linker creates a final linked image, it will create a +// __TEXT,__unwind_info section. This section is a small and fast way for the +// runtime to access unwind info for any given function. If the compiler +// emitted compact unwind info for the function, that compact unwind info will +// be encoded in the __TEXT,__unwind_info section. If the compiler emitted +// DWARF unwind info, the __TEXT,__unwind_info section will contain the offset +// of the FDE in the __TEXT,__eh_frame section in the final linked image. +// +// Note: Previously, the linker would transform some DWARF unwind infos into +// compact unwind info. But that is fragile and no longer done. + + +// +// The compact unwind endoding is a 32-bit value which encoded in an +// architecture specific way, which registers to restore from where, and how +// to unwind out of the function. 
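A brief aside before the architecture-specific bit layouts that follow: the snippet below is a minimal, self-contained sketch (not part of this patch) of how a consumer could split a compact unwind encoding into its architecture-independent pieces. The constant values are copied from the masks declared in this header so the sketch compiles on its own; decode_common_bits and the example encoding value are invented for illustration.

#include <cstdint>
#include <cstdio>

// Mirrors UNWIND_IS_NOT_FUNCTION_START / UNWIND_HAS_LSDA / UNWIND_PERSONALITY_MASK
// from this header, duplicated here so the sketch stands alone.
static const uint32_t kIsNotFunctionStart = 0x80000000;
static const uint32_t kHasLSDA            = 0x40000000;
static const uint32_t kPersonalityMask    = 0x30000000;

static void decode_common_bits(uint32_t encoding) {
    bool notFunctionStart = (encoding & kIsNotFunctionStart) != 0;
    bool hasLSDA          = (encoding & kHasLSDA) != 0;
    // The personality index occupies bits 28-29; 0 means no personality routine.
    unsigned personalityIndex = (encoding & kPersonalityMask) >> 28;
    std::printf("notFunctionStart=%d hasLSDA=%d personalityIndex=%u\n",
                (int)notFunctionStart, (int)hasLSDA, personalityIndex);
}

int main() {
    decode_common_bits(0x41000020); // hypothetical encoding: has LSDA, personality index 0
    return 0;
}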
+// +typedef uint32_t compact_unwind_encoding_t; + + +// architecture independent bits +enum { + UNWIND_IS_NOT_FUNCTION_START = 0x80000000, + UNWIND_HAS_LSDA = 0x40000000, + UNWIND_PERSONALITY_MASK = 0x30000000, +}; + + + + +// +// x86 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// +// 4-bits: 0=old, 1=ebp based, 2=stack-imm, 3=stack-ind, 4=DWARF +// ebp based: +// 15-bits (5*3-bits per reg) register permutation +// 8-bits for stack offset +// frameless: +// 8-bits stack size +// 3-bits stack adjust +// 3-bits register count +// 10-bits register permutation +// +enum { + UNWIND_X86_MODE_MASK = 0x0F000000, + UNWIND_X86_MODE_EBP_FRAME = 0x01000000, + UNWIND_X86_MODE_STACK_IMMD = 0x02000000, + UNWIND_X86_MODE_STACK_IND = 0x03000000, + UNWIND_X86_MODE_DWARF = 0x04000000, + + UNWIND_X86_EBP_FRAME_REGISTERS = 0x00007FFF, + UNWIND_X86_EBP_FRAME_OFFSET = 0x00FF0000, + + UNWIND_X86_FRAMELESS_STACK_SIZE = 0x00FF0000, + UNWIND_X86_FRAMELESS_STACK_ADJUST = 0x0000E000, + UNWIND_X86_FRAMELESS_STACK_REG_COUNT = 0x00001C00, + UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, + + UNWIND_X86_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; + +enum { + UNWIND_X86_REG_NONE = 0, + UNWIND_X86_REG_EBX = 1, + UNWIND_X86_REG_ECX = 2, + UNWIND_X86_REG_EDX = 3, + UNWIND_X86_REG_EDI = 4, + UNWIND_X86_REG_ESI = 5, + UNWIND_X86_REG_EBP = 6, +}; + +// +// For x86 there are four modes for the compact unwind encoding: +// UNWIND_X86_MODE_EBP_FRAME: +// EBP based frame where EBP is push on stack immediately after return address, +// then ESP is moved to EBP. Thus, to unwind ESP is restored with the current +// EPB value, then EBP is restored by popping off the stack, and the return +// is done by popping the stack once more into the pc. +// All non-volatile registers that need to be restored must have been saved +// in a small range in the stack that starts EBP-4 to EBP-1020. The offset/4 +// is encoded in the UNWIND_X86_EBP_FRAME_OFFSET bits. The registers saved +// are encoded in the UNWIND_X86_EBP_FRAME_REGISTERS bits as five 3-bit entries. +// Each entry contains which register to restore. +// UNWIND_X86_MODE_STACK_IMMD: +// A "frameless" (EBP not used as frame pointer) function with a small +// constant stack size. To return, a constant (encoded in the compact +// unwind encoding) is added to the ESP. Then the return is done by +// popping the stack into the pc. +// All non-volatile registers that need to be restored must have been saved +// on the stack immediately after the return address. The stack_size/4 is +// encoded in the UNWIND_X86_FRAMELESS_STACK_SIZE (max stack size is 1024). +// The number of registers saved is encoded in UNWIND_X86_FRAMELESS_STACK_REG_COUNT. +// UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION constains which registers were +// saved and their order. +// UNWIND_X86_MODE_STACK_IND: +// A "frameless" (EBP not used as frame pointer) function large constant +// stack size. This case is like the previous, except the stack size is too +// large to encode in the compact unwind encoding. Instead it requires that +// the function contains "subl $nnnnnnnn,ESP" in its prolog. The compact +// encoding contains the offset to the nnnnnnnn value in the function in +// UNWIND_X86_FRAMELESS_STACK_SIZE. +// UNWIND_X86_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the +// compact encoding is the offset of the DWARF FDE in the __eh_frame section. +// This mode is never used in object files. 
It is only generated by the +// linker in final linked images which have only DWARF unwind info for a +// function. +// +// The permutation encoding is a Lehmer code sequence encoded into a +// single variable-base number so we can encode the ordering of up to +// six registers in a 10-bit space. +// +// The following is the algorithm used to create the permutation encoding used +// with frameless stacks. It is passed the number of registers to be saved and +// an array of the register numbers saved. +// +//uint32_t permute_encode(uint32_t registerCount, const uint32_t registers[6]) +//{ +// uint32_t renumregs[6]; +// for (int i=6-registerCount; i < 6; ++i) { +// int countless = 0; +// for (int j=6-registerCount; j < i; ++j) { +// if ( registers[j] < registers[i] ) +// ++countless; +// } +// renumregs[i] = registers[i] - countless -1; +// } +// uint32_t permutationEncoding = 0; +// switch ( registerCount ) { +// case 6: +// permutationEncoding |= (120*renumregs[0] + 24*renumregs[1] +// + 6*renumregs[2] + 2*renumregs[3] +// + renumregs[4]); +// break; +// case 5: +// permutationEncoding |= (120*renumregs[1] + 24*renumregs[2] +// + 6*renumregs[3] + 2*renumregs[4] +// + renumregs[5]); +// break; +// case 4: +// permutationEncoding |= (60*renumregs[2] + 12*renumregs[3] +// + 3*renumregs[4] + renumregs[5]); +// break; +// case 3: +// permutationEncoding |= (20*renumregs[3] + 4*renumregs[4] +// + renumregs[5]); +// break; +// case 2: +// permutationEncoding |= (5*renumregs[4] + renumregs[5]); +// break; +// case 1: +// permutationEncoding |= (renumregs[5]); +// break; +// } +// return permutationEncoding; +//} +// + + + + +// +// x86_64 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// +// 4-bits: 0=old, 1=rbp based, 2=stack-imm, 3=stack-ind, 4=DWARF +// rbp based: +// 15-bits (5*3-bits per reg) register permutation +// 8-bits for stack offset +// frameless: +// 8-bits stack size +// 3-bits stack adjust +// 3-bits register count +// 10-bits register permutation +// +enum { + UNWIND_X86_64_MODE_MASK = 0x0F000000, + UNWIND_X86_64_MODE_RBP_FRAME = 0x01000000, + UNWIND_X86_64_MODE_STACK_IMMD = 0x02000000, + UNWIND_X86_64_MODE_STACK_IND = 0x03000000, + UNWIND_X86_64_MODE_DWARF = 0x04000000, + + UNWIND_X86_64_RBP_FRAME_REGISTERS = 0x00007FFF, + UNWIND_X86_64_RBP_FRAME_OFFSET = 0x00FF0000, + + UNWIND_X86_64_FRAMELESS_STACK_SIZE = 0x00FF0000, + UNWIND_X86_64_FRAMELESS_STACK_ADJUST = 0x0000E000, + UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT = 0x00001C00, + UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, + + UNWIND_X86_64_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; + +enum { + UNWIND_X86_64_REG_NONE = 0, + UNWIND_X86_64_REG_RBX = 1, + UNWIND_X86_64_REG_R12 = 2, + UNWIND_X86_64_REG_R13 = 3, + UNWIND_X86_64_REG_R14 = 4, + UNWIND_X86_64_REG_R15 = 5, + UNWIND_X86_64_REG_RBP = 6, +}; +// +// For x86_64 there are four modes for the compact unwind encoding: +// UNWIND_X86_64_MODE_RBP_FRAME: +// RBP based frame where RBP is push on stack immediately after return address, +// then RSP is moved to RBP. Thus, to unwind RSP is restored with the current +// EPB value, then RBP is restored by popping off the stack, and the return +// is done by popping the stack once more into the pc. +// All non-volatile registers that need to be restored must have been saved +// in a small range in the stack that starts RBP-8 to RBP-2040. The offset/8 +// is encoded in the UNWIND_X86_64_RBP_FRAME_OFFSET bits. 
The registers saved +// are encoded in the UNWIND_X86_64_RBP_FRAME_REGISTERS bits as five 3-bit entries. +// Each entry contains which register to restore. +// UNWIND_X86_64_MODE_STACK_IMMD: +// A "frameless" (RBP not used as frame pointer) function with a small +// constant stack size. To return, a constant (encoded in the compact +// unwind encoding) is added to the RSP. Then the return is done by +// popping the stack into the pc. +// All non-volatile registers that need to be restored must have been saved +// on the stack immediately after the return address. The stack_size/8 is +// encoded in the UNWIND_X86_64_FRAMELESS_STACK_SIZE (max stack size is 2048). +// The number of registers saved is encoded in UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT. +// UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION constains which registers were +// saved and their order. +// UNWIND_X86_64_MODE_STACK_IND: +// A "frameless" (RBP not used as frame pointer) function large constant +// stack size. This case is like the previous, except the stack size is too +// large to encode in the compact unwind encoding. Instead it requires that +// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact +// encoding contains the offset to the nnnnnnnn value in the function in +// UNWIND_X86_64_FRAMELESS_STACK_SIZE. +// UNWIND_X86_64_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the +// compact encoding is the offset of the DWARF FDE in the __eh_frame section. +// This mode is never used in object files. It is only generated by the +// linker in final linked images which have only DWARF unwind info for a +// function. +// + + +// ARM64 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// +// 4-bits: 4=frame-based, 3=DWARF, 2=frameless +// frameless: +// 12-bits of stack size +// frame-based: +// 4-bits D reg pairs saved +// 5-bits X reg pairs saved +// DWARF: +// 24-bits offset of DWARF FDE in __eh_frame section +// +enum { + UNWIND_ARM64_MODE_MASK = 0x0F000000, + UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, + UNWIND_ARM64_MODE_DWARF = 0x03000000, + UNWIND_ARM64_MODE_FRAME = 0x04000000, + + UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, + UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, + UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, + UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, + UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, + UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, + UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, + UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, + UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800, + + UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000, + UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; +// For arm64 there are three modes for the compact unwind encoding: +// UNWIND_ARM64_MODE_FRAME: +// This is a standard arm64 prolog where FP/LR are immediately pushed on the +// stack, then SP is copied to FP. If there are any non-volatile registers +// saved, then are copied into the stack frame in pairs in a contiguous +// range right below the saved FP/LR pair. Any subset of the five X pairs +// and four D pairs can be saved, but the memory layout must be in register +// number order. +// UNWIND_ARM64_MODE_FRAMELESS: +// A "frameless" leaf function, where FP/LR are not saved. The return address +// remains in LR throughout the function. If any non-volatile registers +// are saved, they must be pushed onto the stack before any stack space is +// allocated for local variables. 
The stack sized (including any saved +// non-volatile registers) divided by 16 is encoded in the bits +// UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK. +// UNWIND_ARM64_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the +// compact encoding is the offset of the DWARF FDE in the __eh_frame section. +// This mode is never used in object files. It is only generated by the +// linker in final linked images which have only DWARF unwind info for a +// function. +// + + + + + +//////////////////////////////////////////////////////////////////////////////// +// +// Relocatable Object Files: __LD,__compact_unwind +// +//////////////////////////////////////////////////////////////////////////////// + +// +// A compiler can generated compact unwind information for a function by adding +// a "row" to the __LD,__compact_unwind section. This section has the +// S_ATTR_DEBUG bit set, so the section will be ignored by older linkers. +// It is removed by the new linker, so never ends up in final executables. +// This section is a table, initially with one row per function (that needs +// unwind info). The table columns and some conceptual entries are: +// +// range-start pointer to start of function/range +// range-length +// compact-unwind-encoding 32-bit encoding +// personality-function or zero if no personality function +// lsda or zero if no LSDA data +// +// The length and encoding fields are 32-bits. The other are all pointer sized. +// +// In x86_64 assembly, these entry would look like: +// +// .section __LD,__compact_unwind,regular,debug +// +// #compact unwind for _foo +// .quad _foo +// .set L1,LfooEnd-_foo +// .long L1 +// .long 0x01010001 +// .quad 0 +// .quad 0 +// +// #compact unwind for _bar +// .quad _bar +// .set L2,LbarEnd-_bar +// .long L2 +// .long 0x01020011 +// .quad __gxx_personality +// .quad except_tab1 +// +// +// Notes: There is no need for any labels in the the __compact_unwind section. +// The use of the .set directive is to force the evaluation of the +// range-length at assembly time, instead of generating relocations. +// +// To support future compiler optimizations where which non-volatile registers +// are saved changes within a function (e.g. delay saving non-volatiles until +// necessary), there can by multiple lines in the __compact_unwind table for one +// function, each with a different (non-overlapping) range and each with +// different compact unwind encodings that correspond to the non-volatiles +// saved at that range of the function. +// +// If a particular function is so wacky that there is no compact unwind way +// to encode it, then the compiler can emit traditional DWARF unwind info. +// The runtime will use which ever is available. +// +// Runtime support for compact unwind encodings are only available on 10.6 +// and later. So, the compiler should not generate it when targeting pre-10.6. + + + + +//////////////////////////////////////////////////////////////////////////////// +// +// Final Linked Images: __TEXT,__unwind_info +// +//////////////////////////////////////////////////////////////////////////////// + +// +// The __TEXT,__unwind_info section is laid out for an efficient two level lookup. +// The header of the section contains a coarse index that maps function address +// to the page (4096 byte block) containing the unwind info for that function. 
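To make the two-level scheme a little more concrete, here is a hedged sketch (not part of the patch) of the lookup shape: the first level is an array of index entries sorted by functionOffset that gets binary-searched, and each 32-bit entry in a compressed second-level page packs a 24-bit function offset together with an 8-bit index into an encodings array. The struct and helper names below are invented stand-ins for the real structures declared just after this comment.

#include <algorithm>
#include <cstdint>
#include <vector>

// Stand-in for unwind_info_section_header_index_entry (only the fields the sketch needs).
struct IndexEntrySketch {
    uint32_t functionOffset;
    uint32_t secondLevelPagesSectionOffset;
};

// First level: entries are sorted by functionOffset, so the page covering funcOffset
// is the last entry whose functionOffset is <= funcOffset.
static const IndexEntrySketch *findFirstLevelEntry(
    const std::vector<IndexEntrySketch> &index, uint32_t funcOffset) {
    auto it = std::upper_bound(index.begin(), index.end(), funcOffset,
                               [](uint32_t off, const IndexEntrySketch &e) {
                                   return off < e.functionOffset;
                               });
    if (it == index.begin())
        return nullptr;           // address precedes the first indexed function
    return &*(it - 1);
}

// Second level (compressed page): low 24 bits are the function offset, high 8 bits
// index into the page's (or the common) encodings array.
static void decodeCompressedEntry(uint32_t entry,
                                  uint32_t &funcOffset, uint32_t &encodingIndex) {
    funcOffset    = entry & 0x00FFFFFF;
    encodingIndex = (entry >> 24) & 0xFF;
}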
+// + +#define UNWIND_SECTION_VERSION 1 +struct unwind_info_section_header +{ + uint32_t version; // UNWIND_SECTION_VERSION + uint32_t commonEncodingsArraySectionOffset; + uint32_t commonEncodingsArrayCount; + uint32_t personalityArraySectionOffset; + uint32_t personalityArrayCount; + uint32_t indexSectionOffset; + uint32_t indexCount; + // compact_unwind_encoding_t[] + // uint32_t personalities[] + // unwind_info_section_header_index_entry[] + // unwind_info_section_header_lsda_index_entry[] +}; + +struct unwind_info_section_header_index_entry +{ + uint32_t functionOffset; + uint32_t secondLevelPagesSectionOffset; // section offset to start of regular or compress page + uint32_t lsdaIndexArraySectionOffset; // section offset to start of lsda_index array for this range +}; + +struct unwind_info_section_header_lsda_index_entry +{ + uint32_t functionOffset; + uint32_t lsdaOffset; +}; + +// +// There are two kinds of second level index pages: regular and compressed. +// A compressed page can hold up to 1021 entries, but it cannot be used +// if too many different encoding types are used. The regular page holds +// 511 entries. +// + +struct unwind_info_regular_second_level_entry +{ + uint32_t functionOffset; + compact_unwind_encoding_t encoding; +}; + +#define UNWIND_SECOND_LEVEL_REGULAR 2 +struct unwind_info_regular_second_level_page_header +{ + uint32_t kind; // UNWIND_SECOND_LEVEL_REGULAR + uint16_t entryPageOffset; + uint16_t entryCount; + // entry array +}; + +#define UNWIND_SECOND_LEVEL_COMPRESSED 3 +struct unwind_info_compressed_second_level_page_header +{ + uint32_t kind; // UNWIND_SECOND_LEVEL_COMPRESSED + uint16_t entryPageOffset; + uint16_t entryCount; + uint16_t encodingsPageOffset; + uint16_t encodingsCount; + // 32-bit entry array + // encodings array +}; + +#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry) (entry & 0x00FFFFFF) +#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry) ((entry >> 24) & 0xFF) + + + +#endif + diff --git a/src/coreclr/src/nativeaot/libunwind/include/unwind.h b/src/coreclr/src/nativeaot/libunwind/include/unwind.h new file mode 100644 index 0000000000000..47d303c3f095a --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/unwind.h @@ -0,0 +1,400 @@ +//===------------------------------- unwind.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// C++ ABI Level 1 ABI documented at: +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_H__ +#define __UNWIND_H__ + +#include <__libunwind_config.h> + +#include +#include + +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) && defined(_WIN32) +#include +#include +#endif + +#if defined(__APPLE__) +#define LIBUNWIND_UNAVAIL __attribute__ (( unavailable )) +#else +#define LIBUNWIND_UNAVAIL +#endif + +typedef enum { + _URC_NO_REASON = 0, + _URC_OK = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8, +#if defined(_LIBUNWIND_ARM_EHABI) + _URC_FAILURE = 9 +#endif +} _Unwind_Reason_Code; + +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 // gcc extension to C++ ABI +} _Unwind_Action; + +typedef struct _Unwind_Context _Unwind_Context; // opaque + +#if defined(_LIBUNWIND_ARM_EHABI) +typedef uint32_t _Unwind_State; + +static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0; +static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1; +static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2; +static const _Unwind_State _US_ACTION_MASK = 3; +/* Undocumented flag for force unwinding. */ +static const _Unwind_State _US_FORCE_UNWIND = 8; + +typedef uint32_t _Unwind_EHT_Header; + +struct _Unwind_Control_Block; +typedef struct _Unwind_Control_Block _Unwind_Control_Block; +typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */ + +struct _Unwind_Control_Block { + uint64_t exception_class; + void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*); + + /* Unwinder cache, private fields for the unwinder's use */ + struct { + uint32_t reserved1; /* init reserved1 to 0, then don't touch */ + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + } unwinder_cache; + + /* Propagation barrier cache (valid after phase 1): */ + struct { + uint32_t sp; + uint32_t bitpattern[5]; + } barrier_cache; + + /* Cleanup cache (preserved over cleanup): */ + struct { + uint32_t bitpattern[4]; + } cleanup_cache; + + /* Pr cache (for pr's benefit): */ + struct { + uint32_t fnstart; /* function start address */ + _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */ + uint32_t additional; + uint32_t reserved1; + } pr_cache; + + long long int :0; /* Enforce the 8-byte alignment */ +} __attribute__((__aligned__(8))); + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) + (_Unwind_State state, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context); + +typedef _Unwind_Reason_Code (*__personality_routine) + (_Unwind_State state, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context); +#else +struct _Unwind_Context; // opaque +struct _Unwind_Exception; // forward declaration +typedef struct _Unwind_Exception _Unwind_Exception; + +struct _Unwind_Exception { + uint64_t exception_class; + void (*exception_cleanup)(_Unwind_Reason_Code reason, + _Unwind_Exception *exc); +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) + uintptr_t private_[6]; +#else + uintptr_t private_1; // non-zero means forced unwind + uintptr_t private_2; // holds sp that phase1 found for phase2 to use 
+#endif +#if __SIZEOF_POINTER__ == 4 + // The implementation of _Unwind_Exception uses an attribute mode on the + // above fields which has the side effect of causing this whole struct to + // round up to 32 bytes in size (48 with SEH). To be more explicit, we add + // pad fields added for binary compatibility. + uint32_t reserved[3]; +#endif + // The Itanium ABI requires that _Unwind_Exception objects are "double-word + // aligned". GCC has interpreted this to mean "use the maximum useful + // alignment for the target"; so do we. +} __attribute__((__aligned__)); + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) + (int version, + _Unwind_Action actions, + uint64_t exceptionClass, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context, + void* stop_parameter ); + +typedef _Unwind_Reason_Code (*__personality_routine) + (int version, + _Unwind_Action actions, + uint64_t exceptionClass, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context); +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// +// The following are the base functions documented by the C++ ABI +// +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object); +#else +extern _Unwind_Reason_Code + _Unwind_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_Resume(_Unwind_Exception *exception_object); +#endif +extern void _Unwind_DeleteException(_Unwind_Exception *exception_object); + +#if defined(_LIBUNWIND_ARM_EHABI) +typedef enum { + _UVRSC_CORE = 0, /* integer register */ + _UVRSC_VFP = 1, /* vfp */ + _UVRSC_WMMXD = 3, /* Intel WMMX data register */ + _UVRSC_WMMXC = 4 /* Intel WMMX control register */ +} _Unwind_VRS_RegClass; + +typedef enum { + _UVRSD_UINT32 = 0, + _UVRSD_VFPX = 1, + _UVRSD_UINT64 = 3, + _UVRSD_FLOAT = 4, + _UVRSD_DOUBLE = 5 +} _Unwind_VRS_DataRepresentation; + +typedef enum { + _UVRSR_OK = 0, + _UVRSR_NOT_IMPLEMENTED = 1, + _UVRSR_FAILED = 2 +} _Unwind_VRS_Result; + +extern void _Unwind_Complete(_Unwind_Exception* exception_object); + +extern _Unwind_VRS_Result +_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep); + +extern _Unwind_VRS_Result +_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep, uint32_t *pos); + +extern _Unwind_VRS_Result +_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t discriminator, + _Unwind_VRS_DataRepresentation representation); +#endif + +#if !defined(_LIBUNWIND_ARM_EHABI) + +extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index); +extern void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t new_value, uintptr_t *pos); +extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context); +extern void _Unwind_SetIP(struct _Unwind_Context *, uintptr_t new_value); + +#else // defined(_LIBUNWIND_ARM_EHABI) + +#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE) +#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern +#else +#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__ +#endif + +// These are de facto helper functions for ARM, which delegate the function +// calls to _Unwind_VRS_Get/Set(). These are not a part of ARM EHABI +// specification, thus these function MUST be inlined. 
Please don't replace +// these with the "extern" function declaration; otherwise, the program +// including this header won't be ABI compatible and will result in +// link error when we are linking the program with libgcc. + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) { + uintptr_t value = 0; + _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value); + return value; +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t value,uintptr_t *pos) { + _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value, pos); +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + // remove the thumb-bit before returning + return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1); +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) { + uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1); + _Unwind_SetGR(context, 15, value | thumb_bit, NULL); +} +#endif // defined(_LIBUNWIND_ARM_EHABI) + +extern uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context); +extern uintptr_t + _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context); +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter); +#else +extern _Unwind_Reason_Code + _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter); +#endif + +#ifdef __USING_SJLJ_EXCEPTIONS__ +typedef struct _Unwind_FunctionContext *_Unwind_FunctionContext_t; +extern void _Unwind_SjLj_Register(_Unwind_FunctionContext_t fc); +extern void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t fc); +#endif + +// +// The following are semi-suppoted extensions to the C++ ABI +// + +// +// called by __cxa_rethrow(). +// +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *exception_object); +#else +extern _Unwind_Reason_Code + _Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object); +#endif + +// _Unwind_Backtrace() is a gcc extension that walks the stack and calls the +// _Unwind_Trace_Fn once per frame until it reaches the bottom of the stack +// or the _Unwind_Trace_Fn function returns something other than _URC_NO_REASON. +typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); +extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +// _Unwind_GetCFA is a gcc extension that can be called from within a +// personality handler to get the CFA (stack pointer before call) of +// current frame. +extern uintptr_t _Unwind_GetCFA(struct _Unwind_Context *); + + +// _Unwind_GetIPInfo is a gcc extension that can be called from within a +// personality handler. Similar to _Unwind_GetIP() but also returns in +// *ipBefore a non-zero value if the instruction pointer is at or before the +// instruction causing the unwind. Normally, in a function call, the IP returned +// is the return address which is after the call instruction and may be past the +// end of the function containing the call instruction. +extern uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore); + + +// __register_frame() is used with dynamically generated code to register the +// FDE for a generated (JIT) code. 
The FDE must use pc-rel addressing to point +// to its function and optional LSDA. +// __register_frame() has existed in all versions of Mac OS X, but in 10.4 and +// 10.5 it was buggy and did not actually register the FDE with the unwinder. +// In 10.6 and later it does register properly. +extern void __register_frame(const void *fde); +extern void __deregister_frame(const void *fde); + +// _Unwind_Find_FDE() will locate the FDE if the pc is in some function that has +// an associated FDE. Note, Mac OS X 10.6 and later, introduces "compact unwind +// info" which the runtime uses in preference to DWARF unwind info. This +// function will only work if the target function has an FDE but no compact +// unwind info. +struct dwarf_eh_bases { + uintptr_t tbase; + uintptr_t dbase; + uintptr_t func; +}; +extern const void *_Unwind_Find_FDE(const void *pc, struct dwarf_eh_bases *); + + +// This function attempts to find the start (address of first instruction) of +// a function given an address inside the function. It only works if the +// function has an FDE (DWARF unwind info). +// This function is unimplemented on Mac OS X 10.6 and later. Instead, use +// _Unwind_Find_FDE() and look at the dwarf_eh_bases.func result. +extern void *_Unwind_FindEnclosingFunction(void *pc); + +// Mac OS X does not support text-rel and data-rel addressing so these functions +// are unimplemented +extern uintptr_t _Unwind_GetDataRelBase(struct _Unwind_Context *context) + LIBUNWIND_UNAVAIL; +extern uintptr_t _Unwind_GetTextRelBase(struct _Unwind_Context *context) + LIBUNWIND_UNAVAIL; + +// Mac OS X 10.4 and 10.5 had implementations of these functions in +// libgcc_s.dylib, but they never worked. +/// These functions are no longer available on Mac OS X. +extern void __register_frame_info_bases(const void *fde, void *ob, void *tb, + void *db) LIBUNWIND_UNAVAIL; +extern void __register_frame_info(const void *fde, void *ob) + LIBUNWIND_UNAVAIL; +extern void __register_frame_info_table_bases(const void *fde, void *ob, + void *tb, void *db) + LIBUNWIND_UNAVAIL; +extern void __register_frame_info_table(const void *fde, void *ob) + LIBUNWIND_UNAVAIL; +extern void __register_frame_table(const void *fde) + LIBUNWIND_UNAVAIL; +extern void *__deregister_frame_info(const void *fde) + LIBUNWIND_UNAVAIL; +extern void *__deregister_frame_info_bases(const void *fde) + LIBUNWIND_UNAVAIL; + +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) +#ifndef _WIN32 +typedef struct _EXCEPTION_RECORD EXCEPTION_RECORD; +typedef struct _CONTEXT CONTEXT; +typedef struct _DISPATCHER_CONTEXT DISPATCHER_CONTEXT; +#elif !defined(__MINGW32__) && VER_PRODUCTBUILD < 8000 +typedef struct _DISPATCHER_CONTEXT DISPATCHER_CONTEXT; +#endif +// This is the common wrapper for GCC-style personality functions with SEH. +extern EXCEPTION_DISPOSITION _GCC_specific_handler(EXCEPTION_RECORD *exc, + void *frame, + CONTEXT *ctx, + DISPATCHER_CONTEXT *disp, + __personality_routine pers); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __UNWIND_H__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/AddressSpace.hpp b/src/coreclr/src/nativeaot/libunwind/src/AddressSpace.hpp new file mode 100644 index 0000000000000..fb07c807db9e9 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/AddressSpace.hpp @@ -0,0 +1,615 @@ +//===------------------------- AddressSpace.hpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Abstracts accessing local vs remote address spaces. +// +//===----------------------------------------------------------------------===// + +#ifndef __ADDRESSSPACE_HPP__ +#define __ADDRESSSPACE_HPP__ + +#include +#include +#include +#include + +#ifndef _LIBUNWIND_USE_DLADDR + #if !defined(_LIBUNWIND_IS_BAREMETAL) && !defined(_WIN32) + #define _LIBUNWIND_USE_DLADDR 1 + #else + #define _LIBUNWIND_USE_DLADDR 0 + #endif +#endif + +#if _LIBUNWIND_USE_DLADDR +#include +#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#pragma comment(lib, "dl") +#endif +#endif + +#ifdef __APPLE__ +#include +namespace libunwind { + bool checkKeyMgrRegisteredFDEs(uintptr_t targetAddr, void *&fde); +} +#endif + +#include "libunwind.h" +#include "config.h" +#include "dwarf2.h" +#include "EHHeaderParser.hpp" +#include "Registers.hpp" + +#ifdef __APPLE__ + + struct dyld_unwind_sections + { + const struct mach_header* mh; + const void* dwarf_section; + uintptr_t dwarf_section_length; + const void* compact_unwind_section; + uintptr_t compact_unwind_section_length; + }; + #if (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) \ + && (__MAC_OS_X_VERSION_MIN_REQUIRED >= 1070)) \ + || defined(__IPHONE_OS_VERSION_MIN_REQUIRED) + // In 10.7.0 or later, libSystem.dylib implements this function. + extern "C" bool _dyld_find_unwind_sections(void *, dyld_unwind_sections *); + #else + // In 10.6.x and earlier, we need to implement this functionality. Note + // that this requires a newer version of libmacho (from cctools) than is + // present in libSystem on 10.6.x (for getsectiondata). + static inline bool _dyld_find_unwind_sections(void* addr, + dyld_unwind_sections* info) { + // Find mach-o image containing address. + Dl_info dlinfo; + if (!dladdr(addr, &dlinfo)) + return false; +#if __LP64__ + const struct mach_header_64 *mh = (const struct mach_header_64 *)dlinfo.dli_fbase; +#else + const struct mach_header *mh = (const struct mach_header *)dlinfo.dli_fbase; +#endif + + // Initialize the return struct + info->mh = (const struct mach_header *)mh; + info->dwarf_section = getsectiondata(mh, "__TEXT", "__eh_frame", &info->dwarf_section_length); + info->compact_unwind_section = getsectiondata(mh, "__TEXT", "__unwind_info", &info->compact_unwind_section_length); + + if (!info->dwarf_section) { + info->dwarf_section_length = 0; + } + + if (!info->compact_unwind_section) { + info->compact_unwind_section_length = 0; + } + + return true; + } + #endif + +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) + +// When statically linked on bare-metal, the symbols for the EH table are looked +// up without going through the dynamic loader. + +// The following linker script may be used to produce the necessary sections and symbols. +// Unless the --eh-frame-hdr linker option is provided, the section is not generated +// and does not take space in the output file. +// +// .eh_frame : +// { +// __eh_frame_start = .; +// KEEP(*(.eh_frame)) +// __eh_frame_end = .; +// } +// +// .eh_frame_hdr : +// { +// KEEP(*(.eh_frame_hdr)) +// } +// +// __eh_frame_hdr_start = SIZEOF(.eh_frame_hdr) > 0 ? ADDR(.eh_frame_hdr) : 0; +// __eh_frame_hdr_end = SIZEOF(.eh_frame_hdr) > 0 ? . 
: 0; + +extern char __eh_frame_start; +extern char __eh_frame_end; + +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) +extern char __eh_frame_hdr_start; +extern char __eh_frame_hdr_end; +#endif + +#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) + +// When statically linked on bare-metal, the symbols for the EH table are looked +// up without going through the dynamic loader. +extern char __exidx_start; +extern char __exidx_end; + +#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +// ELF-based systems may use dl_iterate_phdr() to access sections +// containing unwinding information. The ElfW() macro for pointer-size +// independent ELF header traversal is not provided by on some +// systems (e.g., FreeBSD). On these systems the data structures are +// just called Elf_XXX. Define ElfW() locally. +#ifndef _WIN32 +#include +#else +#include +#include +#endif +#if !defined(ElfW) +#define ElfW(type) Elf_##type +#endif + +#endif + +namespace libunwind { + +/// Used by findUnwindSections() to return info about needed sections. +struct UnwindInfoSections { +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) || defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) || \ + defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + // No dso_base for SEH or ARM EHABI. + uintptr_t dso_base; +#endif +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + uintptr_t dwarf_section; + uintptr_t dwarf_section_length; +#endif +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + uintptr_t dwarf_index_section; + uintptr_t dwarf_index_section_length; +#endif +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + uintptr_t compact_unwind_section; + uintptr_t compact_unwind_section_length; +#endif +#if defined(_LIBUNWIND_ARM_EHABI) + uintptr_t arm_section; + uintptr_t arm_section_length; +#endif +}; + + +/// LocalAddressSpace is used as a template parameter to UnwindCursor when +/// unwinding a thread in the same process. The wrappers compile away, +/// making local unwinds fast. 
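A hedged usage sketch ahead of the class itself (not part of this patch): the parsers hand every raw read to the address-space object instead of dereferencing pointers directly, which is what lets a different address space be substituted through the template parameter. The helper below assumes this header is included; readLengthPrefixedULEB and its namespace are invented names for illustration only.

#include <cstdint>

namespace libunwind_sketch {
// Read a 4-byte length followed by a ULEB128 payload through the
// LocalAddressSpace accessors declared below.
inline uint64_t readLengthPrefixedULEB(libunwind::LocalAddressSpace &as,
                                       libunwind::LocalAddressSpace::pint_t addr) {
    uint32_t length = as.get32(addr);                        // fixed-width read
    libunwind::LocalAddressSpace::pint_t p   = addr + 4;     // payload follows the length
    libunwind::LocalAddressSpace::pint_t end = p + length;
    return libunwind::LocalAddressSpace::getULEB128(p, end); // advances p past the value
}
} // namespace libunwind_sketch

// A caller would normally pass the process-wide instance:
//   uint64_t v = libunwind_sketch::readLengthPrefixedULEB(
//       libunwind::LocalAddressSpace::sThisAddressSpace, someAddress);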
+class _LIBUNWIND_HIDDEN LocalAddressSpace { +public: + typedef uintptr_t pint_t; + typedef intptr_t sint_t; + uint8_t get8(pint_t addr) { + uint8_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint16_t get16(pint_t addr) { + uint16_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint32_t get32(pint_t addr) { + uint32_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint64_t get64(pint_t addr) { + uint64_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + double getDouble(pint_t addr) { + double val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + v128 getVector(pint_t addr) { + v128 val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uintptr_t getP(pint_t addr); + uint64_t getRegister(pint_t addr); + static uint64_t getULEB128(pint_t &addr, pint_t end); + static int64_t getSLEB128(pint_t &addr, pint_t end); + + pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, + pint_t datarelBase = 0); + bool findFunctionName(pint_t addr, char *buf, size_t bufLen, + unw_word_t *offset); + bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info); + bool findOtherFDE(pint_t targetAddr, pint_t &fde); + + static LocalAddressSpace sThisAddressSpace; +}; + +inline uintptr_t LocalAddressSpace::getP(pint_t addr) { +#if __SIZEOF_POINTER__ == 8 + return get64(addr); +#else + return get32(addr); +#endif +} + +inline uint64_t LocalAddressSpace::getRegister(pint_t addr) { +#if __SIZEOF_POINTER__ == 8 || defined(__mips64) + return get64(addr); +#else + return get32(addr); +#endif +} + +/// Read a ULEB128 into a 64-bit word. +inline uint64_t LocalAddressSpace::getULEB128(pint_t &addr, pint_t end) { + const uint8_t *p = (uint8_t *)addr; + const uint8_t *pend = (uint8_t *)end; + uint64_t result = 0; + int bit = 0; + do { + uint64_t b; + + if (p == pend) + _LIBUNWIND_ABORT("truncated uleb128 expression"); + + b = *p & 0x7f; + + if (bit >= 64 || b << bit >> bit != b) { + _LIBUNWIND_ABORT("malformed uleb128 expression"); + } else { + result |= b << bit; + bit += 7; + } + } while (*p++ >= 0x80); + addr = (pint_t) p; + return result; +} + +/// Read a SLEB128 into a 64-bit word. 
+inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) { + const uint8_t *p = (uint8_t *)addr; + const uint8_t *pend = (uint8_t *)end; + int64_t result = 0; + int bit = 0; + uint8_t byte; + do { + if (p == pend) + _LIBUNWIND_ABORT("truncated sleb128 expression"); + byte = *p++; + result |= ((byte & 0x7f) << bit); + bit += 7; + } while (byte & 0x80); + // sign extend negative numbers + if ((byte & 0x40) != 0) + result |= (-1ULL) << bit; + addr = (pint_t) p; + return result; +} + +inline LocalAddressSpace::pint_t +LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, + pint_t datarelBase) { + pint_t startAddr = addr; + const uint8_t *p = (uint8_t *)addr; + pint_t result; + + // first get value + switch (encoding & 0x0F) { + case DW_EH_PE_ptr: + result = getP(addr); + p += sizeof(pint_t); + addr = (pint_t) p; + break; + case DW_EH_PE_uleb128: + result = (pint_t)getULEB128(addr, end); + break; + case DW_EH_PE_udata2: + result = get16(addr); + p += 2; + addr = (pint_t) p; + break; + case DW_EH_PE_udata4: + result = get32(addr); + p += 4; + addr = (pint_t) p; + break; + case DW_EH_PE_udata8: + result = (pint_t)get64(addr); + p += 8; + addr = (pint_t) p; + break; + case DW_EH_PE_sleb128: + result = (pint_t)getSLEB128(addr, end); + break; + case DW_EH_PE_sdata2: + // Sign extend from signed 16-bit value. + result = (pint_t)(int16_t)get16(addr); + p += 2; + addr = (pint_t) p; + break; + case DW_EH_PE_sdata4: + // Sign extend from signed 32-bit value. + result = (pint_t)(int32_t)get32(addr); + p += 4; + addr = (pint_t) p; + break; + case DW_EH_PE_sdata8: + result = (pint_t)get64(addr); + p += 8; + addr = (pint_t) p; + break; + default: + _LIBUNWIND_ABORT("unknown pointer encoding"); + } + + // then add relative offset + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + // do nothing + break; + case DW_EH_PE_pcrel: + result += startAddr; + break; + case DW_EH_PE_textrel: + _LIBUNWIND_ABORT("DW_EH_PE_textrel pointer encoding not supported"); + break; + case DW_EH_PE_datarel: + // DW_EH_PE_datarel is only valid in a few places, so the parameter has a + // default value of 0, and we abort in the event that someone calls this + // function with a datarelBase of 0 and DW_EH_PE_datarel encoding. 
+ if (datarelBase == 0) + _LIBUNWIND_ABORT("DW_EH_PE_datarel is invalid with a datarelBase of 0"); + result += datarelBase; + break; + case DW_EH_PE_funcrel: + _LIBUNWIND_ABORT("DW_EH_PE_funcrel pointer encoding not supported"); + break; + case DW_EH_PE_aligned: + _LIBUNWIND_ABORT("DW_EH_PE_aligned pointer encoding not supported"); + break; + default: + _LIBUNWIND_ABORT("unknown pointer encoding"); + break; + } + + if (encoding & DW_EH_PE_indirect) + result = getP(result); + + return result; +} + +inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, + UnwindInfoSections &info) { +#ifdef __APPLE__ + dyld_unwind_sections dyldInfo; + if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) { + info.dso_base = (uintptr_t)dyldInfo.mh; + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + info.dwarf_section = (uintptr_t)dyldInfo.dwarf_section; + info.dwarf_section_length = dyldInfo.dwarf_section_length; + #endif + info.compact_unwind_section = (uintptr_t)dyldInfo.compact_unwind_section; + info.compact_unwind_section_length = dyldInfo.compact_unwind_section_length; + return true; + } +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) + // Bare metal is statically linked, so no need to ask the dynamic loader + info.dwarf_section_length = (uintptr_t)(&__eh_frame_end - &__eh_frame_start); + info.dwarf_section = (uintptr_t)(&__eh_frame_start); + _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", + (void *)info.dwarf_section, (void *)info.dwarf_section_length); +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + info.dwarf_index_section = (uintptr_t)(&__eh_frame_hdr_start); + info.dwarf_index_section_length = (uintptr_t)(&__eh_frame_hdr_end - &__eh_frame_hdr_start); + _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: index section %p length %p", + (void *)info.dwarf_index_section, (void *)info.dwarf_index_section_length); +#endif + if (info.dwarf_section_length) + return true; +#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) + // Bare metal is statically linked, so no need to ask the dynamic loader + info.arm_section = (uintptr_t)(&__exidx_start); + info.arm_section_length = (uintptr_t)(&__exidx_end - &__exidx_start); + _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", + (void *)info.arm_section, (void *)info.arm_section_length); + if (info.arm_section && info.arm_section_length) + return true; +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) + HMODULE mods[1024]; + HANDLE process = GetCurrentProcess(); + DWORD needed; + + if (!EnumProcessModules(process, mods, sizeof(mods), &needed)) + return false; + + for (unsigned i = 0; i < (needed / sizeof(HMODULE)); i++) { + PIMAGE_DOS_HEADER pidh = (PIMAGE_DOS_HEADER)mods[i]; + PIMAGE_NT_HEADERS pinh = (PIMAGE_NT_HEADERS)((BYTE *)pidh + pidh->e_lfanew); + PIMAGE_FILE_HEADER pifh = (PIMAGE_FILE_HEADER)&pinh->FileHeader; + PIMAGE_SECTION_HEADER pish = IMAGE_FIRST_SECTION(pinh); + bool found_obj = false; + bool found_hdr = false; + + info.dso_base = (uintptr_t)mods[i]; + for (unsigned j = 0; j < pifh->NumberOfSections; j++, pish++) { + uintptr_t begin = pish->VirtualAddress + (uintptr_t)mods[i]; + uintptr_t end = begin + pish->Misc.VirtualSize; + if (!strncmp((const char *)pish->Name, ".text", + IMAGE_SIZEOF_SHORT_NAME)) { + if (targetAddr >= begin && targetAddr < end) + found_obj = true; + } else if (!strncmp((const char *)pish->Name, ".eh_frame", + IMAGE_SIZEOF_SHORT_NAME)) { + info.dwarf_section = begin; + info.dwarf_section_length = 
pish->Misc.VirtualSize; + found_hdr = true; + } + if (found_obj && found_hdr) + return true; + } + } + return false; +#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) + // Don't even bother, since Windows has functions that do all this stuff + // for us. + (void)targetAddr; + (void)info; + return true; +#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) && \ + (__ANDROID_API__ < 21) + int length = 0; + info.arm_section = + (uintptr_t)dl_unwind_find_exidx((_Unwind_Ptr)targetAddr, &length); + info.arm_section_length = (uintptr_t)length; + if (info.arm_section && info.arm_section_length) + return true; +#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + struct dl_iterate_cb_data { + LocalAddressSpace *addressSpace; + UnwindInfoSections *sects; + uintptr_t targetAddr; + }; + + dl_iterate_cb_data cb_data = {this, &info, targetAddr}; + int found = dl_iterate_phdr( + [](struct dl_phdr_info *pinfo, size_t, void *data) -> int { + auto cbdata = static_cast(data); + bool found_obj = false; + bool found_hdr = false; + + assert(cbdata); + assert(cbdata->sects); + + if (cbdata->targetAddr < pinfo->dlpi_addr) { + return false; + } + +#if !defined(Elf_Half) + typedef ElfW(Half) Elf_Half; +#endif +#if !defined(Elf_Phdr) + typedef ElfW(Phdr) Elf_Phdr; +#endif +#if !defined(Elf_Addr) && defined(__ANDROID__) + typedef ElfW(Addr) Elf_Addr; +#endif + + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + #if !defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + #error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform." + #endif + size_t object_length; +#if defined(__ANDROID__) + Elf_Addr image_base = + pinfo->dlpi_phnum + ? reinterpret_cast(pinfo->dlpi_phdr) - + reinterpret_cast(pinfo->dlpi_phdr) + ->p_offset + : 0; +#endif + + for (Elf_Half i = 0; i < pinfo->dlpi_phnum; i++) { + const Elf_Phdr *phdr = &pinfo->dlpi_phdr[i]; + if (phdr->p_type == PT_LOAD) { + uintptr_t begin = pinfo->dlpi_addr + phdr->p_vaddr; +#if defined(__ANDROID__) + if (pinfo->dlpi_addr == 0 && phdr->p_vaddr < image_base) + begin = begin + image_base; +#endif + uintptr_t end = begin + phdr->p_memsz; + if (cbdata->targetAddr >= begin && cbdata->targetAddr < end) { + cbdata->sects->dso_base = begin; + object_length = phdr->p_memsz; + found_obj = true; + } + } else if (phdr->p_type == PT_GNU_EH_FRAME) { + EHHeaderParser::EHHeaderInfo hdrInfo; + uintptr_t eh_frame_hdr_start = pinfo->dlpi_addr + phdr->p_vaddr; +#if defined(__ANDROID__) + if (pinfo->dlpi_addr == 0 && phdr->p_vaddr < image_base) + eh_frame_hdr_start = eh_frame_hdr_start + image_base; +#endif + cbdata->sects->dwarf_index_section = eh_frame_hdr_start; + cbdata->sects->dwarf_index_section_length = phdr->p_memsz; + found_hdr = EHHeaderParser::decodeEHHdr( + *cbdata->addressSpace, eh_frame_hdr_start, phdr->p_memsz, + hdrInfo); + if (found_hdr) + cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr; + } + } + + if (found_obj && found_hdr) { + cbdata->sects->dwarf_section_length = object_length; + return true; + } else { + return false; + } + #else // defined(_LIBUNWIND_ARM_EHABI) + for (Elf_Half i = 0; i < pinfo->dlpi_phnum; i++) { + const Elf_Phdr *phdr = &pinfo->dlpi_phdr[i]; + if (phdr->p_type == PT_LOAD) { + uintptr_t begin = pinfo->dlpi_addr + phdr->p_vaddr; + uintptr_t end = begin + phdr->p_memsz; + if (cbdata->targetAddr >= begin && cbdata->targetAddr < end) + found_obj = true; + } else if (phdr->p_type == PT_ARM_EXIDX) { + uintptr_t exidx_start = pinfo->dlpi_addr + phdr->p_vaddr; + 
cbdata->sects->arm_section = exidx_start; + cbdata->sects->arm_section_length = phdr->p_memsz; + found_hdr = true; + } + } + return found_obj && found_hdr; + #endif + }, + &cb_data); + return static_cast(found); +#endif + + return false; +} + + +inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) { +#ifdef __APPLE__ + return checkKeyMgrRegisteredFDEs(targetAddr, *((void**)&fde)); +#else + // TO DO: if OS has way to dynamically register FDEs, check that. + (void)targetAddr; + (void)fde; + return false; +#endif +} + +inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf, + size_t bufLen, + unw_word_t *offset) { +#if _LIBUNWIND_USE_DLADDR + Dl_info dyldInfo; + if (dladdr((void *)addr, &dyldInfo)) { + if (dyldInfo.dli_sname != NULL) { + snprintf(buf, bufLen, "%s", dyldInfo.dli_sname); + *offset = (addr - (pint_t) dyldInfo.dli_saddr); + return true; + } + } +#else + (void)addr; + (void)buf; + (void)bufLen; + (void)offset; +#endif + return false; +} + +} // namespace libunwind + +#endif // __ADDRESSSPACE_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/src/CMakeLists.txt new file mode 100644 index 0000000000000..572c82396bfa4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/CMakeLists.txt @@ -0,0 +1,178 @@ +# Get sources + +set(LIBUNWIND_CXX_SOURCES + libunwind.cpp + Unwind-EHABI.cpp + Unwind-seh.cpp + ) +unwind_append_if(LIBUNWIND_CXX_SOURCES APPLE Unwind_AppleExtras.cpp) + +set(LIBUNWIND_C_SOURCES + UnwindLevel1.c + UnwindLevel1-gcc-ext.c + Unwind-sjlj.c + ) +set_source_files_properties(${LIBUNWIND_C_SOURCES} + PROPERTIES + COMPILE_FLAGS "-std=c99") + +set(LIBUNWIND_ASM_SOURCES + UnwindRegistersRestore.S + UnwindRegistersSave.S + ) +set_source_files_properties(${LIBUNWIND_ASM_SOURCES} + PROPERTIES + LANGUAGE C) + +set(LIBUNWIND_HEADERS + AddressSpace.hpp + assembly.h + CompactUnwinder.hpp + config.h + dwarf2.h + DwarfInstructions.hpp + DwarfParser.hpp + libunwind_ext.h + Registers.hpp + RWMutex.hpp + UnwindCursor.hpp + ../include/libunwind.h + ../include/unwind.h + ) + +unwind_append_if(LIBUNWIND_HEADERS APPLE + ../include/mach-o/compact_unwind_encoding.h + ) + +if (MSVC_IDE) + # Force them all into the headers dir on MSVC, otherwise they end up at + # project scope because they don't have extensions. + source_group("Header Files" FILES ${LIBUNWIND_HEADERS}) +endif() + +set(LIBUNWIND_SOURCES + ${LIBUNWIND_CXX_SOURCES} + ${LIBUNWIND_C_SOURCES} + ${LIBUNWIND_ASM_SOURCES}) + +# Generate library list. +set(libraries) +unwind_append_if(libraries LIBUNWIND_HAS_C_LIB c) +if (LIBUNWIND_USE_COMPILER_RT) + list(APPEND libraries "${LIBUNWIND_BUILTINS_LIBRARY}") +else() + unwind_append_if(libraries LIBUNWIND_HAS_GCC_S_LIB gcc_s) + unwind_append_if(libraries LIBUNWIND_HAS_GCC_LIB gcc) +endif() +unwind_append_if(libraries LIBUNWIND_HAS_DL_LIB dl) +if (LIBUNWIND_ENABLE_THREADS) + unwind_append_if(libraries LIBUNWIND_HAS_PTHREAD_LIB pthread) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_WEAK_PTHREAD_LIB -DLIBUNWIND_USE_WEAK_PTHREAD=1) +endif() + +# Setup flags. 
+unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_NO_RTTI_FLAG -fno-rtti) + +unwind_append_if(LIBUNWIND_LINK_FLAGS LIBUNWIND_HAS_NODEFAULTLIBS_FLAG -nodefaultlibs) + +# MINGW_LIBRARIES is defined in config-ix.cmake +unwind_append_if(libraries MINGW "${MINGW_LIBRARIES}") + +if (LIBUNWIND_HAS_NO_EXCEPTIONS_FLAG AND LIBUNWIND_HAS_FUNWIND_TABLES) + list(APPEND LIBUNWIND_COMPILE_FLAGS -fno-exceptions) + list(APPEND LIBUNWIND_COMPILE_FLAGS -funwind-tables) +elseif (LIBUNWIND_ENABLE_SHARED) + message(FATAL_ERROR + "Compiler doesn't support generation of unwind tables if exception " + "support is disabled. Building libunwind DSO with runtime dependency " + "on C++ ABI library is not supported.") +endif() + +if (APPLE) + list(APPEND LIBUNWIND_COMPILE_FLAGS "-U__STRICT_ANSI__") + list(APPEND LIBUNWIND_LINK_FLAGS + "-compatibility_version 1" + "-install_name /usr/lib/libunwind.1.dylib") + + if (CMAKE_OSX_DEPLOYMENT_TARGET STREQUAL "10.6") + list(APPEND LIBUNWIND_LINK_FLAGS + "-current_version ${LIBUNWIND_VERSION}" + "/usr/lib/libSystem.B.dylib") + endif () +endif () + +string(REPLACE ";" " " LIBUNWIND_COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}") +string(REPLACE ";" " " LIBUNWIND_CXX_FLAGS "${LIBUNWIND_CXX_FLAGS}") +string(REPLACE ";" " " LIBUNWIND_C_FLAGS "${LIBUNWIND_C_FLAGS}") +string(REPLACE ";" " " LIBUNWIND_LINK_FLAGS "${LIBUNWIND_LINK_FLAGS}") + +set_property(SOURCE ${LIBUNWIND_CXX_SOURCES} + APPEND_STRING PROPERTY COMPILE_FLAGS " ${LIBUNWIND_CXX_FLAGS}") +set_property(SOURCE ${LIBUNWIND_C_SOURCES} + APPEND_STRING PROPERTY COMPILE_FLAGS " ${LIBUNWIND_C_FLAGS}") + +# Build the shared library. +if (LIBUNWIND_ENABLE_SHARED) + add_library(unwind_shared SHARED ${LIBUNWIND_SOURCES} ${LIBUNWIND_HEADERS}) + if(COMMAND llvm_setup_rpath) + llvm_setup_rpath(unwind_shared) + endif() + target_link_libraries(unwind_shared PRIVATE ${libraries}) + set_target_properties(unwind_shared + PROPERTIES + COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}" + LINK_FLAGS "${LIBUNWIND_LINK_FLAGS}" + OUTPUT_NAME "unwind" + VERSION "1.0" + SOVERSION "1") + list(APPEND LIBUNWIND_BUILD_TARGETS "unwind_shared") + if (LIBUNWIND_INSTALL_SHARED_LIBRARY) + list(APPEND LIBUNWIND_INSTALL_TARGETS "unwind_shared") + endif() +endif() + +# Build the static library. +if (LIBUNWIND_ENABLE_STATIC) + add_library(unwind_static STATIC ${LIBUNWIND_SOURCES} ${LIBUNWIND_HEADERS}) + target_link_libraries(unwind_static PRIVATE ${libraries}) + set_target_properties(unwind_static + PROPERTIES + COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}" + LINK_FLAGS "${LIBUNWIND_LINK_FLAGS}" + OUTPUT_NAME "unwind") + + if(LIBUNWIND_HERMETIC_STATIC_LIBRARY) + append_flags_if_supported(UNWIND_STATIC_LIBRARY_FLAGS -fvisibility=hidden) + append_flags_if_supported(UNWIND_STATIC_LIBRARY_FLAGS -fvisibility-global-new-delete-hidden) + target_compile_options(unwind_static PRIVATE ${UNWIND_STATIC_LIBRARY_FLAGS}) + target_compile_definitions(unwind_static PRIVATE _LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) + endif() + + list(APPEND LIBUNWIND_BUILD_TARGETS "unwind_static") + if (LIBUNWIND_INSTALL_STATIC_LIBRARY) + list(APPEND LIBUNWIND_INSTALL_TARGETS "unwind_static") + endif() +endif() + +# Add a meta-target for both libraries. 
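+# `unwind` depends on whichever of unwind_shared/unwind_static were enabled
+# above, e.g. (assuming an already configured build tree):
+#   cmake --build . --target unwind
+#   cmake --build . --target install-unwind   # only when library install is enabled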
+add_custom_target(unwind DEPENDS ${LIBUNWIND_BUILD_TARGETS}) + +if (LIBUNWIND_INSTALL_LIBRARY) + install(TARGETS ${LIBUNWIND_INSTALL_TARGETS} + LIBRARY DESTINATION ${LIBUNWIND_INSTALL_PREFIX}${LIBUNWIND_INSTALL_LIBRARY_DIR} COMPONENT unwind + ARCHIVE DESTINATION ${LIBUNWIND_INSTALL_PREFIX}${LIBUNWIND_INSTALL_LIBRARY_DIR} COMPONENT unwind) +endif() + +if (NOT CMAKE_CONFIGURATION_TYPES AND LIBUNWIND_INSTALL_LIBRARY) + add_custom_target(install-unwind + DEPENDS unwind + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=unwind + -P "${LIBUNWIND_BINARY_DIR}/cmake_install.cmake") + add_custom_target(install-unwind-stripped + DEPENDS unwind + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=unwind + -DCMAKE_INSTALL_DO_STRIP=1 + -P "${LIBUNWIND_BINARY_DIR}/cmake_install.cmake") +endif() diff --git a/src/coreclr/src/nativeaot/libunwind/src/CompactUnwinder.hpp b/src/coreclr/src/nativeaot/libunwind/src/CompactUnwinder.hpp new file mode 100644 index 0000000000000..3546f195120a1 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/CompactUnwinder.hpp @@ -0,0 +1,697 @@ +//===-------------------------- CompactUnwinder.hpp -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Does runtime stack unwinding using compact unwind encodings. +// +//===----------------------------------------------------------------------===// + +#ifndef __COMPACT_UNWINDER_HPP__ +#define __COMPACT_UNWINDER_HPP__ + +#include +#include + +#include +#include + +#include "Registers.hpp" + +#define EXTRACT_BITS(value, mask) \ + ((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1)) + +namespace libunwind { + +#if defined(_LIBUNWIND_TARGET_I386) +/// CompactUnwinder_x86 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_x86 register set +template +class CompactUnwinder_x86 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t info, + uint32_t functionStart, A &addressSpace, + Registers_x86 ®isters); + +private: + typename A::pint_t pint_t; + + static void frameUnwind(A &addressSpace, Registers_x86 ®isters); + static void framelessUnwind(A &addressSpace, + typename A::pint_t returnAddressLocation, + Registers_x86 ®isters); + static int + stepWithCompactEncodingEBPFrame(compact_unwind_encoding_t compactEncoding, + uint32_t functionStart, A &addressSpace, + Registers_x86 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters, bool indirectStackSize); +}; + +template +int CompactUnwinder_x86::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters) { + switch (compactEncoding & UNWIND_X86_MODE_MASK) { + case UNWIND_X86_MODE_EBP_FRAME: + return stepWithCompactEncodingEBPFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_X86_MODE_STACK_IMMD: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, false); + case UNWIND_X86_MODE_STACK_IND: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, true); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_x86::stepWithCompactEncodingEBPFrame( + 
compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters) { + uint32_t savedRegistersOffset = + EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_OFFSET); + uint32_t savedRegistersLocations = + EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_REGISTERS); + + uint32_t savedRegisters = registers.getEBP() - 4 * savedRegistersOffset; + for (int i = 0; i < 5; ++i) { + switch (savedRegistersLocations & 0x7) { + case UNWIND_X86_REG_NONE: + // no register saved in this slot + break; + case UNWIND_X86_REG_EBX: + registers.setEBX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ECX: + registers.setECX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDX: + registers.setEDX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDI: + registers.setEDI(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ESI: + registers.setESI(addressSpace.get32(savedRegisters), savedRegisters); + break; + default: + (void)functionStart; + _LIBUNWIND_DEBUG_LOG("bad register for EBP frame, encoding=%08X for " + "function starting at 0x%X", + compactEncoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 4; + savedRegistersLocations = (savedRegistersLocations >> 3); + } + frameUnwind(addressSpace, registers); + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_x86::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters, bool indirectStackSize) { + uint32_t stackSizeEncoded = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE); + uint32_t stackAdjust = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST); + uint32_t regCount = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT); + uint32_t permutation = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION); + uint32_t stackSize = stackSizeEncoded * 4; + if (indirectStackSize) { + // stack size is encoded in subl $xxx,%esp instruction + uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); + stackSize = subl + 4 * stackAdjust; + } + // decompress permutation + uint32_t permunreg[6]; + switch (regCount) { + case 6: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + permunreg[5] = 0; + break; + case 5: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + break; + case 4: + permunreg[0] = permutation / 60; + permutation -= (permunreg[0] * 60); + permunreg[1] = permutation / 12; + permutation -= (permunreg[1] * 12); + permunreg[2] = permutation / 3; + permutation -= (permunreg[2] * 3); + permunreg[3] = permutation; + break; + case 3: + permunreg[0] = permutation / 20; + permutation -= (permunreg[0] * 20); + permunreg[1] = permutation / 4; + permutation -= (permunreg[1] * 4); + permunreg[2] = permutation; + break; + case 2: + permunreg[0] = permutation / 5; + permutation 
-= (permunreg[0] * 5); + permunreg[1] = permutation; + break; + case 1: + permunreg[0] = permutation; + break; + } + // re-number registers back to standard numbers + int registersSaved[6]; + bool used[7] = { false, false, false, false, false, false, false }; + for (uint32_t i = 0; i < regCount; ++i) { + uint32_t renum = 0; + for (int u = 1; u < 7; ++u) { + if (!used[u]) { + if (renum == permunreg[i]) { + registersSaved[i] = u; + used[u] = true; + break; + } + ++renum; + } + } + } + uint32_t savedRegisters = registers.getSP() + stackSize - 4 - 4 * regCount; + for (uint32_t i = 0; i < regCount; ++i) { + switch (registersSaved[i]) { + case UNWIND_X86_REG_EBX: + registers.setEBX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ECX: + registers.setECX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDX: + registers.setEDX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDI: + registers.setEDI(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ESI: + registers.setESI(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EBP: + registers.setEBP(addressSpace.get32(savedRegisters), savedRegisters); + break; + default: + _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " + "function starting at 0x%X", + encoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 4; + } + framelessUnwind(addressSpace, savedRegisters, registers); + return UNW_STEP_SUCCESS; +} + + +template +void CompactUnwinder_x86::frameUnwind(A &addressSpace, + Registers_x86 ®isters) { + typename A::pint_t bp = registers.getEBP(); + // ebp points to old ebp + registers.setEBP(addressSpace.get32(bp), bp); + // old esp is ebp less saved ebp and return address + registers.setSP((uint32_t)bp + 8, 0); + // pop return address into eip + registers.setIP(addressSpace.get32(bp + 4), bp + 4); +} + +template +void CompactUnwinder_x86::framelessUnwind( + A &addressSpace, typename A::pint_t returnAddressLocation, + Registers_x86 ®isters) { + // return address is on stack after last saved register + registers.setIP(addressSpace.get32(returnAddressLocation), returnAddressLocation); + // old esp is before return address + registers.setSP((uint32_t)returnAddressLocation + 4, 0); +} +#endif // _LIBUNWIND_TARGET_I386 + + +#if defined(_LIBUNWIND_TARGET_X86_64) +/// CompactUnwinder_x86_64 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_x86_64 register set +template +class CompactUnwinder_x86_64 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters); + +private: + typename A::pint_t pint_t; + + static void frameUnwind(A &addressSpace, Registers_x86_64 ®isters); + static void framelessUnwind(A &addressSpace, uint64_t returnAddressLocation, + Registers_x86_64 ®isters); + static int + stepWithCompactEncodingRBPFrame(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_x86_64 ®isters, bool indirectStackSize); +}; + +template +int CompactUnwinder_x86_64::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, 
Registers_x86_64 ®isters) { + switch (compactEncoding & UNWIND_X86_64_MODE_MASK) { + case UNWIND_X86_64_MODE_RBP_FRAME: + return stepWithCompactEncodingRBPFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_X86_64_MODE_STACK_IMMD: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, false); + case UNWIND_X86_64_MODE_STACK_IND: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, true); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_x86_64::stepWithCompactEncodingRBPFrame( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_x86_64 ®isters) { + uint32_t savedRegistersOffset = + EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_OFFSET); + uint32_t savedRegistersLocations = + EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS); + + uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset; + for (int i = 0; i < 5; ++i) { + switch (savedRegistersLocations & 0x7) { + case UNWIND_X86_64_REG_NONE: + // no register saved in this slot + break; + case UNWIND_X86_64_REG_RBX: + registers.setRBX(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R12: + registers.setR12(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R13: + registers.setR13(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R14: + registers.setR14(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R15: + registers.setR15(addressSpace.get64(savedRegisters), savedRegisters); + break; + default: + (void)functionStart; + _LIBUNWIND_DEBUG_LOG("bad register for RBP frame, encoding=%08X for " + "function starting at 0x%llX", + compactEncoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 8; + savedRegistersLocations = (savedRegistersLocations >> 3); + } + frameUnwind(addressSpace, registers); + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_x86_64::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters, bool indirectStackSize) { + uint32_t stackSizeEncoded = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE); + uint32_t stackAdjust = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST); + uint32_t regCount = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT); + uint32_t permutation = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION); + uint32_t stackSize = stackSizeEncoded * 8; + if (indirectStackSize) { + // stack size is encoded in subl $xxx,%esp instruction + uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); + stackSize = subl + 8 * stackAdjust; + } + // decompress permutation + uint32_t permunreg[6]; + switch (regCount) { + case 6: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + permunreg[5] = 0; + break; + case 5: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + 
permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + break; + case 4: + permunreg[0] = permutation / 60; + permutation -= (permunreg[0] * 60); + permunreg[1] = permutation / 12; + permutation -= (permunreg[1] * 12); + permunreg[2] = permutation / 3; + permutation -= (permunreg[2] * 3); + permunreg[3] = permutation; + break; + case 3: + permunreg[0] = permutation / 20; + permutation -= (permunreg[0] * 20); + permunreg[1] = permutation / 4; + permutation -= (permunreg[1] * 4); + permunreg[2] = permutation; + break; + case 2: + permunreg[0] = permutation / 5; + permutation -= (permunreg[0] * 5); + permunreg[1] = permutation; + break; + case 1: + permunreg[0] = permutation; + break; + } + // re-number registers back to standard numbers + int registersSaved[6]; + bool used[7] = { false, false, false, false, false, false, false }; + for (uint32_t i = 0; i < regCount; ++i) { + uint32_t renum = 0; + for (int u = 1; u < 7; ++u) { + if (!used[u]) { + if (renum == permunreg[i]) { + registersSaved[i] = u; + used[u] = true; + break; + } + ++renum; + } + } + } + uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount; + for (uint32_t i = 0; i < regCount; ++i) { + switch (registersSaved[i]) { + case UNWIND_X86_64_REG_RBX: + registers.setRBX(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R12: + registers.setR12(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R13: + registers.setR13(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R14: + registers.setR14(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R15: + registers.setR15(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_RBP: + registers.setRBP(addressSpace.get64(savedRegisters), savedRegisters); + break; + default: + _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " + "function starting at 0x%llX", + encoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 8; + } + framelessUnwind(addressSpace, savedRegisters, registers); + return UNW_STEP_SUCCESS; +} + + +template +void CompactUnwinder_x86_64::frameUnwind(A &addressSpace, + Registers_x86_64 ®isters) { + uint64_t rbp = registers.getRBP(); + // ebp points to old ebp + registers.setRBP(addressSpace.get64(rbp), rbp); + // old esp is ebp less saved ebp and return address + registers.setSP(rbp + 16, 0); + // pop return address into eip + registers.setIP(addressSpace.get64(rbp + 8), rbp + 8); +} + +template +void CompactUnwinder_x86_64::framelessUnwind(A &addressSpace, + uint64_t returnAddressLocation, + Registers_x86_64 ®isters) { + // return address is on stack after last saved register + registers.setIP(addressSpace.get64(returnAddressLocation), returnAddressLocation); + // old esp is before return address + registers.setSP(returnAddressLocation + 8, 0); +} +#endif // _LIBUNWIND_TARGET_X86_64 + + + +#if defined(_LIBUNWIND_TARGET_AARCH64) +/// CompactUnwinder_arm64 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_arm64 register set +template +class CompactUnwinder_arm64 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_arm64 ®isters); + +private: + typename A::pint_t pint_t; + + 
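+  // One helper per arm64 compact-unwind mode: UNWIND_ARM64_MODE_FRAME
+  // (fp/lr frame with saved-register pairs below fp) and
+  // UNWIND_ARM64_MODE_FRAMELESS (fixed stack size encoded in the bits).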
static int + stepWithCompactEncodingFrame(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_arm64 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_arm64 ®isters); +}; + +template +int CompactUnwinder_arm64::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_arm64 ®isters) { + switch (compactEncoding & UNWIND_ARM64_MODE_MASK) { + case UNWIND_ARM64_MODE_FRAME: + return stepWithCompactEncodingFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_ARM64_MODE_FRAMELESS: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_arm64::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, + Registers_arm64 ®isters) { + uint32_t stackSize = + 16 * EXTRACT_BITS(encoding, UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK); + + uint64_t savedRegisterLoc = registers.getSP() + stackSize; + + if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { + registers.setRegister(UNW_ARM64_X19, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X20, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { + registers.setRegister(UNW_ARM64_X21, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X22, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { + registers.setRegister(UNW_ARM64_X23, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X24, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) { + registers.setRegister(UNW_ARM64_X25, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X26, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { + registers.setRegister(UNW_ARM64_X27, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X28, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + + if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { + registers.setFloatRegister(UNW_ARM64_D8, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D9, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { + registers.setFloatRegister(UNW_ARM64_D10, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D11, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { + registers.setFloatRegister(UNW_ARM64_D12, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + 
registers.setFloatRegister(UNW_ARM64_D13, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { + registers.setFloatRegister(UNW_ARM64_D14, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D15, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + + // subtract stack size off of sp + registers.setSP(savedRegisterLoc, 0); + + // set pc to be value in lr + registers.setIP(registers.getRegister(UNW_ARM64_LR), 0); + + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_arm64::stepWithCompactEncodingFrame( + compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, + Registers_arm64 ®isters) { + uint64_t savedRegisterLoc = registers.getFP() - 8; + + if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { + registers.setRegister(UNW_ARM64_X19, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X20, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { + registers.setRegister(UNW_ARM64_X21, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X22, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { + registers.setRegister(UNW_ARM64_X23, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X24, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) { + registers.setRegister(UNW_ARM64_X25, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X26, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { + registers.setRegister(UNW_ARM64_X27, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X28, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + + if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { + registers.setFloatRegister(UNW_ARM64_D8, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D9, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { + registers.setFloatRegister(UNW_ARM64_D10, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D11, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { + registers.setFloatRegister(UNW_ARM64_D12, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D13, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { + registers.setFloatRegister(UNW_ARM64_D14, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D15, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + + uint64_t fp = registers.getFP(); + // fp points to 
old fp + registers.setFP(addressSpace.get64(fp), fp); + // old sp is fp less saved fp and lr + registers.setSP(fp + 16, 0); + // pop return address into pc + registers.setIP(addressSpace.get64(fp + 8), fp + 8); + + return UNW_STEP_SUCCESS; +} +#endif // _LIBUNWIND_TARGET_AARCH64 + + +} // namespace libunwind + +#endif // __COMPACT_UNWINDER_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/DwarfInstructions.hpp b/src/coreclr/src/nativeaot/libunwind/src/DwarfInstructions.hpp new file mode 100644 index 0000000000000..c5cc6c9d5107e --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/DwarfInstructions.hpp @@ -0,0 +1,831 @@ +//===-------------------------- DwarfInstructions.hpp ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Processor specific interpretation of DWARF unwind info. +// +//===----------------------------------------------------------------------===// + +#ifndef __DWARF_INSTRUCTIONS_HPP__ +#define __DWARF_INSTRUCTIONS_HPP__ + +#include +#include +#include + +#include "dwarf2.h" +#include "Registers.hpp" +#include "DwarfParser.hpp" +#include "config.h" + + +namespace libunwind { + + +/// DwarfInstructions maps abtract DWARF unwind instructions to a particular +/// architecture +template +class DwarfInstructions { +public: + typedef typename A::pint_t pint_t; + typedef typename A::sint_t sint_t; + + static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart, + R ®isters); + +private: + + enum { + DW_X86_64_RET_ADDR = 16 + }; + + enum { + DW_X86_RET_ADDR = 8 + }; + + typedef typename CFI_Parser::RegisterLocation RegisterLocation; + typedef typename CFI_Parser::PrologInfo PrologInfo; + typedef typename CFI_Parser::FDE_Info FDE_Info; + typedef typename CFI_Parser::CIE_Info CIE_Info; + + static pint_t evaluateExpression(pint_t expression, A &addressSpace, + const R ®isters, + pint_t initialStackValue); + static pint_t getSavedRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg, + pint_t& location); + static double getSavedFloatRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg); + static v128 getSavedVectorRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg); + + static pint_t getCFA(A &addressSpace, const PrologInfo &prolog, + const R ®isters) { + if (prolog.cfaRegister != 0) + return (pint_t)((sint_t)registers.getRegister((int)prolog.cfaRegister) + + prolog.cfaRegisterOffset); + if (prolog.cfaExpression != 0) + return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace, + registers, 0); + assert(0 && "getCFA(): unknown location"); + __builtin_unreachable(); + } +}; + + +template +typename A::pint_t DwarfInstructions::getSavedRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg, + typename A::pint_t& location) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + location = cfa + (pint_t)savedReg.value; + return (pint_t)addressSpace.getP(location); + + case CFI_Parser::kRegisterAtExpression: + location = evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa); + return (pint_t)addressSpace.getP(location); + + case CFI_Parser::kRegisterIsExpression: + location = 0; + return evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa); + case 
CFI_Parser::kRegisterInRegister: + location = 0; + return registers.getRegister((int)savedReg.value); + + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for register"); +} + +template +double DwarfInstructions::getSavedFloatRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + return addressSpace.getDouble(cfa + (pint_t)savedReg.value); + + case CFI_Parser::kRegisterAtExpression: + return addressSpace.getDouble( + evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa)); + + case CFI_Parser::kRegisterIsExpression: + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: + case CFI_Parser::kRegisterInRegister: + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for float register"); +} + +template +v128 DwarfInstructions::getSavedVectorRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + return addressSpace.getVector(cfa + (pint_t)savedReg.value); + + case CFI_Parser::kRegisterAtExpression: + return addressSpace.getVector( + evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa)); + + case CFI_Parser::kRegisterIsExpression: + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: + case CFI_Parser::kRegisterInRegister: + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for vector register"); +} + +template +int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, + pint_t fdeStart, R ®isters) { + FDE_Info fdeInfo; + CIE_Info cieInfo; + if (CFI_Parser::decodeFDE(addressSpace, fdeStart, &fdeInfo, + &cieInfo) == NULL) { + PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc, + R::getArch(), &prolog)) { + // get pointer to cfa (architecture specific) + pint_t cfa = getCFA(addressSpace, prolog, registers); + + // restore registers that DWARF says were saved + R newRegisters = registers; + pint_t returnAddress = 0; + const int lastReg = R::lastDwarfRegNum(); + assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && + "register range too large"); + assert(lastReg >= (int)cieInfo.returnAddressRegister && + "register range does not contain return address register"); + for (int i = 0; i <= lastReg; ++i) { + if (prolog.savedRegisters[i].location != + CFI_Parser::kRegisterUnused) { + if (registers.validFloatRegister(i)) + newRegisters.setFloatRegister( + i, getSavedFloatRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i])); + else if (registers.validVectorRegister(i)) + newRegisters.setVectorRegister( + i, getSavedVectorRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i])); + else if (i == (int)cieInfo.returnAddressRegister) { + pint_t dummyLocation; + returnAddress = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + dummyLocation); + } + else if (registers.validRegister(i)) { + pint_t value; + pint_t location; + value = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + location); + + newRegisters.setRegister(i, value, location); + } + else + return UNW_EBADREG; + } + } + + // By definition, the CFA is the stack pointer at the call site, so + // restoring SP means setting it to CFA. 
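+      // (In this fork the register setters take an extra "location" argument
+      // recording where the value was restored from; 0 appears to be used
+      // when the value did not come from a memory slot.)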
+ newRegisters.setSP(cfa, 0); + +#if defined(_LIBUNWIND_TARGET_AARCH64) + // If the target is aarch64 then the return address may have been signed + // using the v8.3 pointer authentication extensions. The original + // return address needs to be authenticated before the return address is + // restored. autia1716 is used instead of autia as autia1716 assembles + // to a NOP on pre-v8.3a architectures. + if ((R::getArch() == REGISTERS_ARM64) && + prolog.savedRegisters[UNW_ARM64_RA_SIGN_STATE].value) { +#if !defined(_LIBUNWIND_IS_NATIVE_ONLY) + return UNW_ECROSSRASIGNING; +#else + register unsigned long long x17 __asm("x17") = returnAddress; + register unsigned long long x16 __asm("x16") = cfa; + + // These are the autia1716/autib1716 instructions. The hint instructions + // are used here as gcc does not assemble autia1716/autib1716 for pre + // armv8.3a targets. + if (cieInfo.addressesSignedWithBKey) + asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716 + else + asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 + returnAddress = x17; +#endif + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + if (R::getArch() == REGISTERS_SPARC) { + // Skip call site instruction and delay slot + returnAddress += 8; + // Skip unimp instruction if function returns a struct + if ((addressSpace.get32(returnAddress) & 0xC1C00000) == 0) + returnAddress += 4; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) +#define PPC64_ELFV1_R2_LOAD_INST_ENCODING 0xe8410028u // ld r2,40(r1) +#define PPC64_ELFV1_R2_OFFSET 40 +#define PPC64_ELFV2_R2_LOAD_INST_ENCODING 0xe8410018u // ld r2,24(r1) +#define PPC64_ELFV2_R2_OFFSET 24 + // If the instruction at return address is a TOC (r2) restore, + // then r2 was saved and needs to be restored. + // ELFv2 ABI specifies that the TOC Pointer must be saved at SP + 24, + // while in ELFv1 ABI it is saved at SP + 40. + if (R::getArch() == REGISTERS_PPC64 && returnAddress != 0) { + pint_t sp = newRegisters.getRegister(UNW_REG_SP); + pint_t r2 = 0; + switch (addressSpace.get32(returnAddress)) { + case PPC64_ELFV1_R2_LOAD_INST_ENCODING: + r2 = addressSpace.get64(sp + PPC64_ELFV1_R2_OFFSET); + break; + case PPC64_ELFV2_R2_LOAD_INST_ENCODING: + r2 = addressSpace.get64(sp + PPC64_ELFV2_R2_OFFSET); + break; + } + if (r2) + newRegisters.setRegister(UNW_PPC64_R2, r2); + } +#endif + + // Return address is address after call site instruction, so setting IP to + // that does simualates a return. + newRegisters.setIP(returnAddress, 0); + + // Simulate the step by replacing the register set with the new ones. 
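+      // Every restored value above was read against the original `registers`
+      // snapshot, so committing the whole set at once prevents one restored
+      // register from clobbering an input still needed for another.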
+ registers = newRegisters; + + return UNW_STEP_SUCCESS; + } + } + return UNW_EBADFRAME; +} + +template +typename A::pint_t +DwarfInstructions::evaluateExpression(pint_t expression, A &addressSpace, + const R ®isters, + pint_t initialStackValue) { + const bool log = false; + pint_t p = expression; + pint_t expressionEnd = expression + 20; // temp, until len read + pint_t length = (pint_t)addressSpace.getULEB128(p, expressionEnd); + expressionEnd = p + length; + if (log) + fprintf(stderr, "evaluateExpression(): length=%" PRIu64 "\n", + (uint64_t)length); + pint_t stack[100]; + pint_t *sp = stack; + *(++sp) = initialStackValue; + + while (p < expressionEnd) { + if (log) { + for (pint_t *t = sp; t > stack; --t) { + fprintf(stderr, "sp[] = 0x%" PRIx64 "\n", (uint64_t)(*t)); + } + } + uint8_t opcode = addressSpace.get8(p++); + sint_t svalue, svalue2; + pint_t value; + uint32_t reg; + switch (opcode) { + case DW_OP_addr: + // push immediate address sized value + value = addressSpace.getP(p); + p += sizeof(pint_t); + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_deref: + // pop stack, dereference, push result + value = *sp--; + *(++sp) = addressSpace.getP(value); + if (log) + fprintf(stderr, "dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const1u: + // push immediate 1 byte value + value = addressSpace.get8(p); + p += 1; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const1s: + // push immediate 1 byte signed value + svalue = (int8_t) addressSpace.get8(p); + p += 1; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const2u: + // push immediate 2 byte value + value = addressSpace.get16(p); + p += 2; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const2s: + // push immediate 2 byte signed value + svalue = (int16_t) addressSpace.get16(p); + p += 2; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const4u: + // push immediate 4 byte value + value = addressSpace.get32(p); + p += 4; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const4s: + // push immediate 4 byte signed value + svalue = (int32_t)addressSpace.get32(p); + p += 4; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const8u: + // push immediate 8 byte value + value = (pint_t)addressSpace.get64(p); + p += 8; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const8s: + // push immediate 8 byte signed value + value = (pint_t)addressSpace.get64(p); + p += 8; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_constu: + // push immediate ULEB128 value + value = (pint_t)addressSpace.getULEB128(p, expressionEnd); + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_consts: + // push immediate SLEB128 value + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_dup: + // push top of stack + value = *sp; + *(++sp) = value; + if 
(log) + fprintf(stderr, "duplicate top of stack\n"); + break; + + case DW_OP_drop: + // pop + --sp; + if (log) + fprintf(stderr, "pop top of stack\n"); + break; + + case DW_OP_over: + // dup second + value = sp[-1]; + *(++sp) = value; + if (log) + fprintf(stderr, "duplicate second in stack\n"); + break; + + case DW_OP_pick: + // pick from + reg = addressSpace.get8(p); + p += 1; + value = sp[-reg]; + *(++sp) = value; + if (log) + fprintf(stderr, "duplicate %d in stack\n", reg); + break; + + case DW_OP_swap: + // swap top two + value = sp[0]; + sp[0] = sp[-1]; + sp[-1] = value; + if (log) + fprintf(stderr, "swap top of stack\n"); + break; + + case DW_OP_rot: + // rotate top three + value = sp[0]; + sp[0] = sp[-1]; + sp[-1] = sp[-2]; + sp[-2] = value; + if (log) + fprintf(stderr, "rotate top three of stack\n"); + break; + + case DW_OP_xderef: + // pop stack, dereference, push result + value = *sp--; + *sp = *((pint_t*)value); + if (log) + fprintf(stderr, "x-dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_abs: + svalue = (sint_t)*sp; + if (svalue < 0) + *sp = (pint_t)(-svalue); + if (log) + fprintf(stderr, "abs\n"); + break; + + case DW_OP_and: + value = *sp--; + *sp &= value; + if (log) + fprintf(stderr, "and\n"); + break; + + case DW_OP_div: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 / svalue); + if (log) + fprintf(stderr, "div\n"); + break; + + case DW_OP_minus: + value = *sp--; + *sp = *sp - value; + if (log) + fprintf(stderr, "minus\n"); + break; + + case DW_OP_mod: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 % svalue); + if (log) + fprintf(stderr, "module\n"); + break; + + case DW_OP_mul: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 * svalue); + if (log) + fprintf(stderr, "mul\n"); + break; + + case DW_OP_neg: + *sp = 0 - *sp; + if (log) + fprintf(stderr, "neg\n"); + break; + + case DW_OP_not: + svalue = (sint_t)(*sp); + *sp = (pint_t)(~svalue); + if (log) + fprintf(stderr, "not\n"); + break; + + case DW_OP_or: + value = *sp--; + *sp |= value; + if (log) + fprintf(stderr, "or\n"); + break; + + case DW_OP_plus: + value = *sp--; + *sp += value; + if (log) + fprintf(stderr, "plus\n"); + break; + + case DW_OP_plus_uconst: + // pop stack, add uelb128 constant, push result + *sp += static_cast(addressSpace.getULEB128(p, expressionEnd)); + if (log) + fprintf(stderr, "add constant\n"); + break; + + case DW_OP_shl: + value = *sp--; + *sp = *sp << value; + if (log) + fprintf(stderr, "shift left\n"); + break; + + case DW_OP_shr: + value = *sp--; + *sp = *sp >> value; + if (log) + fprintf(stderr, "shift left\n"); + break; + + case DW_OP_shra: + value = *sp--; + svalue = (sint_t)*sp; + *sp = (pint_t)(svalue >> value); + if (log) + fprintf(stderr, "shift left arithmetric\n"); + break; + + case DW_OP_xor: + value = *sp--; + *sp ^= value; + if (log) + fprintf(stderr, "xor\n"); + break; + + case DW_OP_skip: + svalue = (int16_t) addressSpace.get16(p); + p += 2; + p = (pint_t)((sint_t)p + svalue); + if (log) + fprintf(stderr, "skip %" PRIu64 "\n", (uint64_t)svalue); + break; + + case DW_OP_bra: + svalue = (int16_t) addressSpace.get16(p); + p += 2; + if (*sp--) + p = (pint_t)((sint_t)p + svalue); + if (log) + fprintf(stderr, "bra %" PRIu64 "\n", (uint64_t)svalue); + break; + + case DW_OP_eq: + value = *sp--; + *sp = (*sp == value); + if (log) + fprintf(stderr, "eq\n"); + break; + + case DW_OP_ge: + value = *sp--; + *sp = (*sp >= value); + if (log) + fprintf(stderr, "ge\n"); + break; 
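+    // The remaining relational opcodes below (gt/le/lt/ne) follow the same
+    // pattern: pop one operand and replace the new top of stack with 0 or 1.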
+ + case DW_OP_gt: + value = *sp--; + *sp = (*sp > value); + if (log) + fprintf(stderr, "gt\n"); + break; + + case DW_OP_le: + value = *sp--; + *sp = (*sp <= value); + if (log) + fprintf(stderr, "le\n"); + break; + + case DW_OP_lt: + value = *sp--; + *sp = (*sp < value); + if (log) + fprintf(stderr, "lt\n"); + break; + + case DW_OP_ne: + value = *sp--; + *sp = (*sp != value); + if (log) + fprintf(stderr, "ne\n"); + break; + + case DW_OP_lit0: + case DW_OP_lit1: + case DW_OP_lit2: + case DW_OP_lit3: + case DW_OP_lit4: + case DW_OP_lit5: + case DW_OP_lit6: + case DW_OP_lit7: + case DW_OP_lit8: + case DW_OP_lit9: + case DW_OP_lit10: + case DW_OP_lit11: + case DW_OP_lit12: + case DW_OP_lit13: + case DW_OP_lit14: + case DW_OP_lit15: + case DW_OP_lit16: + case DW_OP_lit17: + case DW_OP_lit18: + case DW_OP_lit19: + case DW_OP_lit20: + case DW_OP_lit21: + case DW_OP_lit22: + case DW_OP_lit23: + case DW_OP_lit24: + case DW_OP_lit25: + case DW_OP_lit26: + case DW_OP_lit27: + case DW_OP_lit28: + case DW_OP_lit29: + case DW_OP_lit30: + case DW_OP_lit31: + value = static_cast(opcode - DW_OP_lit0); + *(++sp) = value; + if (log) + fprintf(stderr, "push literal 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_reg0: + case DW_OP_reg1: + case DW_OP_reg2: + case DW_OP_reg3: + case DW_OP_reg4: + case DW_OP_reg5: + case DW_OP_reg6: + case DW_OP_reg7: + case DW_OP_reg8: + case DW_OP_reg9: + case DW_OP_reg10: + case DW_OP_reg11: + case DW_OP_reg12: + case DW_OP_reg13: + case DW_OP_reg14: + case DW_OP_reg15: + case DW_OP_reg16: + case DW_OP_reg17: + case DW_OP_reg18: + case DW_OP_reg19: + case DW_OP_reg20: + case DW_OP_reg21: + case DW_OP_reg22: + case DW_OP_reg23: + case DW_OP_reg24: + case DW_OP_reg25: + case DW_OP_reg26: + case DW_OP_reg27: + case DW_OP_reg28: + case DW_OP_reg29: + case DW_OP_reg30: + case DW_OP_reg31: + reg = static_cast(opcode - DW_OP_reg0); + *(++sp) = registers.getRegister((int)reg); + if (log) + fprintf(stderr, "push reg %d\n", reg); + break; + + case DW_OP_regx: + reg = static_cast(addressSpace.getULEB128(p, expressionEnd)); + *(++sp) = registers.getRegister((int)reg); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_breg0: + case DW_OP_breg1: + case DW_OP_breg2: + case DW_OP_breg3: + case DW_OP_breg4: + case DW_OP_breg5: + case DW_OP_breg6: + case DW_OP_breg7: + case DW_OP_breg8: + case DW_OP_breg9: + case DW_OP_breg10: + case DW_OP_breg11: + case DW_OP_breg12: + case DW_OP_breg13: + case DW_OP_breg14: + case DW_OP_breg15: + case DW_OP_breg16: + case DW_OP_breg17: + case DW_OP_breg18: + case DW_OP_breg19: + case DW_OP_breg20: + case DW_OP_breg21: + case DW_OP_breg22: + case DW_OP_breg23: + case DW_OP_breg24: + case DW_OP_breg25: + case DW_OP_breg26: + case DW_OP_breg27: + case DW_OP_breg28: + case DW_OP_breg29: + case DW_OP_breg30: + case DW_OP_breg31: + reg = static_cast(opcode - DW_OP_breg0); + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + svalue += static_cast(registers.getRegister((int)reg)); + *(++sp) = (pint_t)(svalue); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_bregx: + reg = static_cast(addressSpace.getULEB128(p, expressionEnd)); + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + svalue += static_cast(registers.getRegister((int)reg)); + *(++sp) = (pint_t)(svalue); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_fbreg: + _LIBUNWIND_ABORT("DW_OP_fbreg not 
implemented"); + break; + + case DW_OP_piece: + _LIBUNWIND_ABORT("DW_OP_piece not implemented"); + break; + + case DW_OP_deref_size: + // pop stack, dereference, push result + value = *sp--; + switch (addressSpace.get8(p++)) { + case 1: + value = addressSpace.get8(value); + break; + case 2: + value = addressSpace.get16(value); + break; + case 4: + value = addressSpace.get32(value); + break; + case 8: + value = (pint_t)addressSpace.get64(value); + break; + default: + _LIBUNWIND_ABORT("DW_OP_deref_size with bad size"); + } + *(++sp) = value; + if (log) + fprintf(stderr, "sized dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_xderef_size: + case DW_OP_nop: + case DW_OP_push_object_addres: + case DW_OP_call2: + case DW_OP_call4: + case DW_OP_call_ref: + default: + _LIBUNWIND_ABORT("DWARF opcode not implemented"); + } + + } + if (log) + fprintf(stderr, "expression evaluates to 0x%" PRIx64 "\n", (uint64_t)*sp); + return *sp; +} + + + +} // namespace libunwind + +#endif // __DWARF_INSTRUCTIONS_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/DwarfParser.hpp b/src/coreclr/src/nativeaot/libunwind/src/DwarfParser.hpp new file mode 100644 index 0000000000000..a2ebf3bb0e189 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/DwarfParser.hpp @@ -0,0 +1,766 @@ +//===--------------------------- DwarfParser.hpp --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Parses DWARF CFIs (FDEs and CIEs). +// +//===----------------------------------------------------------------------===// + +#ifndef __DWARF_PARSER_HPP__ +#define __DWARF_PARSER_HPP__ +#define __STDC_FORMAT_MACROS + +#include +#include +#include +#include + +#include "libunwind.h" +#include "dwarf2.h" +#include "Registers.hpp" + +#include "config.h" + +namespace libunwind { + +/// CFI_Parser does basic parsing of a CFI (Call Frame Information) records. 
+/// See DWARF Spec for details: +/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +/// +template +class CFI_Parser { +public: + typedef typename A::pint_t pint_t; + + /// Information encoded in a CIE (Common Information Entry) + struct CIE_Info { + pint_t cieStart; + pint_t cieLength; + pint_t cieInstructions; + uint8_t pointerEncoding; + uint8_t lsdaEncoding; + uint8_t personalityEncoding; + uint8_t personalityOffsetInCIE; + pint_t personality; + uint32_t codeAlignFactor; + int dataAlignFactor; + bool isSignalFrame; + bool fdesHaveAugmentationData; + uint8_t returnAddressRegister; +#if defined(_LIBUNWIND_TARGET_AARCH64) + bool addressesSignedWithBKey; +#endif + }; + + /// Information about an FDE (Frame Description Entry) + struct FDE_Info { + pint_t fdeStart; + pint_t fdeLength; + pint_t fdeInstructions; + pint_t pcStart; + pint_t pcEnd; + pint_t lsda; + }; + + enum { + kMaxRegisterNumber = _LIBUNWIND_HIGHEST_DWARF_REGISTER + }; + enum RegisterSavedWhere { + kRegisterUnused, + kRegisterInCFA, + kRegisterOffsetFromCFA, + kRegisterInRegister, + kRegisterAtExpression, + kRegisterIsExpression + }; + struct RegisterLocation { + RegisterSavedWhere location; + int64_t value; + }; + /// Information about a frame layout and registers saved determined + /// by "running" the DWARF FDE "instructions" + struct PrologInfo { + uint32_t cfaRegister; + int32_t cfaRegisterOffset; // CFA = (cfaRegister)+cfaRegisterOffset + int64_t cfaExpression; // CFA = expression + uint32_t spExtraArgSize; + uint32_t codeOffsetAtStackDecrement; + bool registersInOtherRegisters; + bool sameValueUsed; + RegisterLocation savedRegisters[kMaxRegisterNumber + 1]; + }; + + struct PrologInfoStackEntry { + PrologInfoStackEntry(PrologInfoStackEntry *n, const PrologInfo &i) + : next(n), info(i) {} + PrologInfoStackEntry *next; + PrologInfo info; + }; + + static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, + uint32_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo, + CIE_Info *cieInfo); + static const char *decodeFDE(A &addressSpace, pint_t fdeStart, + FDE_Info *fdeInfo, CIE_Info *cieInfo); + static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo, + const CIE_Info &cieInfo, pint_t upToPC, + int arch, PrologInfo *results); + + static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo); + +private: + static bool parseInstructions(A &addressSpace, pint_t instructions, + pint_t instructionsEnd, const CIE_Info &cieInfo, + pint_t pcoffset, + PrologInfoStackEntry *&rememberStack, int arch, + PrologInfo *results); +}; + +/// Parse a FDE into a CIE_Info and an FDE_Info +template +const char *CFI_Parser::decodeFDE(A &addressSpace, pint_t fdeStart, + FDE_Info *fdeInfo, CIE_Info *cieInfo) { + pint_t p = fdeStart; + pint_t cfiLength = (pint_t)addressSpace.get32(p); + p += 4; + if (cfiLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cfiLength = (pint_t)addressSpace.get64(p); + p += 8; + } + if (cfiLength == 0) + return "FDE has zero length"; // end marker + uint32_t ciePointer = addressSpace.get32(p); + if (ciePointer == 0) + return "FDE is really a CIE"; // this is a CIE not an FDE + pint_t nextCFI = p + cfiLength; + pint_t cieStart = p - ciePointer; + const char *err = parseCIE(addressSpace, cieStart, cieInfo); + if (err != NULL) + return err; + p += 4; + // Parse pc begin and range. 
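+  // pcStart is read with the CIE's full pointer encoding; pcRange masks off
+  // the upper nibble so the length is decoded as a plain value rather than
+  // pc-relative or indirect.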
+ pint_t pcStart = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); + pint_t pcRange = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding & 0x0F); + // Parse rest of info. + fdeInfo->lsda = 0; + // Check for augmentation length. + if (cieInfo->fdesHaveAugmentationData) { + pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); + pint_t endOfAug = p + augLen; + if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { + // Peek at value (without indirection). Zero means no LSDA. + pint_t lsdaStart = p; + if (addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != + 0) { + // Reset pointer and re-parse LSDA address. + p = lsdaStart; + fdeInfo->lsda = + addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); + } + } + p = endOfAug; + } + fdeInfo->fdeStart = fdeStart; + fdeInfo->fdeLength = nextCFI - fdeStart; + fdeInfo->fdeInstructions = p; + fdeInfo->pcStart = pcStart; + fdeInfo->pcEnd = pcStart + pcRange; + return NULL; // success +} + +/// Scan an eh_frame section to find an FDE for a pc +template +bool CFI_Parser::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, + uint32_t sectionLength, pint_t fdeHint, + FDE_Info *fdeInfo, CIE_Info *cieInfo) { + //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc); + pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart; + const pint_t ehSectionEnd = p + sectionLength; + while (p < ehSectionEnd) { + pint_t currentCFI = p; + //fprintf(stderr, "findFDE() CFI at 0x%llX\n", (long long)p); + pint_t cfiLength = addressSpace.get32(p); + p += 4; + if (cfiLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cfiLength = (pint_t)addressSpace.get64(p); + p += 8; + } + if (cfiLength == 0) + return false; // end marker + uint32_t id = addressSpace.get32(p); + if (id == 0) { + // Skip over CIEs. + p += cfiLength; + } else { + // Process FDE to see if it covers pc. + pint_t nextCFI = p + cfiLength; + uint32_t ciePointer = addressSpace.get32(p); + pint_t cieStart = p - ciePointer; + // Validate pointer to CIE is within section. + if ((ehSectionStart <= cieStart) && (cieStart < ehSectionEnd)) { + if (parseCIE(addressSpace, cieStart, cieInfo) == NULL) { + p += 4; + // Parse pc begin and range. + pint_t pcStart = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); + pint_t pcRange = addressSpace.getEncodedP( + p, nextCFI, cieInfo->pointerEncoding & 0x0F); + // Test if pc is within the function this FDE covers. + if ((pcStart < pc) && (pc <= pcStart + pcRange)) { + // parse rest of info + fdeInfo->lsda = 0; + // check for augmentation length + if (cieInfo->fdesHaveAugmentationData) { + pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); + pint_t endOfAug = p + augLen; + if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { + // Peek at value (without indirection). Zero means no LSDA. + pint_t lsdaStart = p; + if (addressSpace.getEncodedP( + p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != 0) { + // Reset pointer and re-parse LSDA address. + p = lsdaStart; + fdeInfo->lsda = addressSpace + .getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); + } + } + p = endOfAug; + } + fdeInfo->fdeStart = currentCFI; + fdeInfo->fdeLength = nextCFI - currentCFI; + fdeInfo->fdeInstructions = p; + fdeInfo->pcStart = pcStart; + fdeInfo->pcEnd = pcStart + pcRange; + return true; + } else { + // pc is not in begin/range, skip this FDE + } + } else { + // Malformed CIE, now augmentation describing pc range encoding. + } + } else { + // malformed FDE. 
CIE is bad + } + p = nextCFI; + } + } + return false; +} + +/// Extract info from a CIE +template +const char *CFI_Parser::parseCIE(A &addressSpace, pint_t cie, + CIE_Info *cieInfo) { + cieInfo->pointerEncoding = 0; + cieInfo->lsdaEncoding = DW_EH_PE_omit; + cieInfo->personalityEncoding = 0; + cieInfo->personalityOffsetInCIE = 0; + cieInfo->personality = 0; + cieInfo->codeAlignFactor = 0; + cieInfo->dataAlignFactor = 0; + cieInfo->isSignalFrame = false; + cieInfo->fdesHaveAugmentationData = false; +#if defined(_LIBUNWIND_TARGET_AARCH64) + cieInfo->addressesSignedWithBKey = false; +#endif + cieInfo->cieStart = cie; + pint_t p = cie; + pint_t cieLength = (pint_t)addressSpace.get32(p); + p += 4; + pint_t cieContentEnd = p + cieLength; + if (cieLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cieLength = (pint_t)addressSpace.get64(p); + p += 8; + cieContentEnd = p + cieLength; + } + if (cieLength == 0) + return NULL; + // CIE ID is always 0 + if (addressSpace.get32(p) != 0) + return "CIE ID is not zero"; + p += 4; + // Version is always 1 or 3 + uint8_t version = addressSpace.get8(p); + if ((version != 1) && (version != 3)) + return "CIE version is not 1 or 3"; + ++p; + // save start of augmentation string and find end + pint_t strStart = p; + while (addressSpace.get8(p) != 0) + ++p; + ++p; + // parse code aligment factor + cieInfo->codeAlignFactor = (uint32_t)addressSpace.getULEB128(p, cieContentEnd); + // parse data alignment factor + cieInfo->dataAlignFactor = (int)addressSpace.getSLEB128(p, cieContentEnd); + // parse return address register + uint64_t raReg = addressSpace.getULEB128(p, cieContentEnd); + assert(raReg < 255 && "return address register too large"); + cieInfo->returnAddressRegister = (uint8_t)raReg; + // parse augmentation data based on augmentation string + const char *result = NULL; + if (addressSpace.get8(strStart) == 'z') { + // parse augmentation data length + addressSpace.getULEB128(p, cieContentEnd); + for (pint_t s = strStart; addressSpace.get8(s) != '\0'; ++s) { + switch (addressSpace.get8(s)) { + case 'z': + cieInfo->fdesHaveAugmentationData = true; + break; + case 'P': + cieInfo->personalityEncoding = addressSpace.get8(p); + ++p; + cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie); + cieInfo->personality = addressSpace + .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding); + break; + case 'L': + cieInfo->lsdaEncoding = addressSpace.get8(p); + ++p; + break; + case 'R': + cieInfo->pointerEncoding = addressSpace.get8(p); + ++p; + break; + case 'S': + cieInfo->isSignalFrame = true; + break; +#if defined(_LIBUNWIND_TARGET_AARCH64) + case 'B': + cieInfo->addressesSignedWithBKey = true; + break; +#endif + default: + // ignore unknown letters + break; + } + } + } + cieInfo->cieLength = cieContentEnd - cieInfo->cieStart; + cieInfo->cieInstructions = p; + return result; +} + + +/// "run" the DWARF instructions and create the abstact PrologInfo for an FDE +template +bool CFI_Parser::parseFDEInstructions(A &addressSpace, + const FDE_Info &fdeInfo, + const CIE_Info &cieInfo, pint_t upToPC, + int arch, PrologInfo *results) { + // clear results + memset(results, '\0', sizeof(PrologInfo)); + PrologInfoStackEntry *rememberStack = NULL; + + // parse CIE then FDE instructions + return parseInstructions(addressSpace, cieInfo.cieInstructions, + cieInfo.cieStart + cieInfo.cieLength, cieInfo, + (pint_t)(-1), rememberStack, arch, results) && + parseInstructions(addressSpace, fdeInfo.fdeInstructions, + fdeInfo.fdeStart + fdeInfo.fdeLength, cieInfo, + 
upToPC - fdeInfo.pcStart, rememberStack, arch, + results); +} + +/// "run" the DWARF instructions +template +bool CFI_Parser::parseInstructions(A &addressSpace, pint_t instructions, + pint_t instructionsEnd, + const CIE_Info &cieInfo, pint_t pcoffset, + PrologInfoStackEntry *&rememberStack, + int arch, PrologInfo *results) { + pint_t p = instructions; + pint_t codeOffset = 0; + PrologInfo initialState = *results; + + _LIBUNWIND_TRACE_DWARF("parseInstructions(instructions=0x%0" PRIx64 ")\n", + static_cast(instructionsEnd)); + + // see DWARF Spec, section 6.4.2 for details on unwind opcodes + while ((p < instructionsEnd) && (codeOffset < pcoffset)) { + uint64_t reg; + uint64_t reg2; + int64_t offset; + uint64_t length; + uint8_t opcode = addressSpace.get8(p); + uint8_t operand; +#if !defined(_LIBUNWIND_NO_HEAP) + PrologInfoStackEntry *entry; +#endif + ++p; + switch (opcode) { + case DW_CFA_nop: + _LIBUNWIND_TRACE_DWARF("DW_CFA_nop\n"); + break; + case DW_CFA_set_loc: + codeOffset = + addressSpace.getEncodedP(p, instructionsEnd, cieInfo.pointerEncoding); + _LIBUNWIND_TRACE_DWARF("DW_CFA_set_loc\n"); + break; + case DW_CFA_advance_loc1: + codeOffset += (addressSpace.get8(p) * cieInfo.codeAlignFactor); + p += 1; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc1: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_advance_loc2: + codeOffset += (addressSpace.get16(p) * cieInfo.codeAlignFactor); + p += 2; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc2: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_advance_loc4: + codeOffset += (addressSpace.get32(p) * cieInfo.codeAlignFactor); + p += 4; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc4: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_offset_extended: + reg = addressSpace.getULEB128(p, instructionsEnd); + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_offset_extended DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_offset_extended(reg=%" PRIu64 ", " + "offset=%" PRId64 ")\n", + reg, offset); + break; + case DW_CFA_restore_extended: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_restore_extended DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg] = initialState.savedRegisters[reg]; + _LIBUNWIND_TRACE_DWARF("DW_CFA_restore_extended(reg=%" PRIu64 ")\n", reg); + break; + case DW_CFA_undefined: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_undefined DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterUnused; + _LIBUNWIND_TRACE_DWARF("DW_CFA_undefined(reg=%" PRIu64 ")\n", reg); + break; + case DW_CFA_same_value: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_same_value DWARF unwind, reg too big"); + return false; + } + // DW_CFA_same_value unsupported + // "same value" means register was stored in frame, but its current + // value has not changed, so no need to restore from frame. + // We model this as if the register was never saved. 
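+      // (Unlike DW_CFA_undefined, the caller's value is still live in the
+      // register; there is simply nothing to restore, so kRegisterUnused is
+      // an adequate model for unwinding.)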
+ results->savedRegisters[reg].location = kRegisterUnused; + // set flag to disable conversion to compact unwind + results->sameValueUsed = true; + _LIBUNWIND_TRACE_DWARF("DW_CFA_same_value(reg=%" PRIu64 ")\n", reg); + break; + case DW_CFA_register: + reg = addressSpace.getULEB128(p, instructionsEnd); + reg2 = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_register DWARF unwind, reg too big"); + return false; + } + if (reg2 > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_register DWARF unwind, reg2 too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterInRegister; + results->savedRegisters[reg].value = (int64_t)reg2; + // set flag to disable conversion to compact unwind + results->registersInOtherRegisters = true; + _LIBUNWIND_TRACE_DWARF( + "DW_CFA_register(reg=%" PRIu64 ", reg2=%" PRIu64 ")\n", reg, reg2); + break; +#if !defined(_LIBUNWIND_NO_HEAP) + case DW_CFA_remember_state: + // avoid operator new, because that would be an upward dependency + entry = (PrologInfoStackEntry *)malloc(sizeof(PrologInfoStackEntry)); + if (entry != NULL) { + entry->next = rememberStack; + entry->info = *results; + rememberStack = entry; + } else { + return false; + } + _LIBUNWIND_TRACE_DWARF("DW_CFA_remember_state\n"); + break; + case DW_CFA_restore_state: + if (rememberStack != NULL) { + PrologInfoStackEntry *top = rememberStack; + *results = top->info; + rememberStack = top->next; + free((char *)top); + } else { + return false; + } + _LIBUNWIND_TRACE_DWARF("DW_CFA_restore_state\n"); + break; +#endif + case DW_CFA_def_cfa: + reg = addressSpace.getULEB128(p, instructionsEnd); + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0("malformed DW_CFA_def_cfa DWARF unwind, reg too big"); + return false; + } + results->cfaRegister = (uint32_t)reg; + results->cfaRegisterOffset = (int32_t)offset; + _LIBUNWIND_TRACE_DWARF( + "DW_CFA_def_cfa(reg=%" PRIu64 ", offset=%" PRIu64 ")\n", reg, offset); + break; + case DW_CFA_def_cfa_register: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_def_cfa_register DWARF unwind, reg too big"); + return false; + } + results->cfaRegister = (uint32_t)reg; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_register(%" PRIu64 ")\n", reg); + break; + case DW_CFA_def_cfa_offset: + results->cfaRegisterOffset = (int32_t) + addressSpace.getULEB128(p, instructionsEnd); + results->codeOffsetAtStackDecrement = (uint32_t)codeOffset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_offset(%d)\n", + results->cfaRegisterOffset); + break; + case DW_CFA_def_cfa_expression: + results->cfaRegister = 0; + results->cfaExpression = (int64_t)p; + length = addressSpace.getULEB128(p, instructionsEnd); + assert(length < static_cast(~0) && "pointer overflow"); + p += static_cast(length); + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_expression(expression=0x%" PRIx64 + ", length=%" PRIu64 ")\n", + results->cfaExpression, length); + break; + case DW_CFA_expression: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_expression DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterAtExpression; + results->savedRegisters[reg].value = (int64_t)p; + length = addressSpace.getULEB128(p, instructionsEnd); + assert(length < static_cast(~0) && "pointer overflow"); + p += 
static_cast(length); + _LIBUNWIND_TRACE_DWARF("DW_CFA_expression(reg=%" PRIu64 ", " + "expression=0x%" PRIx64 ", " + "length=%" PRIu64 ")\n", + reg, results->savedRegisters[reg].value, length); + break; + case DW_CFA_offset_extended_sf: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_offset_extended_sf DWARF unwind, reg too big"); + return false; + } + offset = + addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_offset_extended_sf(reg=%" PRIu64 ", " + "offset=%" PRId64 ")\n", + reg, offset); + break; + case DW_CFA_def_cfa_sf: + reg = addressSpace.getULEB128(p, instructionsEnd); + offset = + addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_def_cfa_sf DWARF unwind, reg too big"); + return false; + } + results->cfaRegister = (uint32_t)reg; + results->cfaRegisterOffset = (int32_t)offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_sf(reg=%" PRIu64 ", " + "offset=%" PRId64 ")\n", + reg, offset); + break; + case DW_CFA_def_cfa_offset_sf: + results->cfaRegisterOffset = (int32_t) + (addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor); + results->codeOffsetAtStackDecrement = (uint32_t)codeOffset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_offset_sf(%d)\n", + results->cfaRegisterOffset); + break; + case DW_CFA_val_offset: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG( + "malformed DW_CFA_val_offset DWARF unwind, reg (%" PRIu64 + ") out of range\n", + reg); + return false; + } + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterOffsetFromCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_val_offset(reg=%" PRIu64 ", " + "offset=%" PRId64 "\n", + reg, offset); + break; + case DW_CFA_val_offset_sf: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_val_offset_sf DWARF unwind, reg too big"); + return false; + } + offset = + addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterOffsetFromCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_val_offset_sf(reg=%" PRIu64 ", " + "offset=%" PRId64 "\n", + reg, offset); + break; + case DW_CFA_val_expression: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_val_expression DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterIsExpression; + results->savedRegisters[reg].value = (int64_t)p; + length = addressSpace.getULEB128(p, instructionsEnd); + assert(length < static_cast(~0) && "pointer overflow"); + p += static_cast(length); + _LIBUNWIND_TRACE_DWARF("DW_CFA_val_expression(reg=%" PRIu64 ", " + "expression=0x%" PRIx64 ", length=%" PRIu64 ")\n", + reg, results->savedRegisters[reg].value, length); + break; + case DW_CFA_GNU_args_size: + length = addressSpace.getULEB128(p, instructionsEnd); + results->spExtraArgSize = (uint32_t)length; + _LIBUNWIND_TRACE_DWARF("DW_CFA_GNU_args_size(%" PRIu64 ")\n", length); + break; + case 
DW_CFA_GNU_negative_offset_extended: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0("malformed DW_CFA_GNU_negative_offset_extended DWARF " + "unwind, reg too big"); + return false; + } + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = -offset; + _LIBUNWIND_TRACE_DWARF( + "DW_CFA_GNU_negative_offset_extended(%" PRId64 ")\n", offset); + break; + +#if defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_SPARC) + // The same constant is used to represent different instructions on + // AArch64 (negate_ra_state) and SPARC (window_save). + static_assert(DW_CFA_AARCH64_negate_ra_state == DW_CFA_GNU_window_save, + "uses the same constant"); + case DW_CFA_AARCH64_negate_ra_state: + switch (arch) { +#if defined(_LIBUNWIND_TARGET_AARCH64) + case REGISTERS_ARM64: + results->savedRegisters[UNW_ARM64_RA_SIGN_STATE].value ^= 0x1; + _LIBUNWIND_TRACE_DWARF("DW_CFA_AARCH64_negate_ra_state\n"); + break; +#endif +#if defined(_LIBUNWIND_TARGET_SPARC) + // case DW_CFA_GNU_window_save: + case REGISTERS_SPARC: + _LIBUNWIND_TRACE_DWARF("DW_CFA_GNU_window_save()\n"); + for (reg = UNW_SPARC_O0; reg <= UNW_SPARC_O7; reg++) { + results->savedRegisters[reg].location = kRegisterInRegister; + results->savedRegisters[reg].value = + ((int64_t)reg - UNW_SPARC_O0) + UNW_SPARC_I0; + } + + for (reg = UNW_SPARC_L0; reg <= UNW_SPARC_I7; reg++) { + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = + ((int64_t)reg - UNW_SPARC_L0) * 4; + } + break; +#endif + } + break; +#else + (void)arch; +#endif + + default: + operand = opcode & 0x3F; + switch (opcode & 0xC0) { + case DW_CFA_offset: + reg = operand; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG("malformed DW_CFA_offset DWARF unwind, reg (%" PRIu64 + ") out of range", + reg); + return false; + } + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_offset(reg=%d, offset=%" PRId64 ")\n", + operand, offset); + break; + case DW_CFA_advance_loc: + codeOffset += operand * cieInfo.codeAlignFactor; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_restore: + reg = operand; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG("malformed DW_CFA_restore DWARF unwind, reg (%" PRIu64 + ") out of range", + reg); + return false; + } + results->savedRegisters[reg] = initialState.savedRegisters[reg]; + _LIBUNWIND_TRACE_DWARF("DW_CFA_restore(reg=%" PRIu64 ")\n", + static_cast(operand)); + break; + default: + _LIBUNWIND_TRACE_DWARF("unknown CFA opcode 0x%02X\n", opcode); + return false; + } + } + } + + return true; +} + +} // namespace libunwind + +#endif // __DWARF_PARSER_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/EHHeaderParser.hpp b/src/coreclr/src/nativeaot/libunwind/src/EHHeaderParser.hpp new file mode 100644 index 0000000000000..0101835b8e63d --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/EHHeaderParser.hpp @@ -0,0 +1,167 @@ +//===------------------------- EHHeaderParser.hpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Parses ELF .eh_frame_hdr sections. +// +//===----------------------------------------------------------------------===// + +#ifndef __EHHEADERPARSER_HPP__ +#define __EHHEADERPARSER_HPP__ + +#include "libunwind.h" + +#include "DwarfParser.hpp" + +namespace libunwind { + +/// \brief EHHeaderParser does basic parsing of an ELF .eh_frame_hdr section. +/// +/// See DWARF spec for details: +/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +/// +template class EHHeaderParser { +public: + typedef typename A::pint_t pint_t; + + /// Information encoded in the EH frame header. + struct EHHeaderInfo { + pint_t eh_frame_ptr; + size_t fde_count; + pint_t table; + uint8_t table_enc; + }; + + static bool decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd, + EHHeaderInfo &ehHdrInfo); + static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, + uint32_t sectionLength, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo); + +private: + static bool decodeTableEntry(A &addressSpace, pint_t &tableEntry, + pint_t ehHdrStart, pint_t ehHdrEnd, + uint8_t tableEnc, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo); + static size_t getTableEntrySize(uint8_t tableEnc); +}; + +template +bool EHHeaderParser::decodeEHHdr(A &addressSpace, pint_t ehHdrStart, + pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) { + pint_t p = ehHdrStart; + uint8_t version = addressSpace.get8(p++); + if (version != 1) { + _LIBUNWIND_LOG0("Unsupported .eh_frame_hdr version"); + return false; + } + + uint8_t eh_frame_ptr_enc = addressSpace.get8(p++); + uint8_t fde_count_enc = addressSpace.get8(p++); + ehHdrInfo.table_enc = addressSpace.get8(p++); + + ehHdrInfo.eh_frame_ptr = + addressSpace.getEncodedP(p, ehHdrEnd, eh_frame_ptr_enc, ehHdrStart); + ehHdrInfo.fde_count = + fde_count_enc == DW_EH_PE_omit + ? 0 + : addressSpace.getEncodedP(p, ehHdrEnd, fde_count_enc, ehHdrStart); + ehHdrInfo.table = p; + + return true; +} + +template +bool EHHeaderParser::decodeTableEntry( + A &addressSpace, pint_t &tableEntry, pint_t ehHdrStart, pint_t ehHdrEnd, + uint8_t tableEnc, typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo) { + // Have to decode the whole FDE for the PC range anyway, so just throw away + // the PC start. 
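+  // Each binary-search table entry is an (initial PC, FDE address) pair,
+  // both encoded with the header's table_enc (typically datarel, relative to
+  // the start of .eh_frame_hdr).  The first read below skips the PC field;
+  // the second yields the FDE pointer that is handed to decodeFDE.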
+ addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); + pint_t fde = + addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); + const char *message = + CFI_Parser::decodeFDE(addressSpace, fde, fdeInfo, cieInfo); + if (message != NULL) { + _LIBUNWIND_DEBUG_LOG("EHHeaderParser::decodeTableEntry: bad fde: %s", + message); + return false; + } + + return true; +} + +template +bool EHHeaderParser::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, + uint32_t sectionLength, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo) { + pint_t ehHdrEnd = ehHdrStart + sectionLength; + + EHHeaderParser::EHHeaderInfo hdrInfo; + if (!EHHeaderParser::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd, + hdrInfo)) + return false; + + size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc); + pint_t tableEntry; + + size_t low = 0; + for (size_t len = hdrInfo.fde_count; len > 1;) { + size_t mid = low + (len / 2); + tableEntry = hdrInfo.table + mid * tableEntrySize; + pint_t start = addressSpace.getEncodedP(tableEntry, ehHdrEnd, + hdrInfo.table_enc, ehHdrStart); + + if (start == pc) { + low = mid; + break; + } else if (start < pc) { + low = mid; + len -= (len / 2); + } else { + len /= 2; + } + } + + tableEntry = hdrInfo.table + low * tableEntrySize; + if (decodeTableEntry(addressSpace, tableEntry, ehHdrStart, ehHdrEnd, + hdrInfo.table_enc, fdeInfo, cieInfo)) { + if (pc >= fdeInfo->pcStart && pc < fdeInfo->pcEnd) + return true; + } + + return false; +} + +template +size_t EHHeaderParser::getTableEntrySize(uint8_t tableEnc) { + switch (tableEnc & 0x0f) { + case DW_EH_PE_sdata2: + case DW_EH_PE_udata2: + return 4; + case DW_EH_PE_sdata4: + case DW_EH_PE_udata4: + return 8; + case DW_EH_PE_sdata8: + case DW_EH_PE_udata8: + return 16; + case DW_EH_PE_sleb128: + case DW_EH_PE_uleb128: + _LIBUNWIND_ABORT("Can't binary search on variable length encoded data."); + case DW_EH_PE_omit: + return 0; + default: + _LIBUNWIND_ABORT("Unknown DWARF encoding for search table."); + } +} + +} + +#endif diff --git a/src/coreclr/src/nativeaot/libunwind/src/RWMutex.hpp b/src/coreclr/src/nativeaot/libunwind/src/RWMutex.hpp new file mode 100644 index 0000000000000..a37ac77144f38 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/RWMutex.hpp @@ -0,0 +1,114 @@ +//===----------------------------- Registers.hpp --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Abstract interface to shared reader/writer log, hiding platform and +// configuration differences. 
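+// In the unwinder this guards state that is read often but updated rarely,
+// such as the cached DWARF FDE lookups.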
+// +//===----------------------------------------------------------------------===// + +#ifndef __RWMUTEX_HPP__ +#define __RWMUTEX_HPP__ + +#if defined(_WIN32) +#include +#elif !defined(_LIBUNWIND_HAS_NO_THREADS) +#include +#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#pragma comment(lib, "pthread") +#endif +#endif + +namespace libunwind { + +#if defined(_LIBUNWIND_HAS_NO_THREADS) + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { return true; } + bool unlock_shared() { return true; } + bool lock() { return true; } + bool unlock() { return true; } +}; + +#elif defined(_WIN32) + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { + AcquireSRWLockShared(&_lock); + return true; + } + bool unlock_shared() { + ReleaseSRWLockShared(&_lock); + return true; + } + bool lock() { + AcquireSRWLockExclusive(&_lock); + return true; + } + bool unlock() { + ReleaseSRWLockExclusive(&_lock); + return true; + } + +private: + SRWLOCK _lock = SRWLOCK_INIT; +}; + +#elif !defined(LIBUNWIND_USE_WEAK_PTHREAD) + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { return pthread_rwlock_rdlock(&_lock) == 0; } + bool unlock_shared() { return pthread_rwlock_unlock(&_lock) == 0; } + bool lock() { return pthread_rwlock_wrlock(&_lock) == 0; } + bool unlock() { return pthread_rwlock_unlock(&_lock) == 0; } + +private: + pthread_rwlock_t _lock = PTHREAD_RWLOCK_INITIALIZER; +}; + +#else + +extern "C" int __attribute__((weak)) +pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg); +extern "C" int __attribute__((weak)) +pthread_rwlock_rdlock(pthread_rwlock_t *lock); +extern "C" int __attribute__((weak)) +pthread_rwlock_wrlock(pthread_rwlock_t *lock); +extern "C" int __attribute__((weak)) +pthread_rwlock_unlock(pthread_rwlock_t *lock); + +// Calls to the locking functions are gated on pthread_create, and not the +// functions themselves, because the data structure should only be locked if +// another thread has been created. This is what similar libraries do. + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { + return !pthread_create || (pthread_rwlock_rdlock(&_lock) == 0); + } + bool unlock_shared() { + return !pthread_create || (pthread_rwlock_unlock(&_lock) == 0); + } + bool lock() { + return !pthread_create || (pthread_rwlock_wrlock(&_lock) == 0); + } + bool unlock() { + return !pthread_create || (pthread_rwlock_unlock(&_lock) == 0); + } + +private: + pthread_rwlock_t _lock = PTHREAD_RWLOCK_INITIALIZER; +}; + +#endif + +} // namespace libunwind + +#endif // __RWMUTEX_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/Registers.hpp b/src/coreclr/src/nativeaot/libunwind/src/Registers.hpp new file mode 100644 index 0000000000000..9f82d5c6766f5 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Registers.hpp @@ -0,0 +1,3718 @@ +//===----------------------------- Registers.hpp --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Models register sets for supported processors. 
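+// In this copy each register class additionally records the memory location
+// a register was restored from, exposed through getRegisterLocation().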
+// +//===----------------------------------------------------------------------===// + +#ifndef __REGISTERS_HPP__ +#define __REGISTERS_HPP__ + +#include +#include + +#include "libunwind.h" +#include "config.h" + +namespace libunwind { + +// For emulating 128-bit registers +struct v128 { uint32_t vec[4]; }; + +enum { + REGISTERS_X86, + REGISTERS_X86_64, + REGISTERS_PPC, + REGISTERS_PPC64, + REGISTERS_ARM64, + REGISTERS_ARM, + REGISTERS_OR1K, + REGISTERS_MIPS_O32, + REGISTERS_MIPS_NEWABI, + REGISTERS_SPARC, +}; + +#if defined(_LIBUNWIND_TARGET_I386) +/// Registers_x86 holds the register state of a thread in a 32-bit intel +/// process. +class _LIBUNWIND_HIDDEN Registers_x86 { +public: + Registers_x86(); + Registers_x86(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value, uint32_t location); + uint32_t getRegisterLocation(int num) const; + bool validFloatRegister(int) const { return false; } + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int) const { return false; } + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86; } + static int getArch() { return REGISTERS_X86; } + + uint32_t getSP() const { return _registers.__esp; } + void setSP(uint32_t value, uint32_t location) { _registers.__esp = value; _registerLocations.__esp = location; } + uint32_t getIP() const { return _registers.__eip; } + void setIP(uint32_t value, uint32_t location) { _registers.__eip = value; _registerLocations.__eip = location; } + uint32_t getEBP() const { return _registers.__ebp; } + void setEBP(uint32_t value, uint32_t location) { _registers.__ebp = value; _registerLocations.__ebp = location; } + uint32_t getEBX() const { return _registers.__ebx; } + void setEBX(uint32_t value, uint32_t location) { _registers.__ebx = value; _registerLocations.__ebx = location; } + uint32_t getECX() const { return _registers.__ecx; } + void setECX(uint32_t value, uint32_t location) { _registers.__ecx = value; _registerLocations.__ecx = location; } + uint32_t getEDX() const { return _registers.__edx; } + void setEDX(uint32_t value, uint32_t location) { _registers.__edx = value; _registerLocations.__edx = location; } + uint32_t getESI() const { return _registers.__esi; } + void setESI(uint32_t value, uint32_t location) { _registers.__esi = value; _registerLocations.__esi = location; } + uint32_t getEDI() const { return _registers.__edi; } + void setEDI(uint32_t value, uint32_t location) { _registers.__edi = value; _registerLocations.__edi = location; } + +private: + struct GPRs { + unsigned int __eax; + unsigned int __ebx; + unsigned int __ecx; + unsigned int __edx; + unsigned int __edi; + unsigned int __esi; + unsigned int __ebp; + unsigned int __esp; + unsigned int __ss; + unsigned int __eflags; + unsigned int __eip; + unsigned int __cs; + unsigned int __ds; + unsigned int __es; + unsigned int __fs; + unsigned int __gs; + }; + struct GPRLocations { + unsigned int __eax; + unsigned int __ebx; + unsigned int __ecx; + unsigned int __edx; + unsigned int __edi; + unsigned int __esi; + unsigned int __ebp; + unsigned int __esp; + unsigned int __eip; + }; + + GPRs _registers; + GPRLocations _registerLocations; +}; + +inline Registers_x86::Registers_x86(const void *registers) { + 
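+  // 'registers' points at the raw context block, typically captured by
+  // unw_getcontext().  The static_assert below checks that this class still
+  // fits inside the opaque unw_context_t buffer; saved-location tracking
+  // starts out cleared (a zero location means "not restored from memory").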
static_assert((check_fit::does_fit), + "x86 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline Registers_x86::Registers_x86() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline bool Registers_x86::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 7) + return false; + return true; +} + +inline uint32_t Registers_x86::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__eip; + case UNW_REG_SP: + return _registers.__esp; + case UNW_X86_EAX: + return _registers.__eax; + case UNW_X86_ECX: + return _registers.__ecx; + case UNW_X86_EDX: + return _registers.__edx; + case UNW_X86_EBX: + return _registers.__ebx; +#if !defined(__APPLE__) + case UNW_X86_ESP: +#else + case UNW_X86_EBP: +#endif + return _registers.__ebp; +#if !defined(__APPLE__) + case UNW_X86_EBP: +#else + case UNW_X86_ESP: +#endif + return _registers.__esp; + case UNW_X86_ESI: + return _registers.__esi; + case UNW_X86_EDI: + return _registers.__edi; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline void Registers_x86::setRegister(int regNum, uint32_t value, uint32_t location) { + switch (regNum) { + case UNW_REG_IP: + _registers.__eip = value; + _registerLocations.__eip = location; + return; + case UNW_REG_SP: + _registers.__esp = value; + _registerLocations.__esp = location; + return; + case UNW_X86_EAX: + _registers.__eax = value; + _registerLocations.__eax = location; + return; + case UNW_X86_ECX: + _registers.__ecx = value; + _registerLocations.__ecx = location; + return; + case UNW_X86_EDX: + _registers.__edx = value; + _registerLocations.__edx = location; + return; + case UNW_X86_EBX: + _registers.__ebx = value; + _registerLocations.__ebx = location; + return; +#if !defined(__APPLE__) + case UNW_X86_ESP: +#else + case UNW_X86_EBP: +#endif + _registers.__ebp = value; + _registerLocations.__ebp = location; + return; +#if !defined(__APPLE__) + case UNW_X86_EBP: +#else + case UNW_X86_ESP: +#endif + _registers.__esp = value; + _registerLocations.__esp = location; + return; + case UNW_X86_ESI: + _registers.__esi = value; + _registerLocations.__esi = location; + return; + case UNW_X86_EDI: + _registers.__edi = value; + _registerLocations.__edi = location; + return; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline uint32_t Registers_x86::getRegisterLocation(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registerLocations.__eip; + case UNW_REG_SP: + return _registerLocations.__esp; + case UNW_X86_EAX: + return _registerLocations.__eax; + case UNW_X86_ECX: + return _registerLocations.__ecx; + case UNW_X86_EDX: + return _registerLocations.__edx; + case UNW_X86_EBX: + return _registerLocations.__ebx; + case UNW_X86_EBP: + return _registerLocations.__ebp; + case UNW_X86_ESP: + return _registerLocations.__esp; + case UNW_X86_ESI: + return _registerLocations.__esi; + case UNW_X86_EDI: + return _registerLocations.__edi; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline const char *Registers_x86::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "esp"; + case UNW_X86_EAX: + return "eax"; + case UNW_X86_ECX: + return "ecx"; + case UNW_X86_EDX: + return "edx"; + case 
UNW_X86_EBX: + return "ebx"; + case UNW_X86_EBP: + return "ebp"; + case UNW_X86_ESP: + return "esp"; + case UNW_X86_ESI: + return "esi"; + case UNW_X86_EDI: + return "edi"; + default: + return "unknown register"; + } +} + +inline double Registers_x86::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no x86 float registers"); +} + +inline void Registers_x86::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no x86 float registers"); +} + +inline v128 Registers_x86::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no x86 vector registers"); +} + +inline void Registers_x86::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no x86 vector registers"); +} +#endif // _LIBUNWIND_TARGET_I386 + + +#if defined(_LIBUNWIND_TARGET_X86_64) +/// Registers_x86_64 holds the register state of a thread in a 64-bit intel +/// process. +class _LIBUNWIND_HIDDEN Registers_x86_64 { +public: + Registers_x86_64(); + Registers_x86_64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + uint64_t getRegisterLocation(int num) const; + bool validFloatRegister(int) const { return false; } + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64; } + static int getArch() { return REGISTERS_X86_64; } + + uint64_t getSP() const { return _registers.__rsp; } + void setSP(uint64_t value, uint64_t location) { _registers.__rsp = value; _registerLocations.__rsp = location;} + uint64_t getIP() const { return _registers.__rip; } + void setIP(uint64_t value, uint64_t location) { _registers.__rip = value; _registerLocations.__rip = location; } + uint64_t getRBP() const { return _registers.__rbp; } + void setRBP(uint64_t value, uint64_t location) { _registers.__rbp = value; _registerLocations.__rbp = location; } + uint64_t getRBX() const { return _registers.__rbx; } + void setRBX(uint64_t value, uint64_t location) { _registers.__rbx = value; _registerLocations.__rbx = location; } + uint64_t getR12() const { return _registers.__r12; } + void setR12(uint64_t value, uint64_t location) { _registers.__r12 = value; _registerLocations.__r12 = location; } + uint64_t getR13() const { return _registers.__r13; } + void setR13(uint64_t value, uint64_t location) { _registers.__r13 = value; _registerLocations.__r13 = location; } + uint64_t getR14() const { return _registers.__r14; } + void setR14(uint64_t value, uint64_t location) { _registers.__r14 = value; _registerLocations.__r14 = location; } + uint64_t getR15() const { return _registers.__r15; } + void setR15(uint64_t value, uint64_t location) { _registers.__r15 = value; _registerLocations.__r15 = location; } + +private: + struct GPRs { + uint64_t __rax; + uint64_t __rbx; + uint64_t __rcx; + uint64_t __rdx; + uint64_t __rdi; + uint64_t __rsi; + uint64_t __rbp; + uint64_t __rsp; + uint64_t __r8; + uint64_t __r9; + uint64_t __r10; + uint64_t __r11; + uint64_t __r12; + uint64_t __r13; + uint64_t __r14; + uint64_t __r15; + uint64_t __rip; + uint64_t __rflags; + uint64_t __cs; + uint64_t __fs; + uint64_t __gs; +#if defined(_WIN64) + uint64_t __padding; // 16-byte align +#endif + }; + struct GPRLocations { + uint64_t __rax; + uint64_t __rbx; + uint64_t __rcx; + uint64_t __rdx; + 
uint64_t __rdi; + uint64_t __rsi; + uint64_t __rbp; + uint64_t __rsp; + uint64_t __r8; + uint64_t __r9; + uint64_t __r10; + uint64_t __r11; + uint64_t __r12; + uint64_t __r13; + uint64_t __r14; + uint64_t __r15; + uint64_t __rip; + }; + GPRs _registers; + GPRLocations _registerLocations; +#if defined(_WIN64) + v128 _xmm[16]; +#endif +}; + +inline Registers_x86_64::Registers_x86_64(const void *registers) { + static_assert((check_fit::does_fit), + "x86_64 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline Registers_x86_64::Registers_x86_64() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline bool Registers_x86_64::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 15) + return false; + return true; +} + +inline uint64_t Registers_x86_64::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__rip; + case UNW_REG_SP: + return _registers.__rsp; + case UNW_X86_64_RAX: + return _registers.__rax; + case UNW_X86_64_RDX: + return _registers.__rdx; + case UNW_X86_64_RCX: + return _registers.__rcx; + case UNW_X86_64_RBX: + return _registers.__rbx; + case UNW_X86_64_RSI: + return _registers.__rsi; + case UNW_X86_64_RDI: + return _registers.__rdi; + case UNW_X86_64_RBP: + return _registers.__rbp; + case UNW_X86_64_RSP: + return _registers.__rsp; + case UNW_X86_64_R8: + return _registers.__r8; + case UNW_X86_64_R9: + return _registers.__r9; + case UNW_X86_64_R10: + return _registers.__r10; + case UNW_X86_64_R11: + return _registers.__r11; + case UNW_X86_64_R12: + return _registers.__r12; + case UNW_X86_64_R13: + return _registers.__r13; + case UNW_X86_64_R14: + return _registers.__r14; + case UNW_X86_64_R15: + return _registers.__r15; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline uint64_t Registers_x86_64::getRegisterLocation(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registerLocations.__rip; + case UNW_REG_SP: + return _registerLocations.__rsp; + case UNW_X86_64_RAX: + return _registerLocations.__rax; + case UNW_X86_64_RDX: + return _registerLocations.__rdx; + case UNW_X86_64_RCX: + return _registerLocations.__rcx; + case UNW_X86_64_RBX: + return _registerLocations.__rbx; + case UNW_X86_64_RSI: + return _registerLocations.__rsi; + case UNW_X86_64_RDI: + return _registerLocations.__rdi; + case UNW_X86_64_RBP: + return _registerLocations.__rbp; + case UNW_X86_64_RSP: + return _registerLocations.__rsp; + case UNW_X86_64_R8: + return _registerLocations.__r8; + case UNW_X86_64_R9: + return _registerLocations.__r9; + case UNW_X86_64_R10: + return _registerLocations.__r10; + case UNW_X86_64_R11: + return _registerLocations.__r11; + case UNW_X86_64_R12: + return _registerLocations.__r12; + case UNW_X86_64_R13: + return _registerLocations.__r13; + case UNW_X86_64_R14: + return _registerLocations.__r14; + case UNW_X86_64_R15: + return _registerLocations.__r15; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline void Registers_x86_64::setRegister(int regNum, uint64_t value, uint64_t location) { + switch (regNum) { + case UNW_REG_IP: + _registers.__rip = value; + _registerLocations.__rip = location; + return; + case UNW_REG_SP: + _registers.__rsp = value; + _registerLocations.__rsp = location; + return; + case 
UNW_X86_64_RAX: + _registers.__rax = value; + _registerLocations.__rax = location; + return; + case UNW_X86_64_RDX: + _registers.__rdx = value; + _registerLocations.__rdx = location; + return; + case UNW_X86_64_RCX: + _registers.__rcx = value; + _registerLocations.__rcx = location; + return; + case UNW_X86_64_RBX: + _registers.__rbx = value; + _registerLocations.__rbx = location; + return; + case UNW_X86_64_RSI: + _registers.__rsi = value; + _registerLocations.__rsi = location; + return; + case UNW_X86_64_RDI: + _registers.__rdi = value; + _registerLocations.__rdi = location; + return; + case UNW_X86_64_RBP: + _registers.__rbp = value; + _registerLocations.__rbp = location; + return; + case UNW_X86_64_RSP: + _registers.__rsp = value; + _registerLocations.__rsp = location; + return; + case UNW_X86_64_R8: + _registers.__r8 = value; + _registerLocations.__r8 = location; + return; + case UNW_X86_64_R9: + _registers.__r9 = value; + _registerLocations.__r9 = location; + return; + case UNW_X86_64_R10: + _registers.__r10 = value; + _registerLocations.__r10 = location; + return; + case UNW_X86_64_R11: + _registers.__r11 = value; + _registerLocations.__r11 = location; + return; + case UNW_X86_64_R12: + _registers.__r12 = value; + _registerLocations.__r12 = location; + return; + case UNW_X86_64_R13: + _registers.__r13 = value; + _registerLocations.__r13 = location; + return; + case UNW_X86_64_R14: + _registers.__r14 = value; + _registerLocations.__r14 = location; + return; + case UNW_X86_64_R15: + _registers.__r15 = value; + _registerLocations.__r15 = location; + return; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline const char *Registers_x86_64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "rip"; + case UNW_REG_SP: + return "rsp"; + case UNW_X86_64_RAX: + return "rax"; + case UNW_X86_64_RDX: + return "rdx"; + case UNW_X86_64_RCX: + return "rcx"; + case UNW_X86_64_RBX: + return "rbx"; + case UNW_X86_64_RSI: + return "rsi"; + case UNW_X86_64_RDI: + return "rdi"; + case UNW_X86_64_RBP: + return "rbp"; + case UNW_X86_64_RSP: + return "rsp"; + case UNW_X86_64_R8: + return "r8"; + case UNW_X86_64_R9: + return "r9"; + case UNW_X86_64_R10: + return "r10"; + case UNW_X86_64_R11: + return "r11"; + case UNW_X86_64_R12: + return "r12"; + case UNW_X86_64_R13: + return "r13"; + case UNW_X86_64_R14: + return "r14"; + case UNW_X86_64_R15: + return "r15"; + case UNW_X86_64_XMM0: + return "xmm0"; + case UNW_X86_64_XMM1: + return "xmm1"; + case UNW_X86_64_XMM2: + return "xmm2"; + case UNW_X86_64_XMM3: + return "xmm3"; + case UNW_X86_64_XMM4: + return "xmm4"; + case UNW_X86_64_XMM5: + return "xmm5"; + case UNW_X86_64_XMM6: + return "xmm6"; + case UNW_X86_64_XMM7: + return "xmm7"; + case UNW_X86_64_XMM8: + return "xmm8"; + case UNW_X86_64_XMM9: + return "xmm9"; + case UNW_X86_64_XMM10: + return "xmm10"; + case UNW_X86_64_XMM11: + return "xmm11"; + case UNW_X86_64_XMM12: + return "xmm12"; + case UNW_X86_64_XMM13: + return "xmm13"; + case UNW_X86_64_XMM14: + return "xmm14"; + case UNW_X86_64_XMM15: + return "xmm15"; + default: + return "unknown register"; + } +} + +inline double Registers_x86_64::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no x86_64 float registers"); +} + +inline void Registers_x86_64::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no x86_64 float registers"); +} + +inline bool Registers_x86_64::validVectorRegister(int regNum) const { +#if defined(_WIN64) + if (regNum < UNW_X86_64_XMM0) + return false; + if (regNum > UNW_X86_64_XMM15) + 
return false; + return true; +#else + (void)regNum; // suppress unused parameter warning + return false; +#endif +} + +inline v128 Registers_x86_64::getVectorRegister(int regNum) const { +#if defined(_WIN64) + assert(validVectorRegister(regNum)); + return _xmm[regNum - UNW_X86_64_XMM0]; +#else + (void)regNum; // suppress unused parameter warning + _LIBUNWIND_ABORT("no x86_64 vector registers"); +#endif +} + +inline void Registers_x86_64::setVectorRegister(int regNum, v128 value) { +#if defined(_WIN64) + assert(validVectorRegister(regNum)); + _xmm[regNum - UNW_X86_64_XMM0] = value; +#else + (void)regNum; (void)value; // suppress unused parameter warnings + _LIBUNWIND_ABORT("no x86_64 vector registers"); +#endif +} +#endif // _LIBUNWIND_TARGET_X86_64 + + +#if defined(_LIBUNWIND_TARGET_PPC) +/// Registers_ppc holds the register state of a thread in a 32-bit PowerPC +/// process. +class _LIBUNWIND_HIDDEN Registers_ppc { +public: + Registers_ppc(); + Registers_ppc(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC; } + static int getArch() { return REGISTERS_PPC; } + + uint64_t getSP() const { return _registers.__r1; } + void setSP(uint32_t value) { _registers.__r1 = value; } + uint64_t getIP() const { return _registers.__srr0; } + void setIP(uint32_t value) { _registers.__srr0 = value; } + +private: + struct ppc_thread_state_t { + unsigned int __srr0; /* Instruction address register (PC) */ + unsigned int __srr1; /* Machine state register (supervisor) */ + unsigned int __r0; + unsigned int __r1; + unsigned int __r2; + unsigned int __r3; + unsigned int __r4; + unsigned int __r5; + unsigned int __r6; + unsigned int __r7; + unsigned int __r8; + unsigned int __r9; + unsigned int __r10; + unsigned int __r11; + unsigned int __r12; + unsigned int __r13; + unsigned int __r14; + unsigned int __r15; + unsigned int __r16; + unsigned int __r17; + unsigned int __r18; + unsigned int __r19; + unsigned int __r20; + unsigned int __r21; + unsigned int __r22; + unsigned int __r23; + unsigned int __r24; + unsigned int __r25; + unsigned int __r26; + unsigned int __r27; + unsigned int __r28; + unsigned int __r29; + unsigned int __r30; + unsigned int __r31; + unsigned int __cr; /* Condition register */ + unsigned int __xer; /* User's integer exception register */ + unsigned int __lr; /* Link register */ + unsigned int __ctr; /* Count register */ + unsigned int __mq; /* MQ register (601 only) */ + unsigned int __vrsave; /* Vector Save Register */ + }; + + struct ppc_float_state_t { + double __fpregs[32]; + + unsigned int __fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */ + unsigned int __fpscr; /* floating point status register */ + }; + + ppc_thread_state_t _registers; + ppc_float_state_t _floatRegisters; + v128 _vectorRegisters[32]; // offset 424 +}; + +inline Registers_ppc::Registers_ppc(const void *registers) { + static_assert((check_fit::does_fit), + "ppc registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); + static_assert(sizeof(ppc_thread_state_t) == 160, 
+ "expected float register offset to be 160"); + memcpy(&_floatRegisters, + static_cast(registers) + sizeof(ppc_thread_state_t), + sizeof(_floatRegisters)); + static_assert(sizeof(ppc_thread_state_t) + sizeof(ppc_float_state_t) == 424, + "expected vector register offset to be 424 bytes"); + memcpy(_vectorRegisters, + static_cast(registers) + sizeof(ppc_thread_state_t) + + sizeof(ppc_float_state_t), + sizeof(_vectorRegisters)); +} + +inline Registers_ppc::Registers_ppc() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_floatRegisters, 0, sizeof(_floatRegisters)); + memset(&_vectorRegisters, 0, sizeof(_vectorRegisters)); +} + +inline bool Registers_ppc::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum == UNW_PPC_VRSAVE) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_PPC_R31) + return true; + if (regNum == UNW_PPC_MQ) + return true; + if (regNum == UNW_PPC_LR) + return true; + if (regNum == UNW_PPC_CTR) + return true; + if ((UNW_PPC_CR0 <= regNum) && (regNum <= UNW_PPC_CR7)) + return true; + return false; +} + +inline uint32_t Registers_ppc::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__srr0; + case UNW_REG_SP: + return _registers.__r1; + case UNW_PPC_R0: + return _registers.__r0; + case UNW_PPC_R1: + return _registers.__r1; + case UNW_PPC_R2: + return _registers.__r2; + case UNW_PPC_R3: + return _registers.__r3; + case UNW_PPC_R4: + return _registers.__r4; + case UNW_PPC_R5: + return _registers.__r5; + case UNW_PPC_R6: + return _registers.__r6; + case UNW_PPC_R7: + return _registers.__r7; + case UNW_PPC_R8: + return _registers.__r8; + case UNW_PPC_R9: + return _registers.__r9; + case UNW_PPC_R10: + return _registers.__r10; + case UNW_PPC_R11: + return _registers.__r11; + case UNW_PPC_R12: + return _registers.__r12; + case UNW_PPC_R13: + return _registers.__r13; + case UNW_PPC_R14: + return _registers.__r14; + case UNW_PPC_R15: + return _registers.__r15; + case UNW_PPC_R16: + return _registers.__r16; + case UNW_PPC_R17: + return _registers.__r17; + case UNW_PPC_R18: + return _registers.__r18; + case UNW_PPC_R19: + return _registers.__r19; + case UNW_PPC_R20: + return _registers.__r20; + case UNW_PPC_R21: + return _registers.__r21; + case UNW_PPC_R22: + return _registers.__r22; + case UNW_PPC_R23: + return _registers.__r23; + case UNW_PPC_R24: + return _registers.__r24; + case UNW_PPC_R25: + return _registers.__r25; + case UNW_PPC_R26: + return _registers.__r26; + case UNW_PPC_R27: + return _registers.__r27; + case UNW_PPC_R28: + return _registers.__r28; + case UNW_PPC_R29: + return _registers.__r29; + case UNW_PPC_R30: + return _registers.__r30; + case UNW_PPC_R31: + return _registers.__r31; + case UNW_PPC_LR: + return _registers.__lr; + case UNW_PPC_CR0: + return (_registers.__cr & 0xF0000000); + case UNW_PPC_CR1: + return (_registers.__cr & 0x0F000000); + case UNW_PPC_CR2: + return (_registers.__cr & 0x00F00000); + case UNW_PPC_CR3: + return (_registers.__cr & 0x000F0000); + case UNW_PPC_CR4: + return (_registers.__cr & 0x0000F000); + case UNW_PPC_CR5: + return (_registers.__cr & 0x00000F00); + case UNW_PPC_CR6: + return (_registers.__cr & 0x000000F0); + case UNW_PPC_CR7: + return (_registers.__cr & 0x0000000F); + case UNW_PPC_VRSAVE: + return _registers.__vrsave; + } + _LIBUNWIND_ABORT("unsupported ppc register"); +} + +inline void Registers_ppc::setRegister(int regNum, uint32_t value) { + //fprintf(stderr, 
"Registers_ppc::setRegister(%d, 0x%08X)\n", regNum, value); + switch (regNum) { + case UNW_REG_IP: + _registers.__srr0 = value; + return; + case UNW_REG_SP: + _registers.__r1 = value; + return; + case UNW_PPC_R0: + _registers.__r0 = value; + return; + case UNW_PPC_R1: + _registers.__r1 = value; + return; + case UNW_PPC_R2: + _registers.__r2 = value; + return; + case UNW_PPC_R3: + _registers.__r3 = value; + return; + case UNW_PPC_R4: + _registers.__r4 = value; + return; + case UNW_PPC_R5: + _registers.__r5 = value; + return; + case UNW_PPC_R6: + _registers.__r6 = value; + return; + case UNW_PPC_R7: + _registers.__r7 = value; + return; + case UNW_PPC_R8: + _registers.__r8 = value; + return; + case UNW_PPC_R9: + _registers.__r9 = value; + return; + case UNW_PPC_R10: + _registers.__r10 = value; + return; + case UNW_PPC_R11: + _registers.__r11 = value; + return; + case UNW_PPC_R12: + _registers.__r12 = value; + return; + case UNW_PPC_R13: + _registers.__r13 = value; + return; + case UNW_PPC_R14: + _registers.__r14 = value; + return; + case UNW_PPC_R15: + _registers.__r15 = value; + return; + case UNW_PPC_R16: + _registers.__r16 = value; + return; + case UNW_PPC_R17: + _registers.__r17 = value; + return; + case UNW_PPC_R18: + _registers.__r18 = value; + return; + case UNW_PPC_R19: + _registers.__r19 = value; + return; + case UNW_PPC_R20: + _registers.__r20 = value; + return; + case UNW_PPC_R21: + _registers.__r21 = value; + return; + case UNW_PPC_R22: + _registers.__r22 = value; + return; + case UNW_PPC_R23: + _registers.__r23 = value; + return; + case UNW_PPC_R24: + _registers.__r24 = value; + return; + case UNW_PPC_R25: + _registers.__r25 = value; + return; + case UNW_PPC_R26: + _registers.__r26 = value; + return; + case UNW_PPC_R27: + _registers.__r27 = value; + return; + case UNW_PPC_R28: + _registers.__r28 = value; + return; + case UNW_PPC_R29: + _registers.__r29 = value; + return; + case UNW_PPC_R30: + _registers.__r30 = value; + return; + case UNW_PPC_R31: + _registers.__r31 = value; + return; + case UNW_PPC_MQ: + _registers.__mq = value; + return; + case UNW_PPC_LR: + _registers.__lr = value; + return; + case UNW_PPC_CTR: + _registers.__ctr = value; + return; + case UNW_PPC_CR0: + _registers.__cr &= 0x0FFFFFFF; + _registers.__cr |= (value & 0xF0000000); + return; + case UNW_PPC_CR1: + _registers.__cr &= 0xF0FFFFFF; + _registers.__cr |= (value & 0x0F000000); + return; + case UNW_PPC_CR2: + _registers.__cr &= 0xFF0FFFFF; + _registers.__cr |= (value & 0x00F00000); + return; + case UNW_PPC_CR3: + _registers.__cr &= 0xFFF0FFFF; + _registers.__cr |= (value & 0x000F0000); + return; + case UNW_PPC_CR4: + _registers.__cr &= 0xFFFF0FFF; + _registers.__cr |= (value & 0x0000F000); + return; + case UNW_PPC_CR5: + _registers.__cr &= 0xFFFFF0FF; + _registers.__cr |= (value & 0x00000F00); + return; + case UNW_PPC_CR6: + _registers.__cr &= 0xFFFFFF0F; + _registers.__cr |= (value & 0x000000F0); + return; + case UNW_PPC_CR7: + _registers.__cr &= 0xFFFFFFF0; + _registers.__cr |= (value & 0x0000000F); + return; + case UNW_PPC_VRSAVE: + _registers.__vrsave = value; + return; + // not saved + return; + case UNW_PPC_XER: + _registers.__xer = value; + return; + case UNW_PPC_AP: + case UNW_PPC_VSCR: + case UNW_PPC_SPEFSCR: + // not saved + return; + } + _LIBUNWIND_ABORT("unsupported ppc register"); +} + +inline bool Registers_ppc::validFloatRegister(int regNum) const { + if (regNum < UNW_PPC_F0) + return false; + if (regNum > UNW_PPC_F31) + return false; + return true; +} + +inline double 
Registers_ppc::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); + return _floatRegisters.__fpregs[regNum - UNW_PPC_F0]; +} + +inline void Registers_ppc::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); + _floatRegisters.__fpregs[regNum - UNW_PPC_F0] = value; +} + +inline bool Registers_ppc::validVectorRegister(int regNum) const { + if (regNum < UNW_PPC_V0) + return false; + if (regNum > UNW_PPC_V31) + return false; + return true; +} + +inline v128 Registers_ppc::getVectorRegister(int regNum) const { + assert(validVectorRegister(regNum)); + v128 result = _vectorRegisters[regNum - UNW_PPC_V0]; + return result; +} + +inline void Registers_ppc::setVectorRegister(int regNum, v128 value) { + assert(validVectorRegister(regNum)); + _vectorRegisters[regNum - UNW_PPC_V0] = value; +} + +inline const char *Registers_ppc::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "sp"; + case UNW_PPC_R0: + return "r0"; + case UNW_PPC_R1: + return "r1"; + case UNW_PPC_R2: + return "r2"; + case UNW_PPC_R3: + return "r3"; + case UNW_PPC_R4: + return "r4"; + case UNW_PPC_R5: + return "r5"; + case UNW_PPC_R6: + return "r6"; + case UNW_PPC_R7: + return "r7"; + case UNW_PPC_R8: + return "r8"; + case UNW_PPC_R9: + return "r9"; + case UNW_PPC_R10: + return "r10"; + case UNW_PPC_R11: + return "r11"; + case UNW_PPC_R12: + return "r12"; + case UNW_PPC_R13: + return "r13"; + case UNW_PPC_R14: + return "r14"; + case UNW_PPC_R15: + return "r15"; + case UNW_PPC_R16: + return "r16"; + case UNW_PPC_R17: + return "r17"; + case UNW_PPC_R18: + return "r18"; + case UNW_PPC_R19: + return "r19"; + case UNW_PPC_R20: + return "r20"; + case UNW_PPC_R21: + return "r21"; + case UNW_PPC_R22: + return "r22"; + case UNW_PPC_R23: + return "r23"; + case UNW_PPC_R24: + return "r24"; + case UNW_PPC_R25: + return "r25"; + case UNW_PPC_R26: + return "r26"; + case UNW_PPC_R27: + return "r27"; + case UNW_PPC_R28: + return "r28"; + case UNW_PPC_R29: + return "r29"; + case UNW_PPC_R30: + return "r30"; + case UNW_PPC_R31: + return "r31"; + case UNW_PPC_F0: + return "fp0"; + case UNW_PPC_F1: + return "fp1"; + case UNW_PPC_F2: + return "fp2"; + case UNW_PPC_F3: + return "fp3"; + case UNW_PPC_F4: + return "fp4"; + case UNW_PPC_F5: + return "fp5"; + case UNW_PPC_F6: + return "fp6"; + case UNW_PPC_F7: + return "fp7"; + case UNW_PPC_F8: + return "fp8"; + case UNW_PPC_F9: + return "fp9"; + case UNW_PPC_F10: + return "fp10"; + case UNW_PPC_F11: + return "fp11"; + case UNW_PPC_F12: + return "fp12"; + case UNW_PPC_F13: + return "fp13"; + case UNW_PPC_F14: + return "fp14"; + case UNW_PPC_F15: + return "fp15"; + case UNW_PPC_F16: + return "fp16"; + case UNW_PPC_F17: + return "fp17"; + case UNW_PPC_F18: + return "fp18"; + case UNW_PPC_F19: + return "fp19"; + case UNW_PPC_F20: + return "fp20"; + case UNW_PPC_F21: + return "fp21"; + case UNW_PPC_F22: + return "fp22"; + case UNW_PPC_F23: + return "fp23"; + case UNW_PPC_F24: + return "fp24"; + case UNW_PPC_F25: + return "fp25"; + case UNW_PPC_F26: + return "fp26"; + case UNW_PPC_F27: + return "fp27"; + case UNW_PPC_F28: + return "fp28"; + case UNW_PPC_F29: + return "fp29"; + case UNW_PPC_F30: + return "fp30"; + case UNW_PPC_F31: + return "fp31"; + case UNW_PPC_LR: + return "lr"; + default: + return "unknown register"; + } + +} +#endif // _LIBUNWIND_TARGET_PPC + +#if defined(_LIBUNWIND_TARGET_PPC64) +/// Registers_ppc64 holds the register state of a thread in a 64-bit PowerPC +/// process. 
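+/// Floating-point and vector state is kept in the combined VSX register file
+/// (_vectorScalarRegisters), reflecting how POWER overlays the FP and
+/// Altivec registers onto the 64 VSX registers.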
+class _LIBUNWIND_HIDDEN Registers_ppc64 { +public: + Registers_ppc64(); + Registers_ppc64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + uint64_t getRegisterLocation(int num) const; + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64; } + static int getArch() { return REGISTERS_PPC64; } + + uint64_t getSP() const { return _registers.__r1; } + void setSP(uint64_t value) { _registers.__r1 = value; } + uint64_t getIP() const { return _registers.__srr0; } + void setIP(uint64_t value) { _registers.__srr0 = value; } + +private: + struct ppc64_thread_state_t { + uint64_t __srr0; // Instruction address register (PC) + uint64_t __srr1; // Machine state register (supervisor) + uint64_t __r0; + uint64_t __r1; + uint64_t __r2; + uint64_t __r3; + uint64_t __r4; + uint64_t __r5; + uint64_t __r6; + uint64_t __r7; + uint64_t __r8; + uint64_t __r9; + uint64_t __r10; + uint64_t __r11; + uint64_t __r12; + uint64_t __r13; + uint64_t __r14; + uint64_t __r15; + uint64_t __r16; + uint64_t __r17; + uint64_t __r18; + uint64_t __r19; + uint64_t __r20; + uint64_t __r21; + uint64_t __r22; + uint64_t __r23; + uint64_t __r24; + uint64_t __r25; + uint64_t __r26; + uint64_t __r27; + uint64_t __r28; + uint64_t __r29; + uint64_t __r30; + uint64_t __r31; + uint64_t __cr; // Condition register + uint64_t __xer; // User's integer exception register + uint64_t __lr; // Link register + uint64_t __ctr; // Count register + uint64_t __vrsave; // Vector Save Register + }; + + union ppc64_vsr_t { + struct asfloat_s { + double f; + uint64_t v2; + } asfloat; + v128 v; + }; + + ppc64_thread_state_t _registers; + ppc64_vsr_t _vectorScalarRegisters[64]; + + static int getVectorRegNum(int num); +}; + +inline Registers_ppc64::Registers_ppc64(const void *registers) { + static_assert((check_fit::does_fit), + "ppc64 registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); + static_assert(sizeof(_registers) == 312, + "expected vector scalar register offset to be 312"); + memcpy(&_vectorScalarRegisters, + static_cast(registers) + sizeof(_registers), + sizeof(_vectorScalarRegisters)); + static_assert(sizeof(_registers) + + sizeof(_vectorScalarRegisters) == 1336, + "expected vector register offset to be 1336 bytes"); +} + +inline Registers_ppc64::Registers_ppc64() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_vectorScalarRegisters, 0, sizeof(_vectorScalarRegisters)); +} + +inline bool Registers_ppc64::validRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + case UNW_REG_SP: + case UNW_PPC64_XER: + case UNW_PPC64_LR: + case UNW_PPC64_CTR: + case UNW_PPC64_VRSAVE: + return true; + } + + if (regNum >= UNW_PPC64_R0 && regNum <= UNW_PPC64_R31) + return true; + if (regNum >= UNW_PPC64_CR0 && regNum <= UNW_PPC64_CR7) + return true; + + return false; +} + +inline uint64_t Registers_ppc64::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__srr0; + case UNW_PPC64_R0: + return _registers.__r0; + case UNW_PPC64_R1: + case UNW_REG_SP: + return _registers.__r1; + 
case UNW_PPC64_R2: + return _registers.__r2; + case UNW_PPC64_R3: + return _registers.__r3; + case UNW_PPC64_R4: + return _registers.__r4; + case UNW_PPC64_R5: + return _registers.__r5; + case UNW_PPC64_R6: + return _registers.__r6; + case UNW_PPC64_R7: + return _registers.__r7; + case UNW_PPC64_R8: + return _registers.__r8; + case UNW_PPC64_R9: + return _registers.__r9; + case UNW_PPC64_R10: + return _registers.__r10; + case UNW_PPC64_R11: + return _registers.__r11; + case UNW_PPC64_R12: + return _registers.__r12; + case UNW_PPC64_R13: + return _registers.__r13; + case UNW_PPC64_R14: + return _registers.__r14; + case UNW_PPC64_R15: + return _registers.__r15; + case UNW_PPC64_R16: + return _registers.__r16; + case UNW_PPC64_R17: + return _registers.__r17; + case UNW_PPC64_R18: + return _registers.__r18; + case UNW_PPC64_R19: + return _registers.__r19; + case UNW_PPC64_R20: + return _registers.__r20; + case UNW_PPC64_R21: + return _registers.__r21; + case UNW_PPC64_R22: + return _registers.__r22; + case UNW_PPC64_R23: + return _registers.__r23; + case UNW_PPC64_R24: + return _registers.__r24; + case UNW_PPC64_R25: + return _registers.__r25; + case UNW_PPC64_R26: + return _registers.__r26; + case UNW_PPC64_R27: + return _registers.__r27; + case UNW_PPC64_R28: + return _registers.__r28; + case UNW_PPC64_R29: + return _registers.__r29; + case UNW_PPC64_R30: + return _registers.__r30; + case UNW_PPC64_R31: + return _registers.__r31; + case UNW_PPC64_CR0: + return (_registers.__cr & 0xF0000000); + case UNW_PPC64_CR1: + return (_registers.__cr & 0x0F000000); + case UNW_PPC64_CR2: + return (_registers.__cr & 0x00F00000); + case UNW_PPC64_CR3: + return (_registers.__cr & 0x000F0000); + case UNW_PPC64_CR4: + return (_registers.__cr & 0x0000F000); + case UNW_PPC64_CR5: + return (_registers.__cr & 0x00000F00); + case UNW_PPC64_CR6: + return (_registers.__cr & 0x000000F0); + case UNW_PPC64_CR7: + return (_registers.__cr & 0x0000000F); + case UNW_PPC64_XER: + return _registers.__xer; + case UNW_PPC64_LR: + return _registers.__lr; + case UNW_PPC64_CTR: + return _registers.__ctr; + case UNW_PPC64_VRSAVE: + return _registers.__vrsave; + } + _LIBUNWIND_ABORT("unsupported ppc64 register"); +} + +inline void Registers_ppc64::setRegister(int regNum, uint64_t value) { + switch (regNum) { + case UNW_REG_IP: + _registers.__srr0 = value; + return; + case UNW_PPC64_R0: + _registers.__r0 = value; + return; + case UNW_PPC64_R1: + case UNW_REG_SP: + _registers.__r1 = value; + return; + case UNW_PPC64_R2: + _registers.__r2 = value; + return; + case UNW_PPC64_R3: + _registers.__r3 = value; + return; + case UNW_PPC64_R4: + _registers.__r4 = value; + return; + case UNW_PPC64_R5: + _registers.__r5 = value; + return; + case UNW_PPC64_R6: + _registers.__r6 = value; + return; + case UNW_PPC64_R7: + _registers.__r7 = value; + return; + case UNW_PPC64_R8: + _registers.__r8 = value; + return; + case UNW_PPC64_R9: + _registers.__r9 = value; + return; + case UNW_PPC64_R10: + _registers.__r10 = value; + return; + case UNW_PPC64_R11: + _registers.__r11 = value; + return; + case UNW_PPC64_R12: + _registers.__r12 = value; + return; + case UNW_PPC64_R13: + _registers.__r13 = value; + return; + case UNW_PPC64_R14: + _registers.__r14 = value; + return; + case UNW_PPC64_R15: + _registers.__r15 = value; + return; + case UNW_PPC64_R16: + _registers.__r16 = value; + return; + case UNW_PPC64_R17: + _registers.__r17 = value; + return; + case UNW_PPC64_R18: + _registers.__r18 = value; + return; + case UNW_PPC64_R19: + _registers.__r19 = value; 
+ return; + case UNW_PPC64_R20: + _registers.__r20 = value; + return; + case UNW_PPC64_R21: + _registers.__r21 = value; + return; + case UNW_PPC64_R22: + _registers.__r22 = value; + return; + case UNW_PPC64_R23: + _registers.__r23 = value; + return; + case UNW_PPC64_R24: + _registers.__r24 = value; + return; + case UNW_PPC64_R25: + _registers.__r25 = value; + return; + case UNW_PPC64_R26: + _registers.__r26 = value; + return; + case UNW_PPC64_R27: + _registers.__r27 = value; + return; + case UNW_PPC64_R28: + _registers.__r28 = value; + return; + case UNW_PPC64_R29: + _registers.__r29 = value; + return; + case UNW_PPC64_R30: + _registers.__r30 = value; + return; + case UNW_PPC64_R31: + _registers.__r31 = value; + return; + case UNW_PPC64_CR0: + _registers.__cr &= 0x0FFFFFFF; + _registers.__cr |= (value & 0xF0000000); + return; + case UNW_PPC64_CR1: + _registers.__cr &= 0xF0FFFFFF; + _registers.__cr |= (value & 0x0F000000); + return; + case UNW_PPC64_CR2: + _registers.__cr &= 0xFF0FFFFF; + _registers.__cr |= (value & 0x00F00000); + return; + case UNW_PPC64_CR3: + _registers.__cr &= 0xFFF0FFFF; + _registers.__cr |= (value & 0x000F0000); + return; + case UNW_PPC64_CR4: + _registers.__cr &= 0xFFFF0FFF; + _registers.__cr |= (value & 0x0000F000); + return; + case UNW_PPC64_CR5: + _registers.__cr &= 0xFFFFF0FF; + _registers.__cr |= (value & 0x00000F00); + return; + case UNW_PPC64_CR6: + _registers.__cr &= 0xFFFFFF0F; + _registers.__cr |= (value & 0x000000F0); + return; + case UNW_PPC64_CR7: + _registers.__cr &= 0xFFFFFFF0; + _registers.__cr |= (value & 0x0000000F); + return; + case UNW_PPC64_XER: + _registers.__xer = value; + return; + case UNW_PPC64_LR: + _registers.__lr = value; + return; + case UNW_PPC64_CTR: + _registers.__ctr = value; + return; + case UNW_PPC64_VRSAVE: + _registers.__vrsave = value; + return; + } + _LIBUNWIND_ABORT("unsupported ppc64 register"); +} + +inline bool Registers_ppc64::validFloatRegister(int regNum) const { + return regNum >= UNW_PPC64_F0 && regNum <= UNW_PPC64_F31; +} + +inline double Registers_ppc64::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); + return _vectorScalarRegisters[regNum - UNW_PPC64_F0].asfloat.f; +} + +inline void Registers_ppc64::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); + _vectorScalarRegisters[regNum - UNW_PPC64_F0].asfloat.f = value; +} + +inline bool Registers_ppc64::validVectorRegister(int regNum) const { +#ifdef PPC64_HAS_VMX + if (regNum >= UNW_PPC64_VS0 && regNum <= UNW_PPC64_VS31) + return true; + if (regNum >= UNW_PPC64_VS32 && regNum <= UNW_PPC64_VS63) + return true; +#else + if (regNum >= UNW_PPC64_V0 && regNum <= UNW_PPC64_V31) + return true; +#endif + return false; +} + +inline int Registers_ppc64::getVectorRegNum(int num) +{ + if (num >= UNW_PPC64_VS0 && num <= UNW_PPC64_VS31) + return num - UNW_PPC64_VS0; + else + return num - UNW_PPC64_VS32 + 32; +} + +inline v128 Registers_ppc64::getVectorRegister(int regNum) const { + assert(validVectorRegister(regNum)); + return _vectorScalarRegisters[getVectorRegNum(regNum)].v; +} + +inline void Registers_ppc64::setVectorRegister(int regNum, v128 value) { + assert(validVectorRegister(regNum)); + _vectorScalarRegisters[getVectorRegNum(regNum)].v = value; +} + +inline const char *Registers_ppc64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "sp"; + case UNW_PPC64_R0: + return "r0"; + case UNW_PPC64_R1: + return "r1"; + case UNW_PPC64_R2: + return "r2"; + case 
UNW_PPC64_R3: + return "r3"; + case UNW_PPC64_R4: + return "r4"; + case UNW_PPC64_R5: + return "r5"; + case UNW_PPC64_R6: + return "r6"; + case UNW_PPC64_R7: + return "r7"; + case UNW_PPC64_R8: + return "r8"; + case UNW_PPC64_R9: + return "r9"; + case UNW_PPC64_R10: + return "r10"; + case UNW_PPC64_R11: + return "r11"; + case UNW_PPC64_R12: + return "r12"; + case UNW_PPC64_R13: + return "r13"; + case UNW_PPC64_R14: + return "r14"; + case UNW_PPC64_R15: + return "r15"; + case UNW_PPC64_R16: + return "r16"; + case UNW_PPC64_R17: + return "r17"; + case UNW_PPC64_R18: + return "r18"; + case UNW_PPC64_R19: + return "r19"; + case UNW_PPC64_R20: + return "r20"; + case UNW_PPC64_R21: + return "r21"; + case UNW_PPC64_R22: + return "r22"; + case UNW_PPC64_R23: + return "r23"; + case UNW_PPC64_R24: + return "r24"; + case UNW_PPC64_R25: + return "r25"; + case UNW_PPC64_R26: + return "r26"; + case UNW_PPC64_R27: + return "r27"; + case UNW_PPC64_R28: + return "r28"; + case UNW_PPC64_R29: + return "r29"; + case UNW_PPC64_R30: + return "r30"; + case UNW_PPC64_R31: + return "r31"; + case UNW_PPC64_CR0: + return "cr0"; + case UNW_PPC64_CR1: + return "cr1"; + case UNW_PPC64_CR2: + return "cr2"; + case UNW_PPC64_CR3: + return "cr3"; + case UNW_PPC64_CR4: + return "cr4"; + case UNW_PPC64_CR5: + return "cr5"; + case UNW_PPC64_CR6: + return "cr6"; + case UNW_PPC64_CR7: + return "cr7"; + case UNW_PPC64_XER: + return "xer"; + case UNW_PPC64_LR: + return "lr"; + case UNW_PPC64_CTR: + return "ctr"; + case UNW_PPC64_VRSAVE: + return "vrsave"; + case UNW_PPC64_F0: + return "fp0"; + case UNW_PPC64_F1: + return "fp1"; + case UNW_PPC64_F2: + return "fp2"; + case UNW_PPC64_F3: + return "fp3"; + case UNW_PPC64_F4: + return "fp4"; + case UNW_PPC64_F5: + return "fp5"; + case UNW_PPC64_F6: + return "fp6"; + case UNW_PPC64_F7: + return "fp7"; + case UNW_PPC64_F8: + return "fp8"; + case UNW_PPC64_F9: + return "fp9"; + case UNW_PPC64_F10: + return "fp10"; + case UNW_PPC64_F11: + return "fp11"; + case UNW_PPC64_F12: + return "fp12"; + case UNW_PPC64_F13: + return "fp13"; + case UNW_PPC64_F14: + return "fp14"; + case UNW_PPC64_F15: + return "fp15"; + case UNW_PPC64_F16: + return "fp16"; + case UNW_PPC64_F17: + return "fp17"; + case UNW_PPC64_F18: + return "fp18"; + case UNW_PPC64_F19: + return "fp19"; + case UNW_PPC64_F20: + return "fp20"; + case UNW_PPC64_F21: + return "fp21"; + case UNW_PPC64_F22: + return "fp22"; + case UNW_PPC64_F23: + return "fp23"; + case UNW_PPC64_F24: + return "fp24"; + case UNW_PPC64_F25: + return "fp25"; + case UNW_PPC64_F26: + return "fp26"; + case UNW_PPC64_F27: + return "fp27"; + case UNW_PPC64_F28: + return "fp28"; + case UNW_PPC64_F29: + return "fp29"; + case UNW_PPC64_F30: + return "fp30"; + case UNW_PPC64_F31: + return "fp31"; + case UNW_PPC64_V0: + return "v0"; + case UNW_PPC64_V1: + return "v1"; + case UNW_PPC64_V2: + return "v2"; + case UNW_PPC64_V3: + return "v3"; + case UNW_PPC64_V4: + return "v4"; + case UNW_PPC64_V5: + return "v5"; + case UNW_PPC64_V6: + return "v6"; + case UNW_PPC64_V7: + return "v7"; + case UNW_PPC64_V8: + return "v8"; + case UNW_PPC64_V9: + return "v9"; + case UNW_PPC64_V10: + return "v10"; + case UNW_PPC64_V11: + return "v11"; + case UNW_PPC64_V12: + return "v12"; + case UNW_PPC64_V13: + return "v13"; + case UNW_PPC64_V14: + return "v14"; + case UNW_PPC64_V15: + return "v15"; + case UNW_PPC64_V16: + return "v16"; + case UNW_PPC64_V17: + return "v17"; + case UNW_PPC64_V18: + return "v18"; + case UNW_PPC64_V19: + return "v19"; + case UNW_PPC64_V20: + return "v20"; + case 
UNW_PPC64_V21: + return "v21"; + case UNW_PPC64_V22: + return "v22"; + case UNW_PPC64_V23: + return "v23"; + case UNW_PPC64_V24: + return "v24"; + case UNW_PPC64_V25: + return "v25"; + case UNW_PPC64_V26: + return "v26"; + case UNW_PPC64_V27: + return "v27"; + case UNW_PPC64_V28: + return "v28"; + case UNW_PPC64_V29: + return "v29"; + case UNW_PPC64_V30: + return "v30"; + case UNW_PPC64_V31: + return "v31"; + } + return "unknown register"; +} +#endif // _LIBUNWIND_TARGET_PPC64 + + +#if defined(_LIBUNWIND_TARGET_AARCH64) +/// Registers_arm64 holds the register state of a thread in a 64-bit arm +/// process. +class _LIBUNWIND_HIDDEN Registers_arm64 { +public: + Registers_arm64(); + Registers_arm64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + uint64_t getRegisterLocation(int regNum) const; + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } + static int getArch() { return REGISTERS_ARM64; } + + uint64_t getSP() const { return _registers.__sp; } + void setSP(uint64_t value, uint64_t location) { _registers.__sp = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint64_t value, uint64_t location) { _registers.__pc = value; } + uint64_t getFP() const { return _registers.__fp; } + void setFP(uint64_t value, uint64_t location) { _registers.__fp = value; } + +private: + struct GPRs { + uint64_t __x[29]; // x0-x28 + uint64_t __fp; // Frame pointer x29 + uint64_t __lr; // Link register x30 + uint64_t __sp; // Stack pointer x31 + uint64_t __pc; // Program counter + uint64_t __ra_sign_state; // RA sign state register + }; + + struct GPRLocations { + uint64_t __x[29]; // x0-x28 + uint64_t __fp; // Frame pointer x29 + uint64_t __lr; // Link register x30 + uint64_t __sp; // Stack pointer x31 + uint64_t __pc; // Program counter + uint64_t padding; // 16-byte align + }; + + GPRs _registers; + GPRLocations _registerLocations; + double _vectorHalfRegisters[32]; + // Currently only the lower double in 128-bit vectore registers + // is perserved during unwinding. We could define new register + // numbers (> 96) which mean whole vector registers, then this + // struct would need to change to contain whole vector registers. 
+}; + +inline Registers_arm64::Registers_arm64(const void *registers) { + static_assert((check_fit::does_fit), + "arm64 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + static_assert( + sizeof(GPRs) == 0x110, + "expected VFP registers to be at offset 272"); + memcpy(_vectorHalfRegisters, + static_cast(registers) + sizeof(GPRs), + sizeof(_vectorHalfRegisters)); +} + +inline Registers_arm64::Registers_arm64() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters)); +} + +inline bool Registers_arm64::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 95) + return false; + if (regNum == UNW_ARM64_RA_SIGN_STATE) + return true; + if ((regNum > 31) && (regNum < 64)) + return false; + return true; +} + +inline uint64_t Registers_arm64::getRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return _registers.__pc; + if (regNum == UNW_REG_SP) + return _registers.__sp; + if (regNum == UNW_ARM64_RA_SIGN_STATE) + return _registers.__ra_sign_state; + if ((regNum >= 0) && (regNum < 32)) + return _registers.__x[regNum]; + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline void Registers_arm64::setRegister(int regNum, uint64_t value, uint64_t location) { + if (regNum == UNW_REG_IP) { + _registers.__pc = value; + _registerLocations.__pc = location; + } + else if (regNum == UNW_REG_SP) { + _registers.__sp = value; + _registerLocations.__sp = location; + } + else if (regNum == UNW_ARM64_RA_SIGN_STATE) + _registers.__ra_sign_state = value; + else if ((regNum >= 0) && (regNum < 32)) { + _registers.__x[regNum] = value; + _registerLocations.__x[regNum] = location; + } + else + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline uint64_t Registers_arm64::getRegisterLocation(int regNum) const { + if (regNum == UNW_REG_IP) + return _registerLocations.__pc; + if (regNum == UNW_REG_SP) + return _registerLocations.__sp; + if ((regNum >= 0) && (regNum < 32)) + return _registerLocations.__x[regNum]; + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline const char *Registers_arm64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "pc"; + case UNW_REG_SP: + return "sp"; + case UNW_ARM64_X0: + return "x0"; + case UNW_ARM64_X1: + return "x1"; + case UNW_ARM64_X2: + return "x2"; + case UNW_ARM64_X3: + return "x3"; + case UNW_ARM64_X4: + return "x4"; + case UNW_ARM64_X5: + return "x5"; + case UNW_ARM64_X6: + return "x6"; + case UNW_ARM64_X7: + return "x7"; + case UNW_ARM64_X8: + return "x8"; + case UNW_ARM64_X9: + return "x9"; + case UNW_ARM64_X10: + return "x10"; + case UNW_ARM64_X11: + return "x11"; + case UNW_ARM64_X12: + return "x12"; + case UNW_ARM64_X13: + return "x13"; + case UNW_ARM64_X14: + return "x14"; + case UNW_ARM64_X15: + return "x15"; + case UNW_ARM64_X16: + return "x16"; + case UNW_ARM64_X17: + return "x17"; + case UNW_ARM64_X18: + return "x18"; + case UNW_ARM64_X19: + return "x19"; + case UNW_ARM64_X20: + return "x20"; + case UNW_ARM64_X21: + return "x21"; + case UNW_ARM64_X22: + return "x22"; + case UNW_ARM64_X23: + return "x23"; + case UNW_ARM64_X24: + return "x24"; + case UNW_ARM64_X25: + return "x25"; + case UNW_ARM64_X26: + return "x26"; + case UNW_ARM64_X27: + return "x27"; + case UNW_ARM64_X28: + 
return "x28"; + case UNW_ARM64_X29: + return "fp"; + case UNW_ARM64_X30: + return "lr"; + case UNW_ARM64_X31: + return "sp"; + case UNW_ARM64_D0: + return "d0"; + case UNW_ARM64_D1: + return "d1"; + case UNW_ARM64_D2: + return "d2"; + case UNW_ARM64_D3: + return "d3"; + case UNW_ARM64_D4: + return "d4"; + case UNW_ARM64_D5: + return "d5"; + case UNW_ARM64_D6: + return "d6"; + case UNW_ARM64_D7: + return "d7"; + case UNW_ARM64_D8: + return "d8"; + case UNW_ARM64_D9: + return "d9"; + case UNW_ARM64_D10: + return "d10"; + case UNW_ARM64_D11: + return "d11"; + case UNW_ARM64_D12: + return "d12"; + case UNW_ARM64_D13: + return "d13"; + case UNW_ARM64_D14: + return "d14"; + case UNW_ARM64_D15: + return "d15"; + case UNW_ARM64_D16: + return "d16"; + case UNW_ARM64_D17: + return "d17"; + case UNW_ARM64_D18: + return "d18"; + case UNW_ARM64_D19: + return "d19"; + case UNW_ARM64_D20: + return "d20"; + case UNW_ARM64_D21: + return "d21"; + case UNW_ARM64_D22: + return "d22"; + case UNW_ARM64_D23: + return "d23"; + case UNW_ARM64_D24: + return "d24"; + case UNW_ARM64_D25: + return "d25"; + case UNW_ARM64_D26: + return "d26"; + case UNW_ARM64_D27: + return "d27"; + case UNW_ARM64_D28: + return "d28"; + case UNW_ARM64_D29: + return "d29"; + case UNW_ARM64_D30: + return "d30"; + case UNW_ARM64_D31: + return "d31"; + default: + return "unknown register"; + } +} + +inline bool Registers_arm64::validFloatRegister(int regNum) const { + if (regNum < UNW_ARM64_D0) + return false; + if (regNum > UNW_ARM64_D31) + return false; + return true; +} + +inline double Registers_arm64::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); + return _vectorHalfRegisters[regNum - UNW_ARM64_D0]; +} + +inline void Registers_arm64::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); + _vectorHalfRegisters[regNum - UNW_ARM64_D0] = value; +} + +inline bool Registers_arm64::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_arm64::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no arm64 vector register support yet"); +} + +inline void Registers_arm64::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no arm64 vector register support yet"); +} +#endif // _LIBUNWIND_TARGET_AARCH64 + +#if defined(_LIBUNWIND_TARGET_ARM) +/// Registers_arm holds the register state of a thread in a 32-bit arm +/// process. +/// +/// NOTE: Assumes VFPv3. On ARM processors without a floating point unit, +/// this uses more memory than required. 
+class _LIBUNWIND_HIDDEN Registers_arm { +public: + Registers_arm(); + Registers_arm(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value, uint32_t location); + uint32_t getRegisterLocation(int num) const; + bool validFloatRegister(int num) const; + unw_fpreg_t getFloatRegister(int num); + void setFloatRegister(int num, unw_fpreg_t value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto() { + restoreSavedFloatRegisters(); + restoreCoreAndJumpTo(); + } + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; } + static int getArch() { return REGISTERS_ARM; } + + uint32_t getSP() const { return _registers.__sp; } + void setSP(uint32_t value, uint32_t location) { _registers.__sp = value; _registerLocations.__sp = location; } + uint32_t getIP() const { return _registers.__pc; } + void setIP(uint32_t value, uint32_t location) { _registers.__pc = value; _registerLocations.__pc = location; } + + void saveVFPAsX() { + assert(_use_X_for_vfp_save || !_saved_vfp_d0_d15); + _use_X_for_vfp_save = true; + } + + void restoreSavedFloatRegisters() { + if (_saved_vfp_d0_d15) { + if (_use_X_for_vfp_save) + restoreVFPWithFLDMX(_vfp_d0_d15_pad); + else + restoreVFPWithFLDMD(_vfp_d0_d15_pad); + } + if (_saved_vfp_d16_d31) + restoreVFPv3(_vfp_d16_d31); +#if defined(__ARM_WMMX) + if (_saved_iwmmx) + restoreiWMMX(_iwmmx); + if (_saved_iwmmx_control) + restoreiWMMXControl(_iwmmx_control); +#endif + } + +private: + struct GPRs { + uint32_t __r[13]; // r0-r12 + uint32_t __sp; // Stack pointer r13 + uint32_t __lr; // Link register r14 + uint32_t __pc; // Program counter r15 + }; + + struct GPRLocations { + uint32_t __r[13]; // r0-r12 + uint32_t __sp; // Stack pointer r13 + uint32_t __lr; // Link register r14 + uint32_t __pc; // Program counter r15 + }; + + static void saveVFPWithFSTMD(void *); + static void saveVFPWithFSTMX(void*); + static void saveVFPv3(void*); + static void restoreVFPWithFLDMD(void*); + static void restoreVFPWithFLDMX(void*); + static void restoreVFPv3(void*); +#if defined(__ARM_WMMX) + static void saveiWMMX(void*); + static void saveiWMMXControl(uint32_t*); + static void restoreiWMMX(void*); + static void restoreiWMMXControl(uint32_t*); +#endif + void restoreCoreAndJumpTo(); + + // ARM registers + GPRs _registers; + GPRLocations _registerLocations; + + // We save floating point registers lazily because we can't know ahead of + // time which ones are used. See EHABI #4.7. + + // Whether D0-D15 are saved in the FTSMX instead of FSTMD format. + // + // See EHABI #7.5 that explains how matching instruction sequences for load + // and store need to be used to correctly restore the exact register bits. + bool _use_X_for_vfp_save; + // Whether VFP D0-D15 are saved. + bool _saved_vfp_d0_d15; + // Whether VFPv3 D16-D31 are saved. + bool _saved_vfp_d16_d31; + // VFP registers D0-D15, + padding if saved using FSTMX + unw_fpreg_t _vfp_d0_d15_pad[17]; + // VFPv3 registers D16-D31, always saved using FSTMD + unw_fpreg_t _vfp_d16_d31[16]; +#if defined(__ARM_WMMX) + // Whether iWMMX data registers are saved. + bool _saved_iwmmx; + // Whether iWMMX control registers are saved. 
+ mutable bool _saved_iwmmx_control; + // iWMMX registers + unw_fpreg_t _iwmmx[16]; + // iWMMX control registers + mutable uint32_t _iwmmx_control[4]; +#endif +}; + +inline Registers_arm::Registers_arm(const void *registers) + : _use_X_for_vfp_save(false), + _saved_vfp_d0_d15(false), + _saved_vfp_d16_d31(false) { + static_assert((check_fit::does_fit), + "arm registers do not fit into unw_context_t"); + // See __unw_getcontext() note about data. + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); + memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); +#if defined(__ARM_WMMX) + _saved_iwmmx = false; + _saved_iwmmx_control = false; + memset(&_iwmmx, 0, sizeof(_iwmmx)); + memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); +#endif +} + +inline Registers_arm::Registers_arm() + : _use_X_for_vfp_save(false), + _saved_vfp_d0_d15(false), + _saved_vfp_d16_d31(false) { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); + memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); +#if defined(__ARM_WMMX) + _saved_iwmmx = false; + _saved_iwmmx_control = false; + memset(&_iwmmx, 0, sizeof(_iwmmx)); + memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); +#endif +} + +inline bool Registers_arm::validRegister(int regNum) const { + // Returns true for all non-VFP registers supported by the EHABI + // virtual register set (VRS). + if (regNum == UNW_REG_IP) + return true; + + if (regNum == UNW_REG_SP) + return true; + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15) + return true; + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) + return true; +#endif + + return false; +} + +inline uint32_t Registers_arm::getRegister(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) + return _registers.__sp; + + if (regNum == UNW_ARM_LR) + return _registers.__lr; + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) + return _registers.__pc; + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) + return _registers.__r[regNum]; + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) { + if (!_saved_iwmmx_control) { + _saved_iwmmx_control = true; + saveiWMMXControl(_iwmmx_control); + } + return _iwmmx_control[regNum - UNW_ARM_WC0]; + } +#endif + + _LIBUNWIND_ABORT("unsupported arm register"); +} + +inline void Registers_arm::setRegister(int regNum, uint32_t value, uint32_t location) { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) { + _registers.__sp = value; + _registerLocations.__sp = location; + return; + } + + if (regNum == UNW_ARM_LR) { + _registers.__lr = value; + _registerLocations.__lr = location; + return; + } + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) { + _registers.__pc = value; + _registerLocations.__pc = location; + return; + } + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) { + _registers.__r[regNum] = value; + _registerLocations.__r[regNum] = location; + return; + } + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) { + if (!_saved_iwmmx_control) { + _saved_iwmmx_control = true; + saveiWMMXControl(_iwmmx_control); + } + _iwmmx_control[regNum - UNW_ARM_WC0] = value; + return; + } +#endif + + _LIBUNWIND_ABORT("unsupported arm register"); +} + +inline uint32_t Registers_arm::getRegisterLocation(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) + 
return _registerLocations.__sp; + + if (regNum == UNW_ARM_LR) + return _registerLocations.__lr; + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) + return _registerLocations.__pc; + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) + return _registerLocations.__r[regNum]; + + _LIBUNWIND_ABORT("unsupported arm register"); +} + +inline const char *Registers_arm::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + case UNW_ARM_IP: // UNW_ARM_R15 is alias + return "pc"; + case UNW_ARM_LR: // UNW_ARM_R14 is alias + return "lr"; + case UNW_REG_SP: + case UNW_ARM_SP: // UNW_ARM_R13 is alias + return "sp"; + case UNW_ARM_R0: + return "r0"; + case UNW_ARM_R1: + return "r1"; + case UNW_ARM_R2: + return "r2"; + case UNW_ARM_R3: + return "r3"; + case UNW_ARM_R4: + return "r4"; + case UNW_ARM_R5: + return "r5"; + case UNW_ARM_R6: + return "r6"; + case UNW_ARM_R7: + return "r7"; + case UNW_ARM_R8: + return "r8"; + case UNW_ARM_R9: + return "r9"; + case UNW_ARM_R10: + return "r10"; + case UNW_ARM_R11: + return "r11"; + case UNW_ARM_R12: + return "r12"; + case UNW_ARM_S0: + return "s0"; + case UNW_ARM_S1: + return "s1"; + case UNW_ARM_S2: + return "s2"; + case UNW_ARM_S3: + return "s3"; + case UNW_ARM_S4: + return "s4"; + case UNW_ARM_S5: + return "s5"; + case UNW_ARM_S6: + return "s6"; + case UNW_ARM_S7: + return "s7"; + case UNW_ARM_S8: + return "s8"; + case UNW_ARM_S9: + return "s9"; + case UNW_ARM_S10: + return "s10"; + case UNW_ARM_S11: + return "s11"; + case UNW_ARM_S12: + return "s12"; + case UNW_ARM_S13: + return "s13"; + case UNW_ARM_S14: + return "s14"; + case UNW_ARM_S15: + return "s15"; + case UNW_ARM_S16: + return "s16"; + case UNW_ARM_S17: + return "s17"; + case UNW_ARM_S18: + return "s18"; + case UNW_ARM_S19: + return "s19"; + case UNW_ARM_S20: + return "s20"; + case UNW_ARM_S21: + return "s21"; + case UNW_ARM_S22: + return "s22"; + case UNW_ARM_S23: + return "s23"; + case UNW_ARM_S24: + return "s24"; + case UNW_ARM_S25: + return "s25"; + case UNW_ARM_S26: + return "s26"; + case UNW_ARM_S27: + return "s27"; + case UNW_ARM_S28: + return "s28"; + case UNW_ARM_S29: + return "s29"; + case UNW_ARM_S30: + return "s30"; + case UNW_ARM_S31: + return "s31"; + case UNW_ARM_D0: + return "d0"; + case UNW_ARM_D1: + return "d1"; + case UNW_ARM_D2: + return "d2"; + case UNW_ARM_D3: + return "d3"; + case UNW_ARM_D4: + return "d4"; + case UNW_ARM_D5: + return "d5"; + case UNW_ARM_D6: + return "d6"; + case UNW_ARM_D7: + return "d7"; + case UNW_ARM_D8: + return "d8"; + case UNW_ARM_D9: + return "d9"; + case UNW_ARM_D10: + return "d10"; + case UNW_ARM_D11: + return "d11"; + case UNW_ARM_D12: + return "d12"; + case UNW_ARM_D13: + return "d13"; + case UNW_ARM_D14: + return "d14"; + case UNW_ARM_D15: + return "d15"; + case UNW_ARM_D16: + return "d16"; + case UNW_ARM_D17: + return "d17"; + case UNW_ARM_D18: + return "d18"; + case UNW_ARM_D19: + return "d19"; + case UNW_ARM_D20: + return "d20"; + case UNW_ARM_D21: + return "d21"; + case UNW_ARM_D22: + return "d22"; + case UNW_ARM_D23: + return "d23"; + case UNW_ARM_D24: + return "d24"; + case UNW_ARM_D25: + return "d25"; + case UNW_ARM_D26: + return "d26"; + case UNW_ARM_D27: + return "d27"; + case UNW_ARM_D28: + return "d28"; + case UNW_ARM_D29: + return "d29"; + case UNW_ARM_D30: + return "d30"; + case UNW_ARM_D31: + return "d31"; + default: + return "unknown register"; + } +} + +inline bool Registers_arm::validFloatRegister(int regNum) const { + // NOTE: Consider the intel MMX registers floating points so the + // __unw_get_fpreg can be 
used to transmit the 64-bit data back. + return ((regNum >= UNW_ARM_D0) && (regNum <= UNW_ARM_D31)) +#if defined(__ARM_WMMX) + || ((regNum >= UNW_ARM_WR0) && (regNum <= UNW_ARM_WR15)) +#endif + ; +} + +inline unw_fpreg_t Registers_arm::getFloatRegister(int regNum) { + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { + if (!_saved_vfp_d0_d15) { + _saved_vfp_d0_d15 = true; + if (_use_X_for_vfp_save) + saveVFPWithFSTMX(_vfp_d0_d15_pad); + else + saveVFPWithFSTMD(_vfp_d0_d15_pad); + } + return _vfp_d0_d15_pad[regNum - UNW_ARM_D0]; + } + + if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) { + if (!_saved_vfp_d16_d31) { + _saved_vfp_d16_d31 = true; + saveVFPv3(_vfp_d16_d31); + } + return _vfp_d16_d31[regNum - UNW_ARM_D16]; + } + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) { + if (!_saved_iwmmx) { + _saved_iwmmx = true; + saveiWMMX(_iwmmx); + } + return _iwmmx[regNum - UNW_ARM_WR0]; + } +#endif + + _LIBUNWIND_ABORT("Unknown ARM float register"); +} + +inline void Registers_arm::setFloatRegister(int regNum, unw_fpreg_t value) { + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { + if (!_saved_vfp_d0_d15) { + _saved_vfp_d0_d15 = true; + if (_use_X_for_vfp_save) + saveVFPWithFSTMX(_vfp_d0_d15_pad); + else + saveVFPWithFSTMD(_vfp_d0_d15_pad); + } + _vfp_d0_d15_pad[regNum - UNW_ARM_D0] = value; + return; + } + + if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) { + if (!_saved_vfp_d16_d31) { + _saved_vfp_d16_d31 = true; + saveVFPv3(_vfp_d16_d31); + } + _vfp_d16_d31[regNum - UNW_ARM_D16] = value; + return; + } + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) { + if (!_saved_iwmmx) { + _saved_iwmmx = true; + saveiWMMX(_iwmmx); + } + _iwmmx[regNum - UNW_ARM_WR0] = value; + return; + } +#endif + + _LIBUNWIND_ABORT("Unknown ARM float register"); +} + +inline bool Registers_arm::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_arm::getVectorRegister(int) const { + _LIBUNWIND_ABORT("ARM vector support not implemented"); +} + +inline void Registers_arm::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("ARM vector support not implemented"); +} +#endif // _LIBUNWIND_TARGET_ARM + + +#if defined(_LIBUNWIND_TARGET_OR1K) +/// Registers_or1k holds the register state of a thread in an OpenRISC1000 +/// process. 
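Before the OpenRISC class below, it is worth spelling out the lazy floating-point handling in Registers_arm above: the VFP bank is only snapshotted the first time a D register is read or written, and the class remembers whether the FSTMX or FSTMD store form was used so that restoreSavedFloatRegisters can replay the matching load form (EHABI #7.5 requires the save/restore pair to match for the bits to survive exactly). A stripped-down sketch of that pattern, with the assembly helpers stubbed out as no-ops:

#include <cstdint>

// Placeholders for the real FSTMD/FSTMX and FLDMD/FLDMX wrappers.
static void saveD0D15(uint64_t * /*buf*/, bool /*useXFormat*/) {}
static void restoreD0D15(uint64_t * /*buf*/, bool /*useXFormat*/) {}

class LazyVFPSketch {
public:
  uint64_t read(int d) {
    ensureSaved();
    return _d0_d15[d];
  }
  void write(int d, uint64_t bits) {
    ensureSaved();                         // snapshot first so a later restore
    _d0_d15[d] = bits;                     // does not clobber the other regs
  }
  void preferXFormat() { _useXFormat = true; }   // analogue of saveVFPAsX()
  void restoreIfSaved() {
    if (_saved)
      restoreD0D15(_d0_d15, _useXFormat);  // must match the format used to save
  }

private:
  void ensureSaved() {
    if (!_saved) {
      _saved = true;
      saveD0D15(_d0_d15, _useXFormat);
    }
  }
  bool _saved = false;
  bool _useXFormat = false;
  uint64_t _d0_d15[16] = {};               // the real class adds a pad slot for FSTMX
};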
+class _LIBUNWIND_HIDDEN Registers_or1k { +public: + Registers_or1k(); + Registers_or1k(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K; } + static int getArch() { return REGISTERS_OR1K; } + + uint64_t getSP() const { return _registers.__r[1]; } + void setSP(uint32_t value) { _registers.__r[1] = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint32_t value) { _registers.__pc = value; } + +private: + struct or1k_thread_state_t { + unsigned int __r[32]; // r0-r31 + unsigned int __pc; // Program counter + unsigned int __epcr; // Program counter at exception + }; + + or1k_thread_state_t _registers; +}; + +inline Registers_or1k::Registers_or1k(const void *registers) { + static_assert((check_fit::does_fit), + "or1k registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_or1k::Registers_or1k() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_or1k::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_OR1K_R31) + return true; + if (regNum == UNW_OR1K_EPCR) + return true; + return false; +} + +inline uint32_t Registers_or1k::getRegister(int regNum) const { + if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) + return _registers.__r[regNum - UNW_OR1K_R0]; + + switch (regNum) { + case UNW_REG_IP: + return _registers.__pc; + case UNW_REG_SP: + return _registers.__r[1]; + case UNW_OR1K_EPCR: + return _registers.__epcr; + } + _LIBUNWIND_ABORT("unsupported or1k register"); +} + +inline void Registers_or1k::setRegister(int regNum, uint32_t value) { + if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) { + _registers.__r[regNum - UNW_OR1K_R0] = value; + return; + } + + switch (regNum) { + case UNW_REG_IP: + _registers.__pc = value; + return; + case UNW_REG_SP: + _registers.__r[1] = value; + return; + case UNW_OR1K_EPCR: + _registers.__epcr = value; + return; + } + _LIBUNWIND_ABORT("unsupported or1k register"); +} + +inline bool Registers_or1k::validFloatRegister(int /* regNum */) const { + return false; +} + +inline double Registers_or1k::getFloatRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("or1k float support not implemented"); +} + +inline void Registers_or1k::setFloatRegister(int /* regNum */, + double /* value */) { + _LIBUNWIND_ABORT("or1k float support not implemented"); +} + +inline bool Registers_or1k::validVectorRegister(int /* regNum */) const { + return false; +} + +inline v128 Registers_or1k::getVectorRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("or1k vector support not implemented"); +} + +inline void Registers_or1k::setVectorRegister(int /* regNum */, v128 /* value */) { + _LIBUNWIND_ABORT("or1k vector support not implemented"); +} + +inline const char *Registers_or1k::getRegisterName(int regNum) { + switch (regNum) { + case UNW_OR1K_R0: + return "r0"; + case UNW_OR1K_R1: + return "r1"; + case UNW_OR1K_R2: + return "r2"; + case 
UNW_OR1K_R3: + return "r3"; + case UNW_OR1K_R4: + return "r4"; + case UNW_OR1K_R5: + return "r5"; + case UNW_OR1K_R6: + return "r6"; + case UNW_OR1K_R7: + return "r7"; + case UNW_OR1K_R8: + return "r8"; + case UNW_OR1K_R9: + return "r9"; + case UNW_OR1K_R10: + return "r10"; + case UNW_OR1K_R11: + return "r11"; + case UNW_OR1K_R12: + return "r12"; + case UNW_OR1K_R13: + return "r13"; + case UNW_OR1K_R14: + return "r14"; + case UNW_OR1K_R15: + return "r15"; + case UNW_OR1K_R16: + return "r16"; + case UNW_OR1K_R17: + return "r17"; + case UNW_OR1K_R18: + return "r18"; + case UNW_OR1K_R19: + return "r19"; + case UNW_OR1K_R20: + return "r20"; + case UNW_OR1K_R21: + return "r21"; + case UNW_OR1K_R22: + return "r22"; + case UNW_OR1K_R23: + return "r23"; + case UNW_OR1K_R24: + return "r24"; + case UNW_OR1K_R25: + return "r25"; + case UNW_OR1K_R26: + return "r26"; + case UNW_OR1K_R27: + return "r27"; + case UNW_OR1K_R28: + return "r28"; + case UNW_OR1K_R29: + return "r29"; + case UNW_OR1K_R30: + return "r30"; + case UNW_OR1K_R31: + return "r31"; + case UNW_OR1K_EPCR: + return "EPCR"; + default: + return "unknown register"; + } + +} +#endif // _LIBUNWIND_TARGET_OR1K + +#if defined(_LIBUNWIND_TARGET_MIPS_O32) +/// Registers_mips_o32 holds the register state of a thread in a 32-bit MIPS +/// process. +class _LIBUNWIND_HIDDEN Registers_mips_o32 { +public: + Registers_mips_o32(); + Registers_mips_o32(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS; } + static int getArch() { return REGISTERS_MIPS_O32; } + + uint32_t getSP() const { return _registers.__r[29]; } + void setSP(uint32_t value) { _registers.__r[29] = value; } + uint32_t getIP() const { return _registers.__pc; } + void setIP(uint32_t value) { _registers.__pc = value; } + +private: + struct mips_o32_thread_state_t { + uint32_t __r[32]; + uint32_t __pc; + uint32_t __hi; + uint32_t __lo; + }; + + mips_o32_thread_state_t _registers; +#ifdef __mips_hard_float + /// O32 with 32-bit floating point registers only uses half of this + /// space. However, using the same layout for 32-bit vs 64-bit + /// floating point registers results in a single context size for + /// O32 with hard float. 
+ uint32_t _padding; + double _floats[32]; +#endif +}; + +inline Registers_mips_o32::Registers_mips_o32(const void *registers) { + static_assert((check_fit::does_fit), + "mips_o32 registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_mips_o32::Registers_mips_o32() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_mips_o32::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_MIPS_R31) + return true; +#if __mips_isa_rev != 6 + if (regNum == UNW_MIPS_HI) + return true; + if (regNum == UNW_MIPS_LO) + return true; +#endif +#if defined(__mips_hard_float) && __mips_fpr == 32 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) + return true; +#endif + // FIXME: DSP accumulator registers, MSA registers + return false; +} + +inline uint32_t Registers_mips_o32::getRegister(int regNum) const { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) + return _registers.__r[regNum - UNW_MIPS_R0]; +#if defined(__mips_hard_float) && __mips_fpr == 32 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) { + uint32_t *p; + + if (regNum % 2 == 0) + p = (uint32_t *)&_floats[regNum - UNW_MIPS_F0]; + else + p = (uint32_t *)&_floats[(regNum - 1) - UNW_MIPS_F0] + 1; + return *p; + } +#endif + + switch (regNum) { + case UNW_REG_IP: + return _registers.__pc; + case UNW_REG_SP: + return _registers.__r[29]; + case UNW_MIPS_HI: + return _registers.__hi; + case UNW_MIPS_LO: + return _registers.__lo; + } + _LIBUNWIND_ABORT("unsupported mips_o32 register"); +} + +inline void Registers_mips_o32::setRegister(int regNum, uint32_t value) { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) { + _registers.__r[regNum - UNW_MIPS_R0] = value; + return; + } +#if defined(__mips_hard_float) && __mips_fpr == 32 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) { + uint32_t *p; + + if (regNum % 2 == 0) + p = (uint32_t *)&_floats[regNum - UNW_MIPS_F0]; + else + p = (uint32_t *)&_floats[(regNum - 1) - UNW_MIPS_F0] + 1; + *p = value; + return; + } +#endif + + switch (regNum) { + case UNW_REG_IP: + _registers.__pc = value; + return; + case UNW_REG_SP: + _registers.__r[29] = value; + return; + case UNW_MIPS_HI: + _registers.__hi = value; + return; + case UNW_MIPS_LO: + _registers.__lo = value; + return; + } + _LIBUNWIND_ABORT("unsupported mips_o32 register"); +} + +inline bool Registers_mips_o32::validFloatRegister(int regNum) const { +#if defined(__mips_hard_float) && __mips_fpr == 64 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) + return true; +#endif + return false; +} + +inline double Registers_mips_o32::getFloatRegister(int regNum) const { +#if defined(__mips_hard_float) && __mips_fpr == 64 + assert(validFloatRegister(regNum)); + return _floats[regNum - UNW_MIPS_F0]; +#else + _LIBUNWIND_ABORT("mips_o32 float support not implemented"); +#endif +} + +inline void Registers_mips_o32::setFloatRegister(int regNum, + double value) { +#if defined(__mips_hard_float) && __mips_fpr == 64 + assert(validFloatRegister(regNum)); + _floats[regNum - UNW_MIPS_F0] = value; +#else + _LIBUNWIND_ABORT("mips_o32 float support not implemented"); +#endif +} + +inline bool Registers_mips_o32::validVectorRegister(int /* regNum */) const { + return false; +} + +inline v128 Registers_mips_o32::getVectorRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("mips_o32 vector support not implemented"); +} + +inline void 
Registers_mips_o32::setVectorRegister(int /* regNum */, v128 /* value */) { + _LIBUNWIND_ABORT("mips_o32 vector support not implemented"); +} + +inline const char *Registers_mips_o32::getRegisterName(int regNum) { + switch (regNum) { + case UNW_MIPS_R0: + return "$0"; + case UNW_MIPS_R1: + return "$1"; + case UNW_MIPS_R2: + return "$2"; + case UNW_MIPS_R3: + return "$3"; + case UNW_MIPS_R4: + return "$4"; + case UNW_MIPS_R5: + return "$5"; + case UNW_MIPS_R6: + return "$6"; + case UNW_MIPS_R7: + return "$7"; + case UNW_MIPS_R8: + return "$8"; + case UNW_MIPS_R9: + return "$9"; + case UNW_MIPS_R10: + return "$10"; + case UNW_MIPS_R11: + return "$11"; + case UNW_MIPS_R12: + return "$12"; + case UNW_MIPS_R13: + return "$13"; + case UNW_MIPS_R14: + return "$14"; + case UNW_MIPS_R15: + return "$15"; + case UNW_MIPS_R16: + return "$16"; + case UNW_MIPS_R17: + return "$17"; + case UNW_MIPS_R18: + return "$18"; + case UNW_MIPS_R19: + return "$19"; + case UNW_MIPS_R20: + return "$20"; + case UNW_MIPS_R21: + return "$21"; + case UNW_MIPS_R22: + return "$22"; + case UNW_MIPS_R23: + return "$23"; + case UNW_MIPS_R24: + return "$24"; + case UNW_MIPS_R25: + return "$25"; + case UNW_MIPS_R26: + return "$26"; + case UNW_MIPS_R27: + return "$27"; + case UNW_MIPS_R28: + return "$28"; + case UNW_MIPS_R29: + return "$29"; + case UNW_MIPS_R30: + return "$30"; + case UNW_MIPS_R31: + return "$31"; + case UNW_MIPS_F0: + return "$f0"; + case UNW_MIPS_F1: + return "$f1"; + case UNW_MIPS_F2: + return "$f2"; + case UNW_MIPS_F3: + return "$f3"; + case UNW_MIPS_F4: + return "$f4"; + case UNW_MIPS_F5: + return "$f5"; + case UNW_MIPS_F6: + return "$f6"; + case UNW_MIPS_F7: + return "$f7"; + case UNW_MIPS_F8: + return "$f8"; + case UNW_MIPS_F9: + return "$f9"; + case UNW_MIPS_F10: + return "$f10"; + case UNW_MIPS_F11: + return "$f11"; + case UNW_MIPS_F12: + return "$f12"; + case UNW_MIPS_F13: + return "$f13"; + case UNW_MIPS_F14: + return "$f14"; + case UNW_MIPS_F15: + return "$f15"; + case UNW_MIPS_F16: + return "$f16"; + case UNW_MIPS_F17: + return "$f17"; + case UNW_MIPS_F18: + return "$f18"; + case UNW_MIPS_F19: + return "$f19"; + case UNW_MIPS_F20: + return "$f20"; + case UNW_MIPS_F21: + return "$f21"; + case UNW_MIPS_F22: + return "$f22"; + case UNW_MIPS_F23: + return "$f23"; + case UNW_MIPS_F24: + return "$f24"; + case UNW_MIPS_F25: + return "$f25"; + case UNW_MIPS_F26: + return "$f26"; + case UNW_MIPS_F27: + return "$f27"; + case UNW_MIPS_F28: + return "$f28"; + case UNW_MIPS_F29: + return "$f29"; + case UNW_MIPS_F30: + return "$f30"; + case UNW_MIPS_F31: + return "$f31"; + case UNW_MIPS_HI: + return "$hi"; + case UNW_MIPS_LO: + return "$lo"; + default: + return "unknown register"; + } +} +#endif // _LIBUNWIND_TARGET_MIPS_O32 + +#if defined(_LIBUNWIND_TARGET_MIPS_NEWABI) +/// Registers_mips_newabi holds the register state of a thread in a +/// MIPS process using NEWABI (the N32 or N64 ABIs). 
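One detail of the O32 class above worth spelling out before the NEWABI variant below: when the target uses 32-bit floating-point registers (__mips_fpr == 32), the class still reserves a full double per slot, and getRegister/setRegister address the individual 32-bit halves with pointer arithmetic. The same mapping as a tiny standalone helper (illustrative names, and the same aliasing caveats as the original):

#include <cstdint>

// 32 double-sized slots, mirroring the _floats array above.
static double floatsSketch[32];

// An even $fN lives in the first word of floatsSketch[N], an odd $fN in the
// second word of floatsSketch[N - 1]; the odd-indexed slots stay unused in
// this mode, which is why only half the space is actually needed.
static uint32_t *o32FprWord(int n) {
  if (n % 2 == 0)
    return reinterpret_cast<uint32_t *>(&floatsSketch[n]);
  return reinterpret_cast<uint32_t *>(&floatsSketch[n - 1]) + 1;
}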
+class _LIBUNWIND_HIDDEN Registers_mips_newabi { +public: + Registers_mips_newabi(); + Registers_mips_newabi(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS; } + static int getArch() { return REGISTERS_MIPS_NEWABI; } + + uint64_t getSP() const { return _registers.__r[29]; } + void setSP(uint64_t value) { _registers.__r[29] = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint64_t value) { _registers.__pc = value; } + +private: + struct mips_newabi_thread_state_t { + uint64_t __r[32]; + uint64_t __pc; + uint64_t __hi; + uint64_t __lo; + }; + + mips_newabi_thread_state_t _registers; +#ifdef __mips_hard_float + double _floats[32]; +#endif +}; + +inline Registers_mips_newabi::Registers_mips_newabi(const void *registers) { + static_assert((check_fit::does_fit), + "mips_newabi registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_mips_newabi::Registers_mips_newabi() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_mips_newabi::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_MIPS_R31) + return true; +#if __mips_isa_rev != 6 + if (regNum == UNW_MIPS_HI) + return true; + if (regNum == UNW_MIPS_LO) + return true; +#endif + // FIXME: Hard float, DSP accumulator registers, MSA registers + return false; +} + +inline uint64_t Registers_mips_newabi::getRegister(int regNum) const { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) + return _registers.__r[regNum - UNW_MIPS_R0]; + + switch (regNum) { + case UNW_REG_IP: + return _registers.__pc; + case UNW_REG_SP: + return _registers.__r[29]; + case UNW_MIPS_HI: + return _registers.__hi; + case UNW_MIPS_LO: + return _registers.__lo; + } + _LIBUNWIND_ABORT("unsupported mips_newabi register"); +} + +inline void Registers_mips_newabi::setRegister(int regNum, uint64_t value) { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) { + _registers.__r[regNum - UNW_MIPS_R0] = value; + return; + } + + switch (regNum) { + case UNW_REG_IP: + _registers.__pc = value; + return; + case UNW_REG_SP: + _registers.__r[29] = value; + return; + case UNW_MIPS_HI: + _registers.__hi = value; + return; + case UNW_MIPS_LO: + _registers.__lo = value; + return; + } + _LIBUNWIND_ABORT("unsupported mips_newabi register"); +} + +inline bool Registers_mips_newabi::validFloatRegister(int regNum) const { +#ifdef __mips_hard_float + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) + return true; +#endif + return false; +} + +inline double Registers_mips_newabi::getFloatRegister(int regNum) const { +#ifdef __mips_hard_float + assert(validFloatRegister(regNum)); + return _floats[regNum - UNW_MIPS_F0]; +#else + _LIBUNWIND_ABORT("mips_newabi float support not implemented"); +#endif +} + +inline void Registers_mips_newabi::setFloatRegister(int regNum, + double value) { +#ifdef __mips_hard_float + assert(validFloatRegister(regNum)); + 
_floats[regNum - UNW_MIPS_F0] = value; +#else + _LIBUNWIND_ABORT("mips_newabi float support not implemented"); +#endif +} + +inline bool Registers_mips_newabi::validVectorRegister(int /* regNum */) const { + return false; +} + +inline v128 Registers_mips_newabi::getVectorRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("mips_newabi vector support not implemented"); +} + +inline void Registers_mips_newabi::setVectorRegister(int /* regNum */, v128 /* value */) { + _LIBUNWIND_ABORT("mips_newabi vector support not implemented"); +} + +inline const char *Registers_mips_newabi::getRegisterName(int regNum) { + switch (regNum) { + case UNW_MIPS_R0: + return "$0"; + case UNW_MIPS_R1: + return "$1"; + case UNW_MIPS_R2: + return "$2"; + case UNW_MIPS_R3: + return "$3"; + case UNW_MIPS_R4: + return "$4"; + case UNW_MIPS_R5: + return "$5"; + case UNW_MIPS_R6: + return "$6"; + case UNW_MIPS_R7: + return "$7"; + case UNW_MIPS_R8: + return "$8"; + case UNW_MIPS_R9: + return "$9"; + case UNW_MIPS_R10: + return "$10"; + case UNW_MIPS_R11: + return "$11"; + case UNW_MIPS_R12: + return "$12"; + case UNW_MIPS_R13: + return "$13"; + case UNW_MIPS_R14: + return "$14"; + case UNW_MIPS_R15: + return "$15"; + case UNW_MIPS_R16: + return "$16"; + case UNW_MIPS_R17: + return "$17"; + case UNW_MIPS_R18: + return "$18"; + case UNW_MIPS_R19: + return "$19"; + case UNW_MIPS_R20: + return "$20"; + case UNW_MIPS_R21: + return "$21"; + case UNW_MIPS_R22: + return "$22"; + case UNW_MIPS_R23: + return "$23"; + case UNW_MIPS_R24: + return "$24"; + case UNW_MIPS_R25: + return "$25"; + case UNW_MIPS_R26: + return "$26"; + case UNW_MIPS_R27: + return "$27"; + case UNW_MIPS_R28: + return "$28"; + case UNW_MIPS_R29: + return "$29"; + case UNW_MIPS_R30: + return "$30"; + case UNW_MIPS_R31: + return "$31"; + case UNW_MIPS_F0: + return "$f0"; + case UNW_MIPS_F1: + return "$f1"; + case UNW_MIPS_F2: + return "$f2"; + case UNW_MIPS_F3: + return "$f3"; + case UNW_MIPS_F4: + return "$f4"; + case UNW_MIPS_F5: + return "$f5"; + case UNW_MIPS_F6: + return "$f6"; + case UNW_MIPS_F7: + return "$f7"; + case UNW_MIPS_F8: + return "$f8"; + case UNW_MIPS_F9: + return "$f9"; + case UNW_MIPS_F10: + return "$f10"; + case UNW_MIPS_F11: + return "$f11"; + case UNW_MIPS_F12: + return "$f12"; + case UNW_MIPS_F13: + return "$f13"; + case UNW_MIPS_F14: + return "$f14"; + case UNW_MIPS_F15: + return "$f15"; + case UNW_MIPS_F16: + return "$f16"; + case UNW_MIPS_F17: + return "$f17"; + case UNW_MIPS_F18: + return "$f18"; + case UNW_MIPS_F19: + return "$f19"; + case UNW_MIPS_F20: + return "$f20"; + case UNW_MIPS_F21: + return "$f21"; + case UNW_MIPS_F22: + return "$f22"; + case UNW_MIPS_F23: + return "$f23"; + case UNW_MIPS_F24: + return "$f24"; + case UNW_MIPS_F25: + return "$f25"; + case UNW_MIPS_F26: + return "$f26"; + case UNW_MIPS_F27: + return "$f27"; + case UNW_MIPS_F28: + return "$f28"; + case UNW_MIPS_F29: + return "$f29"; + case UNW_MIPS_F30: + return "$f30"; + case UNW_MIPS_F31: + return "$f31"; + case UNW_MIPS_HI: + return "$hi"; + case UNW_MIPS_LO: + return "$lo"; + default: + return "unknown register"; + } +} +#endif // _LIBUNWIND_TARGET_MIPS_NEWABI + +#if defined(_LIBUNWIND_TARGET_SPARC) +/// Registers_sparc holds the register state of a thread in a 32-bit Sparc +/// process. 
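A pattern shared by the MIPS classes above and the SPARC class below: the generic UNW_REG_SP and UNW_REG_IP ids have no storage of their own, they resolve onto fixed slots of the architectural register file ($29 and __pc for MIPS, %o6 and %o7 for SPARC). A minimal sketch of that aliasing, using made-up id values rather than libunwind's constants:

#include <cstdint>

enum { SKETCH_REG_IP = -1, SKETCH_REG_SP = -2 };  // illustrative ids only

struct MiniMipsRegs {
  uint64_t r[32];
  uint64_t pc;

  uint64_t get(int num) const {
    if (num == SKETCH_REG_IP)
      return pc;
    if (num == SKETCH_REG_SP)
      return r[29];   // $29 is the ABI stack pointer, the same slot getSP() uses
    return r[num];
  }
};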
+class _LIBUNWIND_HIDDEN Registers_sparc { +public: + Registers_sparc(); + Registers_sparc(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_SPARC; } + static int getArch() { return REGISTERS_SPARC; } + + uint64_t getSP() const { return _registers.__regs[UNW_SPARC_O6]; } + void setSP(uint32_t value) { _registers.__regs[UNW_SPARC_O6] = value; } + uint64_t getIP() const { return _registers.__regs[UNW_SPARC_O7]; } + void setIP(uint32_t value) { _registers.__regs[UNW_SPARC_O7] = value; } + +private: + struct sparc_thread_state_t { + unsigned int __regs[32]; + }; + + sparc_thread_state_t _registers; +}; + +inline Registers_sparc::Registers_sparc(const void *registers) { + static_assert((check_fit::does_fit), + "sparc registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_sparc::Registers_sparc() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_sparc::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_SPARC_I7) + return true; + return false; +} + +inline uint32_t Registers_sparc::getRegister(int regNum) const { + if ((UNW_SPARC_G0 <= regNum) && (regNum <= UNW_SPARC_I7)) { + return _registers.__regs[regNum]; + } + + switch (regNum) { + case UNW_REG_IP: + return _registers.__regs[UNW_SPARC_O7]; + case UNW_REG_SP: + return _registers.__regs[UNW_SPARC_O6]; + } + _LIBUNWIND_ABORT("unsupported sparc register"); +} + +inline void Registers_sparc::setRegister(int regNum, uint32_t value) { + if ((UNW_SPARC_G0 <= regNum) && (regNum <= UNW_SPARC_I7)) { + _registers.__regs[regNum] = value; + return; + } + + switch (regNum) { + case UNW_REG_IP: + _registers.__regs[UNW_SPARC_O7] = value; + return; + case UNW_REG_SP: + _registers.__regs[UNW_SPARC_O6] = value; + return; + } + _LIBUNWIND_ABORT("unsupported sparc register"); +} + +inline bool Registers_sparc::validFloatRegister(int) const { return false; } + +inline double Registers_sparc::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no Sparc float registers"); +} + +inline void Registers_sparc::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no Sparc float registers"); +} + +inline bool Registers_sparc::validVectorRegister(int) const { return false; } + +inline v128 Registers_sparc::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no Sparc vector registers"); +} + +inline void Registers_sparc::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no Sparc vector registers"); +} + +inline const char *Registers_sparc::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "pc"; + case UNW_SPARC_G0: + return "g0"; + case UNW_SPARC_G1: + return "g1"; + case UNW_SPARC_G2: + return "g2"; + case UNW_SPARC_G3: + return "g3"; + case UNW_SPARC_G4: + return "g4"; + case UNW_SPARC_G5: + return "g5"; + case UNW_SPARC_G6: + return "g6"; + case UNW_SPARC_G7: + return "g7"; + case UNW_SPARC_O0: + return "o0"; + case UNW_SPARC_O1: + 
return "o1"; + case UNW_SPARC_O2: + return "o2"; + case UNW_SPARC_O3: + return "o3"; + case UNW_SPARC_O4: + return "o4"; + case UNW_SPARC_O5: + return "o5"; + case UNW_REG_SP: + case UNW_SPARC_O6: + return "sp"; + case UNW_SPARC_O7: + return "o7"; + case UNW_SPARC_L0: + return "l0"; + case UNW_SPARC_L1: + return "l1"; + case UNW_SPARC_L2: + return "l2"; + case UNW_SPARC_L3: + return "l3"; + case UNW_SPARC_L4: + return "l4"; + case UNW_SPARC_L5: + return "l5"; + case UNW_SPARC_L6: + return "l6"; + case UNW_SPARC_L7: + return "l7"; + case UNW_SPARC_I0: + return "i0"; + case UNW_SPARC_I1: + return "i1"; + case UNW_SPARC_I2: + return "i2"; + case UNW_SPARC_I3: + return "i3"; + case UNW_SPARC_I4: + return "i4"; + case UNW_SPARC_I5: + return "i5"; + case UNW_SPARC_I6: + return "fp"; + case UNW_SPARC_I7: + return "i7"; + default: + return "unknown register"; + } +} +#endif // _LIBUNWIND_TARGET_SPARC + +} // namespace libunwind + +#endif // __REGISTERS_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.cpp b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.cpp new file mode 100644 index 0000000000000..3e0bb0c9de825 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.cpp @@ -0,0 +1,992 @@ +//===--------------------------- Unwind-EHABI.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements ARM zero-cost C++ exceptions +// +//===----------------------------------------------------------------------===// + +#include "Unwind-EHABI.h" + +#if defined(_LIBUNWIND_ARM_EHABI) + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind.h" +#include "libunwind_ext.h" +#include "unwind.h" + +namespace { + +// Strange order: take words in order, but inside word, take from most to least +// signinficant byte. +uint8_t getByte(const uint32_t* data, size_t offset) { + const uint8_t* byteData = reinterpret_cast(data); + return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))]; +} + +const char* getNextWord(const char* data, uint32_t* out) { + *out = *reinterpret_cast(data); + return data + 4; +} + +const char* getNextNibble(const char* data, uint32_t* out) { + *out = *reinterpret_cast(data); + return data + 2; +} + +struct Descriptor { + // See # 9.2 + typedef enum { + SU16 = 0, // Short descriptor, 16-bit entries + LU16 = 1, // Long descriptor, 16-bit entries + LU32 = 3, // Long descriptor, 32-bit entries + RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7, + RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11, + RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15 + } Format; + + // See # 9.2 + typedef enum { + CLEANUP = 0x0, + FUNC = 0x1, + CATCH = 0x2, + INVALID = 0x4 + } Kind; +}; + +_Unwind_Reason_Code ProcessDescriptors( + _Unwind_State state, + _Unwind_Control_Block* ucbp, + struct _Unwind_Context* context, + Descriptor::Format format, + const char* descriptorStart, + uint32_t flags) { + + // EHT is inlined in the index using compact form. No descriptors. #5 + if (flags & 0x1) + return _URC_CONTINUE_UNWIND; + + // TODO: We should check the state here, and determine whether we need to + // perform phase1 or phase2 unwinding. 
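+  // (Editor's note: in EHABI terms, _US_VIRTUAL_UNWIND_FRAME identifies the
+  // phase-1 search, while _US_UNWIND_FRAME_STARTING/_US_UNWIND_FRAME_RESUME
+  // identify phase-2 cleanup; that is the dispatch the TODO above refers to.)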
+ (void)state; + + const char* descriptor = descriptorStart; + uint32_t descriptorWord; + getNextWord(descriptor, &descriptorWord); + while (descriptorWord) { + // Read descriptor based on # 9.2. + uint32_t length; + uint32_t offset; + switch (format) { + case Descriptor::LU32: + descriptor = getNextWord(descriptor, &length); + descriptor = getNextWord(descriptor, &offset); + case Descriptor::LU16: + descriptor = getNextNibble(descriptor, &length); + descriptor = getNextNibble(descriptor, &offset); + default: + assert(false); + return _URC_FAILURE; + } + + // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value. + Descriptor::Kind kind = + static_cast((length & 0x1) | ((offset & 0x1) << 1)); + + // Clear off flag from last bit. + length &= ~1u; + offset &= ~1u; + uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset; + uintptr_t scopeEnd = scopeStart + length; + uintptr_t pc = _Unwind_GetIP(context); + bool isInScope = (scopeStart <= pc) && (pc < scopeEnd); + + switch (kind) { + case Descriptor::CLEANUP: { + // TODO(ajwong): Handle cleanup descriptors. + break; + } + case Descriptor::FUNC: { + // TODO(ajwong): Handle function descriptors. + break; + } + case Descriptor::CATCH: { + // Catch descriptors require gobbling one more word. + uint32_t landing_pad; + descriptor = getNextWord(descriptor, &landing_pad); + + if (isInScope) { + // TODO(ajwong): This is only phase1 compatible logic. Implement + // phase2. + landing_pad = signExtendPrel31(landing_pad & ~0x80000000); + if (landing_pad == 0xffffffff) { + return _URC_HANDLER_FOUND; + } else if (landing_pad == 0xfffffffe) { + return _URC_FAILURE; + } else { + /* + bool is_reference_type = landing_pad & 0x80000000; + void* matched_object; + if (__cxxabiv1::__cxa_type_match( + ucbp, reinterpret_cast(landing_pad), + is_reference_type, + &matched_object) != __cxxabiv1::ctm_failed) + return _URC_HANDLER_FOUND; + */ + _LIBUNWIND_ABORT("Type matching not implemented"); + } + } + break; + } + default: + _LIBUNWIND_ABORT("Invalid descriptor kind found."); + } + + getNextWord(descriptor, &descriptorWord); + } + + return _URC_CONTINUE_UNWIND; +} + +static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, + _Unwind_Control_Block* ucbp, + struct _Unwind_Context* context) { + // Read the compact model EHT entry's header # 6.3 + const uint32_t* unwindingData = ucbp->pr_cache.ehtp; + assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry"); + Descriptor::Format format = + static_cast((*unwindingData & 0x0f000000) >> 24); + + const char *lsda = + reinterpret_cast(_Unwind_GetLanguageSpecificData(context)); + + // Handle descriptors before unwinding so they are processed in the context + // of the correct stack frame. + _Unwind_Reason_Code result = + ProcessDescriptors(state, ucbp, context, format, lsda, + ucbp->pr_cache.additional); + + if (result != _URC_CONTINUE_UNWIND) + return result; + + if (__unw_step(reinterpret_cast(context)) != UNW_STEP_SUCCESS) + return _URC_FAILURE; + return _URC_CONTINUE_UNWIND; +} + +// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE / +// _UVRSD_UINT32. +uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) { + return ((1U << (count_minus_one + 1)) - 1) << start; +} + +// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP / +// _UVRSD_DOUBLE. 
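+// (Editor's note, worked examples: RegisterMask(4, 3) == 0x000000f0, selecting
+// r4-r7; RegisterRange(8, 2) == 0x00080003, i.e. first register 8, count 3,
+// selecting d8-d10.)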
+uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) { + return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1); +} + +} // end anonymous namespace + +/** + * Decodes an EHT entry. + * + * @param data Pointer to EHT. + * @param[out] off Offset from return value (in bytes) to begin interpretation. + * @param[out] len Number of bytes in unwind code. + * @return Pointer to beginning of unwind code. + */ +extern "C" const uint32_t* +decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) { + if ((*data & 0x80000000) == 0) { + // 6.2: Generic Model + // + // EHT entry is a prel31 pointing to the PR, followed by data understood + // only by the personality routine. Fortunately, all existing assembler + // implementations, including GNU assembler, LLVM integrated assembler, + // and ARM assembler, assume that the unwind opcodes come after the + // personality routine address. + *off = 1; // First byte is size data. + *len = (((data[1] >> 24) & 0xff) + 1) * 4; + data++; // Skip the first word, which is the prel31 offset. + } else { + // 6.3: ARM Compact Model + // + // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indeded + // by format: + Descriptor::Format format = + static_cast((*data & 0x0f000000) >> 24); + switch (format) { + case Descriptor::SU16: + *len = 4; + *off = 1; + break; + case Descriptor::LU16: + case Descriptor::LU32: + *len = 4 + 4 * ((*data & 0x00ff0000) >> 16); + *off = 2; + break; + default: + return nullptr; + } + } + return data; +} + +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data, + size_t offset, size_t len) { + bool wrotePC = false; + bool finish = false; + while (offset < len && !finish) { + uint8_t byte = getByte(data, offset++); + if ((byte & 0x80) == 0) { + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); + if (byte & 0x40) + sp -= (((uint32_t)byte & 0x3f) << 2) + 4; + else + sp += ((uint32_t)byte << 2) + 4; + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp, NULL); + } else { + switch (byte & 0xf0) { + case 0x80: { + if (offset >= len) + return _URC_FAILURE; + uint32_t registers = + (((uint32_t)byte & 0x0f) << 12) | + (((uint32_t)getByte(data, offset++)) << 4); + if (!registers) + return _URC_FAILURE; + if (registers & (1 << 15)) + wrotePC = true; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0x90: { + uint8_t reg = byte & 0x0f; + if (reg == 13 || reg == 15) + return _URC_FAILURE; + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg, + _UVRSD_UINT32, &sp); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp, NULL); + break; + } + case 0xa0: { + uint32_t registers = RegisterMask(4, byte & 0x07); + if (byte & 0x08) + registers |= 1 << 14; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0xb0: { + switch (byte) { + case 0xb0: + finish = true; + break; + case 0xb1: { + if (offset >= len) + return _URC_FAILURE; + uint8_t registers = getByte(data, offset++); + if (registers & 0xf0 || !registers) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0xb2: { + uint32_t addend = 0; + uint32_t shift = 0; + // This decodes a uleb128 value. 
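+          // (Editor's note: ULEB128 stores 7 bits per byte, least-significant
+          // group first, with the top bit set on every byte but the last; for
+          // example the bytes 0xE5 0x8E 0x26 decode to 624485. The resulting
+          // adjustment below is vsp += 0x204 + (uleb128 << 2), per opcode 0xb2.)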
+ while (true) { + if (offset >= len) + return _URC_FAILURE; + uint32_t v = getByte(data, offset++); + addend |= (v & 0x7f) << shift; + if ((v & 0x80) == 0) + break; + shift += 7; + } + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp); + sp += 0x204 + (addend << 2); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp, NULL); + break; + } + case 0xb3: { + uint8_t v = getByte(data, offset++); + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(static_cast(v >> 4), + v & 0x0f), _UVRSD_VFPX); + break; + } + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + return _URC_FAILURE; + default: + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(8, byte & 0x07), _UVRSD_VFPX); + break; + } + break; + } + case 0xc0: { + switch (byte) { +#if defined(__ARM_WMMX) + case 0xc0: + case 0xc1: + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + _Unwind_VRS_Pop(context, _UVRSC_WMMXD, + RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE); + break; + case 0xc6: { + uint8_t v = getByte(data, offset++); + uint8_t start = static_cast(v >> 4); + uint8_t count_minus_one = v & 0xf; + if (start + count_minus_one >= 16) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_WMMXD, + RegisterRange(start, count_minus_one), + _UVRSD_DOUBLE); + break; + } + case 0xc7: { + uint8_t v = getByte(data, offset++); + if (!v || v & 0xf0) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE); + break; + } +#endif + case 0xc8: + case 0xc9: { + uint8_t v = getByte(data, offset++); + uint8_t start = + static_cast(((byte == 0xc8) ? 16 : 0) + (v >> 4)); + uint8_t count_minus_one = v & 0xf; + if (start + count_minus_one >= 32) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(start, count_minus_one), + _UVRSD_DOUBLE); + break; + } + default: + return _URC_FAILURE; + } + break; + } + case 0xd0: { + if (byte & 0x08) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7), + _UVRSD_DOUBLE); + break; + } + default: + return _URC_FAILURE; + } + } + } + if (!wrotePC) { + uint32_t lr; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr, NULL); + } + return _URC_CONTINUE_UNWIND; +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__aeabi_unwind_cpp_pr0(_Unwind_State state, _Unwind_Control_Block *ucbp, + _Unwind_Context *context) { + return unwindOneFrame(state, ucbp, context); +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__aeabi_unwind_cpp_pr1(_Unwind_State state, _Unwind_Control_Block *ucbp, + _Unwind_Context *context) { + return unwindOneFrame(state, ucbp, context); +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__aeabi_unwind_cpp_pr2(_Unwind_State state, _Unwind_Control_Block *ucbp, + _Unwind_Context *context) { + return unwindOneFrame(state, ucbp, context); +} + +static _Unwind_Reason_Code +unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { + // EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during + // phase 1 and then restoring it to the "primary VRS" for phase 2. The + // effect is phase 2 doesn't see any of the VRS manipulations from phase 1. + // In this implementation, the phases don't share the VRS backing store. + // Instead, they are passed the original |uc| and they create a new VRS + // from scratch thus achieving the same effect. 
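+  // (Editor's note: __unw_init_local below rebuilds the cursor from the
+  // context captured by __unw_getcontext in _Unwind_RaiseException, so nothing
+  // the search phase does to its cursor is visible to the cleanup phase.)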
+ __unw_init_local(cursor, uc); + + // Walk each frame looking for a place to stop. + for (bool handlerNotFound = true; handlerNotFound;) { + + // See if frame has code to run (has personality routine). + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + static_cast(exception_object)); + return _URC_FATAL_PHASE1_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, " + "lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, + static_cast(exception_object), pc, + frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // If there is a personality routine, ask it if it will want to stop at + // this frame. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(long)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): calling personality function %p", + static_cast(exception_object), + reinterpret_cast(reinterpret_cast(p))); + struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); + exception_object->pr_cache.fnstart = frameInfo.start_ip; + exception_object->pr_cache.ehtp = + (_Unwind_EHT_Header *)frameInfo.unwind_info; + exception_object->pr_cache.additional = frameInfo.flags; + _Unwind_Reason_Code personalityResult = + (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p " + "additional %x", + static_cast(exception_object), personalityResult, + exception_object->pr_cache.fnstart, + static_cast(exception_object->pr_cache.ehtp), + exception_object->pr_cache.additional); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember stack pointer at the frame + handlerNotFound = false; + // p should have initialized barrier_cache. EHABI #7.3.5 + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND", + static_cast(exception_object)); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND", + static_cast(exception_object)); + // continue unwinding + break; + + // EHABI #7.3.3 + case _URC_FAILURE: + return _URC_FAILURE; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", + static_cast(exception_object)); + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + +static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, + _Unwind_Exception *exception_object, + bool resume) { + // See comment at the start of unwind_phase1 regarding VRS integrity. + __unw_init_local(cursor, uc); + + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", + static_cast(exception_object)); + int frame_count = 0; + + // Walk each frame until we reach where search phase said to stop. 
+ while (true) { + // Ask libunwind to get next frame (skip over first which is + // _Unwind_RaiseException or _Unwind_Resume). + // + // Resume only ever makes sense for 1 frame. + _Unwind_State state = + resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING; + if (resume && frame_count == 1) { + // On a resume, first unwind the _Unwind_Resume() frame. The next frame + // is now the landing pad for the cleanup from a previous execution of + // phase2. To continue unwindingly correctly, replace VRS[15] with the + // IP of the frame that the previous run of phase2 installed the context + // for. After this, continue unwinding as if normal. + // + // See #7.4.6 for details. + __unw_set_reg(cursor, UNW_REG_IP, + exception_object->unwinder_cache.reserved2, NULL); + resume = false; + } + + // Get info about this frame. + unw_word_t sp; + unw_proc_info_t frameInfo; + __unw_get_reg(cursor, UNW_REG_SP, &sp); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE2_ERROR", + static_cast(exception_object)); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, sp=0x%" PRIxPTR ", " + "lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "", + static_cast(exception_object), frameInfo.start_ip, + functionName, sp, frameInfo.lsda, + frameInfo.handler); + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(long)(frameInfo.handler); + struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); + // EHABI #7.2 + exception_object->pr_cache.fnstart = frameInfo.start_ip; + exception_object->pr_cache.ehtp = + (_Unwind_EHT_Header *)frameInfo.unwind_info; + exception_object->pr_cache.additional = frameInfo.flags; + _Unwind_Reason_Code personalityResult = + (*p)(state, exception_object, context); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // Continue unwinding + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", + static_cast(exception_object)); + // EHABI #7.2 + if (sp == exception_object->barrier_cache.sp) { + // Phase 1 said we would stop at this frame, but we did not... + _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now in phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT", + static_cast(exception_object)); + // Personality routine says to transfer control to landing pad. + // We may get control back if landing pad calls _Unwind_Resume(). + if (_LIBUNWIND_TRACING_UNWINDING) { + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_SP, &sp); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " + "user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR, + static_cast(exception_object), + pc, sp); + } + + { + // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume + // is called back, to find this same frame. 
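+          // (Editor's note: the value saved into unwinder_cache.reserved2 here
+          // is read back by the resume branch at the top of this loop.)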
+ unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + exception_object->unwinder_cache.reserved2 = (uint32_t)pc; + } + __unw_resume(cursor); + // __unw_resume() only returns if there was an error. + return _URC_FATAL_PHASE2_ERROR; + + // # EHABI #7.4.3 + case _URC_FAILURE: + abort(); + + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + frame_count++; + } + + // Clean up phase did not resume at the frame that the search phase + // said it would... + return _URC_FATAL_PHASE2_ERROR; +} + +/// Called by __cxa_throw. Only returns if there is a fatal error. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", + static_cast(exception_object)); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // This field for is for compatibility with GCC to say this isn't a forced + // unwind. EHABI #7.2 + exception_object->unwinder_cache.reserved1 = 0; + + // phase 1: the search phase + _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase + return unwind_phase2(&uc, &cursor, exception_object, false); +} + +_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) { + // This is to be called when exception handling completes to give us a chance + // to perform any housekeeping. EHABI #7.2. But we have nothing to do here. + (void)exception_object; +} + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. Note: the call to _Unwind_Resume() is from compiler +/// generated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call __cxa_rethrow() which +/// in turn calls _Unwind_Resume_or_Rethrow(). +_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", + static_cast(exception_object)); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0, + // which is in the same position as private_1 below. + // TODO(ajwong): Who wronte the above? Why is it true? + unwind_phase2(&uc, &cursor, exception_object, true); + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + +/// Called by personality handler during phase 2 to get LSDA for current frame. 
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.lsda; + _LIBUNWIND_TRACE_API( + "_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx", + static_cast(context), (long long)result); + return result; +} + +static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation, + void* valuep) { + uint64_t value = 0; + switch (representation) { + case _UVRSD_UINT32: + case _UVRSD_FLOAT: + memcpy(&value, valuep, sizeof(uint32_t)); + break; + + case _UVRSD_VFPX: + case _UVRSD_UINT64: + case _UVRSD_DOUBLE: + memcpy(&value, valuep, sizeof(uint64_t)); + break; + } + return value; +} + +_LIBUNWIND_EXPORT _Unwind_VRS_Result +_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep, unw_word_t *pos) { + _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, " + "rep=%d, value=0x%llX)", + static_cast(context), regclass, regno, + representation, + ValueAsBitPattern(representation, valuep)); + unw_cursor_t *cursor = (unw_cursor_t *)context; + switch (regclass) { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 || regno > 15) + return _UVRSR_FAILED; + return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), + *(unw_word_t *)valuep,(unw_word_t *)pos) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_VFP: + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + if (representation == _UVRSD_VFPX) { + // Can only touch d0-15 with FSTMFDX. + if (regno > 15) + return _UVRSR_FAILED; + __unw_save_vfp_as_X(cursor); + } else { + if (regno > 31) + return _UVRSR_FAILED; + } + return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), + *(unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#if defined(__ARM_WMMX) + case _UVRSC_WMMXC: + if (representation != _UVRSD_UINT32 || regno > 3) + return _UVRSR_FAILED; + return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), + *(unw_word_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_WMMXD: + if (representation != _UVRSD_DOUBLE || regno > 31) + return _UVRSR_FAILED; + return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), + *(unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#else + case _UVRSC_WMMXC: + case _UVRSC_WMMXD: + break; +#endif + } + _LIBUNWIND_ABORT("unsupported register class"); +} + +static _Unwind_VRS_Result +_Unwind_VRS_Get_Internal(_Unwind_Context *context, + _Unwind_VRS_RegClass regclass, uint32_t regno, + _Unwind_VRS_DataRepresentation representation, + void *valuep) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + switch (regclass) { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 || regno > 15) + return _UVRSR_FAILED; + return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), + (unw_word_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_VFP: + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + if (representation == _UVRSD_VFPX) { + // Can only touch d0-15 with FSTMFDX. 
+ if (regno > 15) + return _UVRSR_FAILED; + __unw_save_vfp_as_X(cursor); + } else { + if (regno > 31) + return _UVRSR_FAILED; + } + return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), + (unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#if defined(__ARM_WMMX) + case _UVRSC_WMMXC: + if (representation != _UVRSD_UINT32 || regno > 3) + return _UVRSR_FAILED; + return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), + (unw_word_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_WMMXD: + if (representation != _UVRSD_DOUBLE || regno > 31) + return _UVRSR_FAILED; + return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), + (unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#else + case _UVRSC_WMMXC: + case _UVRSC_WMMXD: + break; +#endif + } + _LIBUNWIND_ABORT("unsupported register class"); +} + +_LIBUNWIND_EXPORT _Unwind_VRS_Result +_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep) { + _Unwind_VRS_Result result = + _Unwind_VRS_Get_Internal(context, regclass, regno, representation, + valuep); + _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, " + "rep=%d, value=0x%llX, result = %d)", + static_cast(context), regclass, regno, + representation, + ValueAsBitPattern(representation, valuep), result); + return result; +} + +_Unwind_VRS_Result +_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t discriminator, + _Unwind_VRS_DataRepresentation representation) { + _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, " + "discriminator=%d, representation=%d)", + static_cast(context), regclass, discriminator, + representation); + switch (regclass) { + case _UVRSC_WMMXC: +#if !defined(__ARM_WMMX) + break; +#endif + case _UVRSC_CORE: { + if (representation != _UVRSD_UINT32) + return _UVRSR_FAILED; + // When popping SP from the stack, we don't want to override it from the + // computed new stack location. See EHABI #7.5.4 table 3. + bool poppedSP = false; + uint32_t* sp; + uint32_t* pos; + if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + for (uint32_t i = 0; i < 16; ++i) { + if (!(discriminator & static_cast(1 << i))) + continue; + pos = sp; + uint32_t value = *sp++; + if (regclass == _UVRSC_CORE && i == 13) + poppedSP = true; + if (_Unwind_VRS_Set(context, regclass, i, + _UVRSD_UINT32, &value, pos) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + } + if (!poppedSP) { + return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp, NULL); + } + return _UVRSR_OK; + } + case _UVRSC_WMMXD: +#if !defined(__ARM_WMMX) + break; +#endif + case _UVRSC_VFP: { + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + uint32_t first = discriminator >> 16; + uint32_t count = discriminator & 0xffff; + uint32_t end = first+count; + uint32_t* sp; + if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard + // format 1", which is equivalent to FSTMD + a padding word. + for (uint32_t i = first; i < end; ++i) { + // SP is only 32-bit aligned so don't copy 64-bit at a time. 
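+        // (Editor's note: each register is rebuilt from two 32-bit loads, low
+        // word first; the ++sp after this loop skips the FSTMX padding word
+        // mentioned above when the representation is _UVRSD_VFPX.)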
+ uint64_t value = *sp++; + value |= ((uint64_t)(*sp++)) << 32; + if (_Unwind_VRS_Set(context, regclass, i, representation, &value, NULL) != + _UVRSR_OK) + return _UVRSR_FAILED; + } + if (representation == _UVRSD_VFPX) + ++sp; + return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp, NULL); + } + } + _LIBUNWIND_ABORT("unsupported register class"); +} + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.start_ip; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX", + static_cast(context), (long long)result); + return result; +} + + +/// Called by personality handler during phase 2 if a foreign exception +// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", + static_cast(exception_object)); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__gnu_unwind_frame(_Unwind_Exception *exception_object, + struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + if (__unw_step(cursor) != UNW_STEP_SUCCESS) + return _URC_FAILURE; + return _URC_OK; +} + +#endif // defined(_LIBUNWIND_ARM_EHABI) diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.h b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.h new file mode 100644 index 0000000000000..6897082a337f3 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.h @@ -0,0 +1,50 @@ +//===------------------------- Unwind-EHABI.hpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_EHABI_H__ +#define __UNWIND_EHABI_H__ + +#include <__libunwind_config.h> + +#if defined(_LIBUNWIND_ARM_EHABI) + +#include +#include + +// Unable to unwind in the ARM index table (section 5 EHABI). 
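+// (Editor's note: per EHABI section 5, an index-table entry whose second word
+// holds this value marks a function that must not be unwound through.)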
+#define UNW_EXIDX_CANTUNWIND 0x1 + +static inline uint32_t signExtendPrel31(uint32_t data) { + return data | ((data & 0x40000000u) << 1); +} + +static inline uint32_t readPrel31(const uint32_t *data) { + return (((uint32_t)(uintptr_t)data) + signExtendPrel31(*data)); +} + +#if defined(__cplusplus) +extern "C" { +#endif + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // defined(_LIBUNWIND_ARM_EHABI) + +#endif // __UNWIND_EHABI_H__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-seh.cpp b/src/coreclr/src/nativeaot/libunwind/src/Unwind-seh.cpp new file mode 100644 index 0000000000000..7647f2e0db0bf --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-seh.cpp @@ -0,0 +1,501 @@ +//===--------------------------- Unwind-seh.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements SEH-based Itanium C++ exceptions. +// +//===----------------------------------------------------------------------===// + +#include "config.h" + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "libunwind_ext.h" +#include "UnwindCursor.hpp" + +using namespace libunwind; + +#define STATUS_USER_DEFINED (1u << 29) + +#define STATUS_GCC_MAGIC (('G' << 16) | ('C' << 8) | 'C') + +#define MAKE_CUSTOM_STATUS(s, c) \ + ((NTSTATUS)(((s) << 30) | STATUS_USER_DEFINED | (c))) +#define MAKE_GCC_EXCEPTION(c) \ + MAKE_CUSTOM_STATUS(STATUS_SEVERITY_SUCCESS, STATUS_GCC_MAGIC | ((c) << 24)) + +/// SEH exception raised by libunwind when the program calls +/// \c _Unwind_RaiseException. +#define STATUS_GCC_THROW MAKE_GCC_EXCEPTION(0) // 0x20474343 +/// SEH exception raised by libunwind to initiate phase 2 of exception +/// handling. +#define STATUS_GCC_UNWIND MAKE_GCC_EXCEPTION(1) // 0x21474343 + +/// Class of foreign exceptions based on unrecognized SEH exceptions. +static const uint64_t kSEHExceptionClass = 0x434C4E4753454800; // CLNGSEH\0 + +/// Exception cleanup routine used by \c _GCC_specific_handler to +/// free foreign exceptions. +static void seh_exc_cleanup(_Unwind_Reason_Code urc, _Unwind_Exception *exc) { + (void)urc; + if (exc->exception_class != kSEHExceptionClass) + _LIBUNWIND_ABORT("SEH cleanup called on non-SEH exception"); + free(exc); +} + +static int __unw_init_seh(unw_cursor_t *cursor, CONTEXT *ctx); +static DISPATCHER_CONTEXT *__unw_seh_get_disp_ctx(unw_cursor_t *cursor); +static void __unw_seh_set_disp_ctx(unw_cursor_t *cursor, + DISPATCHER_CONTEXT *disp); + +/// Common implementation of SEH-style handler functions used by Itanium- +/// style frames. Depending on how and why it was called, it may do one of: +/// a) Delegate to the given Itanium-style personality function; or +/// b) Initiate a collided unwind to halt unwinding. 
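+/// (Editor's note: STATUS_GCC_THROW tags exceptions raised by this library,
+/// while STATUS_GCC_UNWIND tags the collided unwind used to reach the chosen
+/// landing pad; the two early branches below separate exactly these cases.)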
+_LIBUNWIND_EXPORT EXCEPTION_DISPOSITION +_GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, + DISPATCHER_CONTEXT *disp, __personality_routine pers) { + unw_cursor_t cursor; + _Unwind_Exception *exc; + _Unwind_Action action; + struct _Unwind_Context *ctx = nullptr; + _Unwind_Reason_Code urc; + uintptr_t retval, target; + bool ours = false; + + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler(%#010lx(%lx), %p)", + ms_exc->ExceptionCode, ms_exc->ExceptionFlags, + (void *)frame); + if (ms_exc->ExceptionCode == STATUS_GCC_UNWIND) { + if (IS_TARGET_UNWIND(ms_exc->ExceptionFlags)) { + // Set up the upper return value (the lower one and the target PC + // were set in the call to RtlUnwindEx()) for the landing pad. +#ifdef __x86_64__ + disp->ContextRecord->Rdx = ms_exc->ExceptionInformation[3]; +#elif defined(__arm__) + disp->ContextRecord->R1 = ms_exc->ExceptionInformation[3]; +#elif defined(__aarch64__) + disp->ContextRecord->X1 = ms_exc->ExceptionInformation[3]; +#endif + } + // This is the collided unwind to the landing pad. Nothing to do. + return ExceptionContinueSearch; + } + + if (ms_exc->ExceptionCode == STATUS_GCC_THROW) { + // This is (probably) a libunwind-controlled exception/unwind. Recover the + // parameters which we set below, and pass them to the personality function. + ours = true; + exc = (_Unwind_Exception *)ms_exc->ExceptionInformation[0]; + if (!IS_UNWINDING(ms_exc->ExceptionFlags) && ms_exc->NumberParameters > 1) { + ctx = (struct _Unwind_Context *)ms_exc->ExceptionInformation[1]; + action = (_Unwind_Action)ms_exc->ExceptionInformation[2]; + } + } else { + // Foreign exception. + exc = (_Unwind_Exception *)malloc(sizeof(_Unwind_Exception)); + exc->exception_class = kSEHExceptionClass; + exc->exception_cleanup = seh_exc_cleanup; + memset(exc->private_, 0, sizeof(exc->private_)); + } + if (!ctx) { + __unw_init_seh(&cursor, disp->ContextRecord); + __unw_seh_set_disp_ctx(&cursor, disp); + __unw_set_reg(&cursor, UNW_REG_IP, disp->ControlPc - 1); + ctx = (struct _Unwind_Context *)&cursor; + + if (!IS_UNWINDING(ms_exc->ExceptionFlags)) { + if (ours && ms_exc->NumberParameters > 1) + action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_FORCE_UNWIND); + else + action = _UA_SEARCH_PHASE; + } else { + if (ours && ms_exc->ExceptionInformation[1] == (ULONG_PTR)frame) + action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME); + else + action = _UA_CLEANUP_PHASE; + } + } + + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler() calling personality " + "function %p(1, %d, %llx, %p, %p)", + (void *)pers, action, exc->exception_class, + (void *)exc, (void *)ctx); + urc = pers(1, action, exc->exception_class, exc, ctx); + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler() personality returned %d", urc); + switch (urc) { + case _URC_CONTINUE_UNWIND: + // If we're in phase 2, and the personality routine said to continue + // at the target frame, we're in real trouble. + if (action & _UA_HANDLER_FRAME) + _LIBUNWIND_ABORT("Personality continued unwind at the target frame!"); + return ExceptionContinueSearch; + case _URC_HANDLER_FOUND: + // If we were called by __libunwind_seh_personality(), indicate that + // a handler was found; otherwise, initiate phase 2 by unwinding. + if (ours && ms_exc->NumberParameters > 1) + return 4 /* ExecptionExecuteHandler in mingw */; + // This should never happen in phase 2. 
+ if (IS_UNWINDING(ms_exc->ExceptionFlags)) + _LIBUNWIND_ABORT("Personality indicated exception handler in phase 2!"); + exc->private_[1] = (ULONG_PTR)frame; + if (ours) { + ms_exc->NumberParameters = 4; + ms_exc->ExceptionInformation[1] = (ULONG_PTR)frame; + } + // FIXME: Indicate target frame in foreign case! + // phase 2: the clean up phase + RtlUnwindEx(frame, (PVOID)disp->ControlPc, ms_exc, exc, ms_ctx, disp->HistoryTable); + _LIBUNWIND_ABORT("RtlUnwindEx() failed"); + case _URC_INSTALL_CONTEXT: { + // If we were called by __libunwind_seh_personality(), indicate that + // a handler was found; otherwise, it's time to initiate a collided + // unwind to the target. + if (ours && !IS_UNWINDING(ms_exc->ExceptionFlags) && ms_exc->NumberParameters > 1) + return 4 /* ExecptionExecuteHandler in mingw */; + // This should never happen in phase 1. + if (!IS_UNWINDING(ms_exc->ExceptionFlags)) + _LIBUNWIND_ABORT("Personality installed context during phase 1!"); +#ifdef __x86_64__ + exc->private_[2] = disp->TargetIp; + __unw_get_reg(&cursor, UNW_X86_64_RAX, &retval); + __unw_get_reg(&cursor, UNW_X86_64_RDX, &exc->private_[3]); +#elif defined(__arm__) + exc->private_[2] = disp->TargetPc; + __unw_get_reg(&cursor, UNW_ARM_R0, &retval); + __unw_get_reg(&cursor, UNW_ARM_R1, &exc->private_[3]); +#elif defined(__aarch64__) + exc->private_[2] = disp->TargetPc; + __unw_get_reg(&cursor, UNW_ARM64_X0, &retval); + __unw_get_reg(&cursor, UNW_ARM64_X1, &exc->private_[3]); +#endif + __unw_get_reg(&cursor, UNW_REG_IP, &target); + ms_exc->ExceptionCode = STATUS_GCC_UNWIND; +#ifdef __x86_64__ + ms_exc->ExceptionInformation[2] = disp->TargetIp; +#elif defined(__arm__) || defined(__aarch64__) + ms_exc->ExceptionInformation[2] = disp->TargetPc; +#endif + ms_exc->ExceptionInformation[3] = exc->private_[3]; + // Give NTRTL some scratch space to keep track of the collided unwind. + // Don't use the one that was passed in; we don't want to overwrite the + // context in the DISPATCHER_CONTEXT. + CONTEXT new_ctx; + RtlUnwindEx(frame, (PVOID)target, ms_exc, (PVOID)retval, &new_ctx, disp->HistoryTable); + _LIBUNWIND_ABORT("RtlUnwindEx() failed"); + } + // Anything else indicates a serious problem. + default: return ExceptionContinueExecution; + } +} + +/// Personality function returned by \c __unw_get_proc_info() in SEH contexts. +/// This is a wrapper that calls the real SEH handler function, which in +/// turn (at least, for Itanium-style frames) calls the real Itanium +/// personality function (see \c _GCC_specific_handler()). +extern "C" _Unwind_Reason_Code +__libunwind_seh_personality(int version, _Unwind_Action state, + uint64_t klass, _Unwind_Exception *exc, + struct _Unwind_Context *context) { + (void)version; + (void)klass; + EXCEPTION_RECORD ms_exc; + bool phase2 = (state & (_UA_SEARCH_PHASE|_UA_CLEANUP_PHASE)) == _UA_CLEANUP_PHASE; + ms_exc.ExceptionCode = STATUS_GCC_THROW; + ms_exc.ExceptionFlags = 0; + ms_exc.NumberParameters = 3; + ms_exc.ExceptionInformation[0] = (ULONG_PTR)exc; + ms_exc.ExceptionInformation[1] = (ULONG_PTR)context; + ms_exc.ExceptionInformation[2] = state; + DISPATCHER_CONTEXT *disp_ctx = + __unw_seh_get_disp_ctx((unw_cursor_t *)context); + EXCEPTION_DISPOSITION ms_act = disp_ctx->LanguageHandler(&ms_exc, + (PVOID)disp_ctx->EstablisherFrame, + disp_ctx->ContextRecord, + disp_ctx); + switch (ms_act) { + case ExceptionContinueSearch: return _URC_CONTINUE_UNWIND; + case 4 /*ExceptionExecuteHandler*/: + return phase2 ? _URC_INSTALL_CONTEXT : _URC_HANDLER_FOUND; + default: + return phase2 ? 
_URC_FATAL_PHASE2_ERROR : _URC_FATAL_PHASE1_ERROR; + } +} + +static _Unwind_Reason_Code +unwind_phase2_forced(unw_context_t *uc, + _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + unw_cursor_t cursor2; + __unw_init_local(&cursor2, uc); + + // Walk each frame until we reach where search phase said to stop + while (__unw_step(&cursor2) > 0) { + + // Update info about this frame. + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(&cursor2, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step " + "failed => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(&cursor2, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): start_ip=0x%" PRIx64 + ", func=%s, lsda=0x%" PRIx64 ", personality=0x%" PRIx64, + (void *)exception_object, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // Call stop function at each frame. + _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(&cursor2), stop_parameter); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stop function returned %d", + (void *)exception_object, stopResult); + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stopped by stop function", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(intptr_t)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): calling personality function %p", + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(&cursor2)); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_CONTINUE_UNWIND", + (void *)exception_object); + // Destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_INSTALL_CONTEXT", + (void *)exception_object); + // We may get control back if landing pad calls _Unwind_Resume(). + __unw_resume(&cursor2); + break; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " + "_URC_FATAL_PHASE2_ERROR", + (void *)exception_object, personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Call stop function one last time and tell it we've reached the end + // of the stack. 
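+  // (Editor's note: this final call is the only one made with _UA_END_OF_STACK
+  // set, which is how the stop function learns the target frame was never
+  // reached.)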
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + "function with _UA_END_OF_STACK", + (void *)exception_object); + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(&cursor2), stop_parameter); + + // Clean up phase did not resume at the frame that the search phase said it + // would. + return _URC_FATAL_PHASE2_ERROR; +} + +/// Called by \c __cxa_throw(). Only returns if there is a fatal error. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", + (void *)exception_object); + + // Mark that this is a non-forced unwind, so _Unwind_Resume() + // can do the right thing. + memset(exception_object->private_, 0, sizeof(exception_object->private_)); + + // phase 1: the search phase + // We'll let the system do that for us. + RaiseException(STATUS_GCC_THROW, 0, 1, (ULONG_PTR *)&exception_object); + + // If we get here, either something went horribly wrong or we reached the + // top of the stack. Either way, let libc++abi call std::terminate(). + return _URC_END_OF_STACK; +} + +/// When \c _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function; the landing +/// pad code may then call \c _Unwind_Resume() to continue with the +/// unwinding. Note: the call to \c _Unwind_Resume() is from compiler +/// geneated user code. All other \c _Unwind_* routines are called +/// by the C++ runtime \c __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call \c __cxa_rethrow() which +/// in turn calls \c _Unwind_Resume_or_Rethrow(). +_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", (void *)exception_object); + + if (exception_object->private_[0] != 0) { + unw_context_t uc; + + __unw_getcontext(&uc); + unwind_phase2_forced(&uc, exception_object, + (_Unwind_Stop_Fn) exception_object->private_[0], + (void *)exception_object->private_[4]); + } else { + // Recover the parameters for the unwind from the exception object + // so we can start unwinding again. + EXCEPTION_RECORD ms_exc; + CONTEXT ms_ctx; + UNWIND_HISTORY_TABLE hist; + + memset(&ms_exc, 0, sizeof(ms_exc)); + memset(&hist, 0, sizeof(hist)); + ms_exc.ExceptionCode = STATUS_GCC_THROW; + ms_exc.ExceptionFlags = EXCEPTION_NONCONTINUABLE; + ms_exc.NumberParameters = 4; + ms_exc.ExceptionInformation[0] = (ULONG_PTR)exception_object; + ms_exc.ExceptionInformation[1] = exception_object->private_[1]; + ms_exc.ExceptionInformation[2] = exception_object->private_[2]; + ms_exc.ExceptionInformation[3] = exception_object->private_[3]; + RtlUnwindEx((PVOID)exception_object->private_[1], + (PVOID)exception_object->private_[2], &ms_exc, + exception_object, &ms_ctx, &hist); + } + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + +/// Not used by C++. +/// Unwinds stack, calling "stop" function at each frame. +/// Could be used to implement \c longjmp(). 
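+/// (Editor's note, illustrative sketch only: a caller supplies a function with
+/// the _Unwind_Stop_Fn shape, which is invoked with
+/// _UA_FORCE_UNWIND | _UA_CLEANUP_PHASE at each frame and with
+/// _UA_END_OF_STACK added on the final call:
+///
+///   static _Unwind_Reason_Code my_stop(int version, _Unwind_Action actions,
+///                                      uint64_t exc_class,
+///                                      _Unwind_Exception *exc,
+///                                      struct _Unwind_Context *ctx,
+///                                      void *param) {
+///     if (actions & _UA_END_OF_STACK)
+///       abort();                          // target frame was never found
+///     if (frame_is_target(ctx, param))    // hypothetical predicate
+///       transfer_control(ctx, param);     // longjmp-style restore; no return
+///     return _URC_NO_REASON;              // keep unwinding
+///   }
+///
+/// my_stop, frame_is_target and transfer_control are made-up names, not part
+/// of this patch.)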
+_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + _LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)", + (void *)exception_object, (void *)(uintptr_t)stop); + unw_context_t uc; + __unw_getcontext(&uc); + + // Mark that this is a forced unwind, so _Unwind_Resume() can do + // the right thing. + exception_object->private_[0] = (uintptr_t) stop; + exception_object->private_[4] = (uintptr_t) stop_parameter; + + // do it + return unwind_phase2_forced(&uc, exception_object, stop, stop_parameter); +} + +/// Called by personality handler during phase 2 to get LSDA for current frame. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + uintptr_t result = + (uintptr_t)__unw_seh_get_disp_ctx((unw_cursor_t *)context)->HandlerData; + _LIBUNWIND_TRACE_API( + "_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return result; +} + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + DISPATCHER_CONTEXT *disp = __unw_seh_get_disp_ctx((unw_cursor_t *)context); + uintptr_t result = (uintptr_t)disp->FunctionEntry->BeginAddress + disp->ImageBase; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return result; +} + +static int __unw_init_seh(unw_cursor_t *cursor, CONTEXT *context) { +#ifdef _LIBUNWIND_TARGET_X86_64 + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); + auto *co = reinterpret_cast(cursor); + co->setInfoBasedOnIPRegister(); + return UNW_ESUCCESS; +#elif defined(_LIBUNWIND_TARGET_ARM) + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); + auto *co = reinterpret_cast(cursor); + co->setInfoBasedOnIPRegister(); + return UNW_ESUCCESS; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); + auto *co = reinterpret_cast(cursor); + co->setInfoBasedOnIPRegister(); + return UNW_ESUCCESS; +#else + return UNW_EINVAL; +#endif +} + +static DISPATCHER_CONTEXT *__unw_seh_get_disp_ctx(unw_cursor_t *cursor) { +#ifdef _LIBUNWIND_TARGET_X86_64 + return reinterpret_cast *>(cursor)->getDispatcherContext(); +#elif defined(_LIBUNWIND_TARGET_ARM) + return reinterpret_cast *>(cursor)->getDispatcherContext(); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + return reinterpret_cast *>(cursor)->getDispatcherContext(); +#else + return nullptr; +#endif +} + +static void __unw_seh_set_disp_ctx(unw_cursor_t *cursor, + DISPATCHER_CONTEXT *disp) { +#ifdef _LIBUNWIND_TARGET_X86_64 + reinterpret_cast *>(cursor)->setDispatcherContext(disp); +#elif defined(_LIBUNWIND_TARGET_ARM) + reinterpret_cast *>(cursor)->setDispatcherContext(disp); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + reinterpret_cast *>(cursor)->setDispatcherContext(disp); +#endif +} + +#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-sjlj.c b/src/coreclr/src/nativeaot/libunwind/src/Unwind-sjlj.c new file mode 100644 index 0000000000000..b8bb7c83bdff7 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-sjlj.c @@ -0,0 +1,516 @@ +//===--------------------------- Unwind-sjlj.c ----------------------------===// +// +// Part of the LLVM Project, under 
the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements setjump-longjump based C++ exceptions +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include +#include + +#include "config.h" + +/// With SJLJ based exceptions, any function that has a catch clause or needs to +/// do any clean up when an exception propagates through it, needs to call +/// \c _Unwind_SjLj_Register at the start of the function and +/// \c _Unwind_SjLj_Unregister at the end. The register function is called with +/// the address of a block of memory in the function's stack frame. The runtime +/// keeps a linked list (stack) of these blocks - one per thread. The calling +/// function also sets the personality and lsda fields of the block. + +#if defined(_LIBUNWIND_BUILD_SJLJ_APIS) + +struct _Unwind_FunctionContext { + // next function in stack of handlers + struct _Unwind_FunctionContext *prev; + + // set by calling function before registering to be the landing pad + uint32_t resumeLocation; + + // set by personality handler to be parameters passed to landing pad function + uint32_t resumeParameters[4]; + + // set by calling function before registering + __personality_routine personality; // arm offset=24 + uintptr_t lsda; // arm offset=28 + + // variable length array, contains registers to restore + // 0 = r7, 1 = pc, 2 = sp + void *jbuf[]; +}; + +#if defined(_LIBUNWIND_HAS_NO_THREADS) +# define _LIBUNWIND_THREAD_LOCAL +#else +# if __STDC_VERSION__ >= 201112L +# define _LIBUNWIND_THREAD_LOCAL _Thread_local +# elif defined(_MSC_VER) +# define _LIBUNWIND_THREAD_LOCAL __declspec(thread) +# elif defined(__GNUC__) || defined(__clang__) +# define _LIBUNWIND_THREAD_LOCAL __thread +# else +# error Unable to create thread local storage +# endif +#endif + + +#if !defined(FOR_DYLD) + +#if defined(__APPLE__) +#include +#else +static _LIBUNWIND_THREAD_LOCAL struct _Unwind_FunctionContext *stack = NULL; +#endif + +static struct _Unwind_FunctionContext *__Unwind_SjLj_GetTopOfFunctionStack() { +#if defined(__APPLE__) + return _pthread_getspecific_direct(__PTK_LIBC_DYLD_Unwind_SjLj_Key); +#else + return stack; +#endif +} + +static void +__Unwind_SjLj_SetTopOfFunctionStack(struct _Unwind_FunctionContext *fc) { +#if defined(__APPLE__) + _pthread_setspecific_direct(__PTK_LIBC_DYLD_Unwind_SjLj_Key, fc); +#else + stack = fc; +#endif +} + +#endif + + +/// Called at start of each function that catches exceptions +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Register(struct _Unwind_FunctionContext *fc) { + fc->prev = __Unwind_SjLj_GetTopOfFunctionStack(); + __Unwind_SjLj_SetTopOfFunctionStack(fc); +} + + +/// Called at end of each function that catches exceptions +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Unregister(struct _Unwind_FunctionContext *fc) { + __Unwind_SjLj_SetTopOfFunctionStack(fc->prev); +} + + +static _Unwind_Reason_Code +unwind_phase1(struct _Unwind_Exception *exception_object) { + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: initial function-context=%p", + (void *)c); + + // walk each frame looking for a place to stop + for (bool handlerNotFound = true; handlerNotFound; c = c->prev) { + + // check for no more frames + if (c == NULL) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + 
return _URC_END_OF_STACK; + } + + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: function-context=%p", (void *)c); + // if there is a personality routine, ask it if it will want to stop at this + // frame + if (c->personality != NULL) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): calling " + "personality function %p", + (void *)exception_object, + (void *)c->personality); + _Unwind_Reason_Code personalityResult = (*c->personality)( + 1, _UA_SEARCH_PHASE, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember function context + handlerNotFound = false; + exception_object->private_2 = (uintptr_t) c; + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " + "_URC_HANDLER_FOUND", + (void *)exception_object); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " + "_URC_CONTINUE_UNWIND", + (void *)exception_object); + // continue unwinding + break; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + + +static _Unwind_Reason_Code +unwind_phase2(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", + (void *)exception_object); + + // walk each frame until we reach where search phase said to stop + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + while (true) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2s(ex_ojb=%p): context=%p", + (void *)exception_object, (void *)c); + + // check for no more frames + if (c == NULL) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } + + // if there is a personality routine, tell it we are unwinding + if (c->personality != NULL) { + _Unwind_Action action = _UA_CLEANUP_PHASE; + if ((uintptr_t) c == exception_object->private_2) + action = (_Unwind_Action)( + _UA_CLEANUP_PHASE | + _UA_HANDLER_FRAME); // tell personality this was the frame it marked + // in phase 1 + _Unwind_Reason_Code personalityResult = + (*c->personality)(1, action, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // continue unwinding + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", + (void *)exception_object); + if ((uintptr_t) c == exception_object->private_2) { + // phase 1 said we would stop at this frame, but we did not... 
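          // (private_2 was set in unwind_phase1() when this frame's personality
          //  routine answered _URC_HANDLER_FOUND, and _UA_HANDLER_FRAME was passed
          //  above.  A personality routine that claimed the frame in phase 1 must
          //  install its handler in phase 2; answering _URC_CONTINUE_UNWIND at the
          //  marked frame leaves the unwinder with no consistent state to resume
          //  from, so the only safe response is to abort.)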
+ _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now if phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): " + "_URC_INSTALL_CONTEXT, will resume at " + "landing pad %p", + (void *)exception_object, c->jbuf[1]); + // personality routine says to transfer control to landing pad + // we may get control back if landing pad calls _Unwind_Resume() + __Unwind_SjLj_SetTopOfFunctionStack(c); + __builtin_longjmp(c->jbuf, 1); + // __unw_resume() only returns if there was an error + return _URC_FATAL_PHASE2_ERROR; + default: + // something went wrong + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + c = c->prev; + } + + // clean up phase did not resume at the frame that the search phase said it + // would + return _URC_FATAL_PHASE2_ERROR; +} + + +static _Unwind_Reason_Code +unwind_phase2_forced(struct _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + // walk each frame until we reach where search phase said to stop + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + while (true) { + + // get next frame (skip over first which is _Unwind_RaiseException) + if (c == NULL) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } + + // call stop function at each frame + _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c, stop_parameter); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "stop function returned %d", + (void *)exception_object, stopResult); + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "stopped by stop function", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // if there is a personality routine, tell it we are unwinding + if (c->personality != NULL) { + __personality_routine p = (__personality_routine) c->personality; + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "calling personality function %p", + (void *)exception_object, (void *)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned _URC_CONTINUE_UNWIND", + (void *)exception_object); + // destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned _URC_INSTALL_CONTEXT", + (void *)exception_object); + // we may get control back if landing pad calls _Unwind_Resume() + __Unwind_SjLj_SetTopOfFunctionStack(c); + __builtin_longjmp(c->jbuf, 1); + break; + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " + "_URC_FATAL_PHASE2_ERROR", + (void *)exception_object, personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + c = c->prev; + } + + // call stop function one last time and tell it we've reached the end of the + // stack + 
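  // (_UA_END_OF_STACK signals that no frame accepted the forced unwind.  A stop
  //  function driving, for example, thread cancellation typically does not return
  //  from this final call; if it does return, the forced unwind has failed and
  //  _URC_FATAL_PHASE2_ERROR is reported below.)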
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + "function with _UA_END_OF_STACK", + (void *)exception_object); + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c, stop_parameter); + + // clean up phase did not resume at the frame that the search phase said it + // would + return _URC_FATAL_PHASE2_ERROR; +} + + +/// Called by __cxa_throw. Only returns if there is a fatal error +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_SjLj_RaiseException(ex_obj=%p)", + (void *)exception_object); + + // mark that this is a non-forced unwind, so _Unwind_Resume() can do the right + // thing + exception_object->private_1 = 0; + exception_object->private_2 = 0; + + // phase 1: the search phase + _Unwind_Reason_Code phase1 = unwind_phase1(exception_object); + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase + return unwind_phase2(exception_object); +} + + + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. Note: the call to _Unwind_Resume() is from compiler +/// geneated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Re-throwing an exception is implemented by having the code call +/// __cxa_rethrow() which in turn calls _Unwind_Resume_or_Rethrow() +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_SjLj_Resume(ex_obj=%p)", + (void *)exception_object); + + if (exception_object->private_1 != 0) + unwind_phase2_forced(exception_object, + (_Unwind_Stop_Fn) exception_object->private_1, + (void *)exception_object->private_2); + else + unwind_phase2(exception_object); + + // clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_SjLj_Resume() can't return"); +} + + +/// Called by __cxa_rethrow(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("__Unwind_SjLj_Resume_or_Rethrow(ex_obj=%p), " + "private_1=%" PRIuPTR, + (void *)exception_object, exception_object->private_1); + // If this is non-forced and a stopping place was found, then this is a + // re-throw. + // Call _Unwind_RaiseException() as if this was a new exception. + if (exception_object->private_1 == 0) { + return _Unwind_SjLj_RaiseException(exception_object); + // should return if there is no catch clause, so that __cxa_rethrow can call + // std::terminate() + } + + // Call through to _Unwind_Resume() which distiguishes between forced and + // regular exceptions. + _Unwind_SjLj_Resume(exception_object); + _LIBUNWIND_ABORT("__Unwind_SjLj_Resume_or_Rethrow() called " + "_Unwind_SjLj_Resume() which unexpectedly returned"); +} + + +/// Called by personality handler during phase 2 to get LSDA for current frame. 
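// Illustrative sketch only, not part of the upstream libunwind sources.  It
// shows how a language runtime's throw path is expected to drive the SJLJ entry
// points above; the helper name and the abort() fallback are assumptions, the
// real drivers being the C++ ABI library's __cxa_throw()/__cxa_rethrow().
static void example_sjlj_throw(struct _Unwind_Exception *exception_object) {
  // _Unwind_SjLj_RaiseException() only returns on a fatal error (for example
  // _URC_END_OF_STACK when no frame offered a handler), so the runtime has
  // nothing left to do but terminate.
  _Unwind_SjLj_RaiseException(exception_object);
  abort();
}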
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + _LIBUNWIND_TRACE_API("_Unwind_GetLanguageSpecificData(context=%p) " + "=> 0x%" PRIuPTR, + (void *)context, ufc->lsda); + return ufc->lsda; +} + + +/// Called by personality handler during phase 2 to get register values. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, + int index) { + _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d)", (void *)context, + index); + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + return ufc->resumeParameters[index]; +} + + +/// Called by personality handler during phase 2 to alter register values. +_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t new_value) { + _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%" PRIuPTR + ")", + (void *)context, index, new_value); + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + ufc->resumeParameters[index] = new_value; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIu32, + (void *)context, ufc->resumeLocation + 1); + return ufc->resumeLocation + 1; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +/// ipBefore is a boolean that says if IP is already adjusted to be the call +/// site address. Normally IP is the return address. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + *ipBefore = 0; + _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p, %p) => 0x%" PRIu32, + (void *)context, (void *)ipBefore, + ufc->resumeLocation + 1); + return ufc->resumeLocation + 1; +} + + +/// Called by personality handler during phase 2 to alter instruction pointer. +_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, + uintptr_t new_value) { + _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%" PRIuPTR ")", + (void *)context, new_value); + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + ufc->resumeLocation = new_value - 1; +} + + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p)", (void *)context); + return 0; +} + + +/// Called by personality handler during phase 2 if a foreign exception +/// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", + (void *)exception_object); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + + + +/// Called by personality handler during phase 2 to get base address for data +/// relative encodings. 
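// Illustrative sketch only, not part of the upstream libunwind sources.  Under
// SJLJ the "registers" and "IP" manipulated above are virtual: _Unwind_SetGR()
// fills resumeParameters[] and _Unwind_SetIP() records a landing-pad index in
// resumeLocation, both of which the longjmp in unwind_phase2() hands to the
// landing pad.  A personality routine that has located its handler would
// typically end phase 2 like this; using slots 0 and 1 for the exception object
// and the handler switch value follows the Itanium C++ ABI convention and is an
// assumption here.
static _Unwind_Reason_Code
example_install_context(struct _Unwind_Exception *exception_object,
                        struct _Unwind_Context *context,
                        uintptr_t landing_pad, uintptr_t switch_value) {
  _Unwind_SetGR(context, 0, (uintptr_t)exception_object);
  _Unwind_SetGR(context, 1, switch_value);
  _Unwind_SetIP(context, landing_pad);
  return _URC_INSTALL_CONTEXT;
}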
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetDataRelBase(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); +} + + +/// Called by personality handler during phase 2 to get base address for text +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetTextRelBase(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); +} + + +/// Called by personality handler to get "Call Frame Area" for current frame. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { + _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p)", (void *)context); + if (context != NULL) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + // Setjmp/longjmp based exceptions don't have a true CFA. + // Instead, the SP in the jmpbuf is the closest approximation. + return (uintptr_t) ufc->jbuf[2]; + } + return 0; +} + +#endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS) diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindCursor.hpp b/src/coreclr/src/nativeaot/libunwind/src/UnwindCursor.hpp new file mode 100644 index 0000000000000..ae5cbe7479e6f --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindCursor.hpp @@ -0,0 +1,2026 @@ +//===------------------------- UnwindCursor.hpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// C++ interface to lower levels of libunwind +//===----------------------------------------------------------------------===// + +#ifndef __UNWINDCURSOR_HPP__ +#define __UNWINDCURSOR_HPP__ + +#include +#include +#include +#include + +#ifdef _WIN32 + #include + #include +#endif +#ifdef __APPLE__ + #include +#endif + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) +// Provide a definition for the DISPATCHER_CONTEXT struct for old (Win7 and +// earlier) SDKs. +// MinGW-w64 has always provided this struct. + #if defined(_WIN32) && defined(_LIBUNWIND_TARGET_X86_64) && \ + !defined(__MINGW32__) && VER_PRODUCTBUILD < 8000 +struct _DISPATCHER_CONTEXT { + ULONG64 ControlPc; + ULONG64 ImageBase; + PRUNTIME_FUNCTION FunctionEntry; + ULONG64 EstablisherFrame; + ULONG64 TargetIp; + PCONTEXT ContextRecord; + PEXCEPTION_ROUTINE LanguageHandler; + PVOID HandlerData; + PUNWIND_HISTORY_TABLE HistoryTable; + ULONG ScopeIndex; + ULONG Fill0; +}; + #endif + +struct UNWIND_INFO { + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t SizeOfProlog; + uint8_t CountOfCodes; + uint8_t FrameRegister : 4; + uint8_t FrameOffset : 4; + uint16_t UnwindCodes[2]; +}; + +extern "C" _Unwind_Reason_Code __libunwind_seh_personality( + int, _Unwind_Action, uint64_t, _Unwind_Exception *, + struct _Unwind_Context *); + +#endif + +#include "config.h" + +#include "AddressSpace.hpp" +#include "CompactUnwinder.hpp" +#include "config.h" +#include "DwarfInstructions.hpp" +#include "EHHeaderParser.hpp" +#include "libunwind.h" +#include "Registers.hpp" +#include "RWMutex.hpp" +#include "Unwind-EHABI.h" + +namespace libunwind { + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +/// Cache of recently found FDEs. 
+template +class _LIBUNWIND_HIDDEN DwarfFDECache { + typedef typename A::pint_t pint_t; +public: + static pint_t findFDE(pint_t mh, pint_t pc); + static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde); + static void removeAllIn(pint_t mh); + static void iterateCacheEntries(void (*func)(unw_word_t ip_start, + unw_word_t ip_end, + unw_word_t fde, unw_word_t mh)); + +private: + + struct entry { + pint_t mh; + pint_t ip_start; + pint_t ip_end; + pint_t fde; + }; + + // These fields are all static to avoid needing an initializer. + // There is only one instance of this class per process. + static RWMutex _lock; +#ifdef __APPLE__ + static void dyldUnloadHook(const struct mach_header *mh, intptr_t slide); + static bool _registeredForDyldUnloads; +#endif + static entry *_buffer; + static entry *_bufferUsed; + static entry *_bufferEnd; + static entry _initialBuffer[64]; +}; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_buffer = _initialBuffer; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_bufferUsed = _initialBuffer; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_bufferEnd = &_initialBuffer[64]; + +template +typename DwarfFDECache::entry DwarfFDECache::_initialBuffer[64]; + +template +RWMutex DwarfFDECache::_lock; + +#ifdef __APPLE__ +template +bool DwarfFDECache::_registeredForDyldUnloads = false; +#endif + +template +typename A::pint_t DwarfFDECache::findFDE(pint_t mh, pint_t pc) { + pint_t result = 0; + _LIBUNWIND_LOG_IF_FALSE(_lock.lock_shared()); + for (entry *p = _buffer; p < _bufferUsed; ++p) { + if ((mh == p->mh) || (mh == 0)) { + if ((p->ip_start <= pc) && (pc < p->ip_end)) { + result = p->fde; + break; + } + } + } + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock_shared()); + return result; +} + +template +void DwarfFDECache::add(pint_t mh, pint_t ip_start, pint_t ip_end, + pint_t fde) { +#if !defined(_LIBUNWIND_NO_HEAP) + _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); + if (_bufferUsed >= _bufferEnd) { + size_t oldSize = (size_t)(_bufferEnd - _buffer); + size_t newSize = oldSize * 4; + // Can't use operator new (we are below it). 
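  // (Growth strategy, visible below: the cache starts in the statically
  //  allocated _initialBuffer, grows by a factor of four using malloc()/memcpy()
  //  since the entries are plain PODs, and only ever frees buffers it allocated
  //  itself; the static initial buffer is never passed to free().)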
+ entry *newBuffer = (entry *)malloc(newSize * sizeof(entry)); + memcpy(newBuffer, _buffer, oldSize * sizeof(entry)); + if (_buffer != _initialBuffer) + free(_buffer); + _buffer = newBuffer; + _bufferUsed = &newBuffer[oldSize]; + _bufferEnd = &newBuffer[newSize]; + } + _bufferUsed->mh = mh; + _bufferUsed->ip_start = ip_start; + _bufferUsed->ip_end = ip_end; + _bufferUsed->fde = fde; + ++_bufferUsed; +#ifdef __APPLE__ + if (!_registeredForDyldUnloads) { + _dyld_register_func_for_remove_image(&dyldUnloadHook); + _registeredForDyldUnloads = true; + } +#endif + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); +#endif +} + +template +void DwarfFDECache::removeAllIn(pint_t mh) { + _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); + entry *d = _buffer; + for (const entry *s = _buffer; s < _bufferUsed; ++s) { + if (s->mh != mh) { + if (d != s) + *d = *s; + ++d; + } + } + _bufferUsed = d; + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); +} + +#ifdef __APPLE__ +template +void DwarfFDECache::dyldUnloadHook(const struct mach_header *mh, intptr_t ) { + removeAllIn((pint_t) mh); +} +#endif + +template +void DwarfFDECache::iterateCacheEntries(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { + _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); + for (entry *p = _buffer; p < _bufferUsed; ++p) { + (*func)(p->ip_start, p->ip_end, p->fde, p->mh); + } + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); +} +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + + +#define arrayoffsetof(type, index, field) ((size_t)(&((type *)0)[index].field)) + +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) +template class UnwindSectionHeader { +public: + UnwindSectionHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t version() const { + return _addressSpace.get32(_addr + + offsetof(unwind_info_section_header, version)); + } + uint32_t commonEncodingsArraySectionOffset() const { + return _addressSpace.get32(_addr + + offsetof(unwind_info_section_header, + commonEncodingsArraySectionOffset)); + } + uint32_t commonEncodingsArrayCount() const { + return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, + commonEncodingsArrayCount)); + } + uint32_t personalityArraySectionOffset() const { + return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, + personalityArraySectionOffset)); + } + uint32_t personalityArrayCount() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, personalityArrayCount)); + } + uint32_t indexSectionOffset() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, indexSectionOffset)); + } + uint32_t indexCount() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, indexCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionIndexArray { +public: + UnwindSectionIndexArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, + functionOffset)); + } + uint32_t secondLevelPagesSectionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, + secondLevelPagesSectionOffset)); + } + uint32_t lsdaIndexArraySectionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + 
arrayoffsetof(unwind_info_section_header_index_entry, index, + lsdaIndexArraySectionOffset)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionRegularPageHeader { +public: + UnwindSectionRegularPageHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t kind() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_regular_second_level_page_header, kind)); + } + uint16_t entryPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_regular_second_level_page_header, + entryPageOffset)); + } + uint16_t entryCount() const { + return _addressSpace.get16( + _addr + + offsetof(unwind_info_regular_second_level_page_header, entryCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionRegularArray { +public: + UnwindSectionRegularArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_regular_second_level_entry, index, + functionOffset)); + } + uint32_t encoding(uint32_t index) const { + return _addressSpace.get32( + _addr + + arrayoffsetof(unwind_info_regular_second_level_entry, index, encoding)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionCompressedPageHeader { +public: + UnwindSectionCompressedPageHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t kind() const { + return _addressSpace.get32( + _addr + + offsetof(unwind_info_compressed_second_level_page_header, kind)); + } + uint16_t entryPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + entryPageOffset)); + } + uint16_t entryCount() const { + return _addressSpace.get16( + _addr + + offsetof(unwind_info_compressed_second_level_page_header, entryCount)); + } + uint16_t encodingsPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + encodingsPageOffset)); + } + uint16_t encodingsCount() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + encodingsCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionCompressedArray { +public: + UnwindSectionCompressedArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET( + _addressSpace.get32(_addr + index * sizeof(uint32_t))); + } + uint16_t encodingIndex(uint32_t index) const { + return UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX( + _addressSpace.get32(_addr + index * sizeof(uint32_t))); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionLsdaArray { +public: + UnwindSectionLsdaArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, + index, functionOffset)); + } + uint32_t lsdaOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, + index, lsdaOffset)); + } + 
+private: + A &_addressSpace; + typename A::pint_t _addr; +}; +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + +class _LIBUNWIND_HIDDEN AbstractUnwindCursor { +public: + // NOTE: provide a class specific placement deallocation function (S5.3.4 p20) + // This avoids an unnecessary dependency to libc++abi. + void operator delete(void *, size_t) {} + + virtual ~AbstractUnwindCursor() {} + virtual bool validReg(int) { _LIBUNWIND_ABORT("validReg not implemented"); } + virtual unw_word_t getReg(int) { _LIBUNWIND_ABORT("getReg not implemented"); } + virtual void setReg(int, unw_word_t, unw_word_t) { + _LIBUNWIND_ABORT("setReg not implemented"); + } + virtual unw_word_t getRegLocation(int) { + _LIBUNWIND_ABORT("getRegLocation not implemented"); + } + virtual bool validFloatReg(int) { + _LIBUNWIND_ABORT("validFloatReg not implemented"); + } + virtual unw_fpreg_t getFloatReg(int) { + _LIBUNWIND_ABORT("getFloatReg not implemented"); + } + virtual void setFloatReg(int, unw_fpreg_t) { + _LIBUNWIND_ABORT("setFloatReg not implemented"); + } + virtual int step() { _LIBUNWIND_ABORT("step not implemented"); } + virtual void getInfo(unw_proc_info_t *) { + _LIBUNWIND_ABORT("getInfo not implemented"); + } + virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } + virtual bool isSignalFrame() { + _LIBUNWIND_ABORT("isSignalFrame not implemented"); + } + virtual bool getFunctionName(char *, size_t, unw_word_t *) { + _LIBUNWIND_ABORT("getFunctionName not implemented"); + } + virtual void setInfoBasedOnIPRegister(bool = false) { + _LIBUNWIND_ABORT("setInfoBasedOnIPRegister not implemented"); + } + virtual const char *getRegisterName(int) { + _LIBUNWIND_ABORT("getRegisterName not implemented"); + } +#ifdef __arm__ + virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); } +#endif +}; + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) + +/// \c UnwindCursor contains all state (including all register values) during +/// an unwind. This is normally stack-allocated inside a unw_cursor_t. +template +class UnwindCursor : public AbstractUnwindCursor { + typedef typename A::pint_t pint_t; +public: + UnwindCursor(unw_context_t *context, A &as); + UnwindCursor(CONTEXT *context, A &as); + UnwindCursor(A &as, void *threadArg); + virtual ~UnwindCursor() {} + virtual bool validReg(int); + virtual unw_word_t getReg(int); + virtual void setReg(int, unw_word_t); + virtual bool validFloatReg(int); + virtual unw_fpreg_t getFloatReg(int); + virtual void setFloatReg(int, unw_fpreg_t); + virtual int step(); + virtual void getInfo(unw_proc_info_t *); + virtual void jumpto(); + virtual bool isSignalFrame(); + virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); + virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); + virtual const char *getRegisterName(int num); +#ifdef __arm__ + virtual void saveVFPAsX(); +#endif + + DISPATCHER_CONTEXT *getDispatcherContext() { return &_dispContext; } + void setDispatcherContext(DISPATCHER_CONTEXT *disp) { _dispContext = *disp; } + + // libunwind does not and should not depend on C++ library which means that we + // need our own defition of inline placement new. 
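  // (The class-scope placement operator new below is needed because the global
  //  placement form is declared in <new>, part of the C++ standard library that
  //  libunwind deliberately avoids, mirroring the class-specific placement
  //  delete in AbstractUnwindCursor above.)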
+ static void *operator new(size_t, UnwindCursor *p) { return p; } + +private: + + pint_t getLastPC() const { return _dispContext.ControlPc; } + void setLastPC(pint_t pc) { _dispContext.ControlPc = pc; } + RUNTIME_FUNCTION *lookUpSEHUnwindInfo(pint_t pc, pint_t *base) { + _dispContext.FunctionEntry = RtlLookupFunctionEntry(pc, + &_dispContext.ImageBase, + _dispContext.HistoryTable); + *base = _dispContext.ImageBase; + return _dispContext.FunctionEntry; + } + bool getInfoFromSEH(pint_t pc); + int stepWithSEHData() { + _dispContext.LanguageHandler = RtlVirtualUnwind(UNW_FLAG_UHANDLER, + _dispContext.ImageBase, + _dispContext.ControlPc, + _dispContext.FunctionEntry, + _dispContext.ContextRecord, + &_dispContext.HandlerData, + &_dispContext.EstablisherFrame, + NULL); + // Update some fields of the unwind info now, since we have them. + _info.lsda = reinterpret_cast(_dispContext.HandlerData); + if (_dispContext.LanguageHandler) { + _info.handler = reinterpret_cast(__libunwind_seh_personality); + } else + _info.handler = 0; + return UNW_STEP_SUCCESS; + } + + A &_addressSpace; + unw_proc_info_t _info; + DISPATCHER_CONTEXT _dispContext; + CONTEXT _msContext; + UNWIND_HISTORY_TABLE _histTable; + bool _unwindInfoMissing; +}; + + +template +UnwindCursor::UnwindCursor(unw_context_t *context, A &as) + : _addressSpace(as), _unwindInfoMissing(false) { + static_assert((check_fit, unw_cursor_t>::does_fit), + "UnwindCursor<> does not fit in unw_cursor_t"); + memset(&_info, 0, sizeof(_info)); + memset(&_histTable, 0, sizeof(_histTable)); + _dispContext.ContextRecord = &_msContext; + _dispContext.HistoryTable = &_histTable; + // Initialize MS context from ours. + R r(context); + _msContext.ContextFlags = CONTEXT_CONTROL|CONTEXT_INTEGER|CONTEXT_FLOATING_POINT; +#if defined(_LIBUNWIND_TARGET_X86_64) + _msContext.Rax = r.getRegister(UNW_X86_64_RAX); + _msContext.Rcx = r.getRegister(UNW_X86_64_RCX); + _msContext.Rdx = r.getRegister(UNW_X86_64_RDX); + _msContext.Rbx = r.getRegister(UNW_X86_64_RBX); + _msContext.Rsp = r.getRegister(UNW_X86_64_RSP); + _msContext.Rbp = r.getRegister(UNW_X86_64_RBP); + _msContext.Rsi = r.getRegister(UNW_X86_64_RSI); + _msContext.Rdi = r.getRegister(UNW_X86_64_RDI); + _msContext.R8 = r.getRegister(UNW_X86_64_R8); + _msContext.R9 = r.getRegister(UNW_X86_64_R9); + _msContext.R10 = r.getRegister(UNW_X86_64_R10); + _msContext.R11 = r.getRegister(UNW_X86_64_R11); + _msContext.R12 = r.getRegister(UNW_X86_64_R12); + _msContext.R13 = r.getRegister(UNW_X86_64_R13); + _msContext.R14 = r.getRegister(UNW_X86_64_R14); + _msContext.R15 = r.getRegister(UNW_X86_64_R15); + _msContext.Rip = r.getRegister(UNW_REG_IP); + union { + v128 v; + M128A m; + } t; + t.v = r.getVectorRegister(UNW_X86_64_XMM0); + _msContext.Xmm0 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM1); + _msContext.Xmm1 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM2); + _msContext.Xmm2 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM3); + _msContext.Xmm3 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM4); + _msContext.Xmm4 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM5); + _msContext.Xmm5 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM6); + _msContext.Xmm6 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM7); + _msContext.Xmm7 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM8); + _msContext.Xmm8 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM9); + _msContext.Xmm9 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM10); + _msContext.Xmm10 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM11); + _msContext.Xmm11 = t.m; + t.v = 
r.getVectorRegister(UNW_X86_64_XMM12); + _msContext.Xmm12 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM13); + _msContext.Xmm13 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM14); + _msContext.Xmm14 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM15); + _msContext.Xmm15 = t.m; +#elif defined(_LIBUNWIND_TARGET_ARM) + _msContext.R0 = r.getRegister(UNW_ARM_R0); + _msContext.R1 = r.getRegister(UNW_ARM_R1); + _msContext.R2 = r.getRegister(UNW_ARM_R2); + _msContext.R3 = r.getRegister(UNW_ARM_R3); + _msContext.R4 = r.getRegister(UNW_ARM_R4); + _msContext.R5 = r.getRegister(UNW_ARM_R5); + _msContext.R6 = r.getRegister(UNW_ARM_R6); + _msContext.R7 = r.getRegister(UNW_ARM_R7); + _msContext.R8 = r.getRegister(UNW_ARM_R8); + _msContext.R9 = r.getRegister(UNW_ARM_R9); + _msContext.R10 = r.getRegister(UNW_ARM_R10); + _msContext.R11 = r.getRegister(UNW_ARM_R11); + _msContext.R12 = r.getRegister(UNW_ARM_R12); + _msContext.Sp = r.getRegister(UNW_ARM_SP); + _msContext.Lr = r.getRegister(UNW_ARM_LR); + _msContext.Pc = r.getRegister(UNW_ARM_IP); + for (int i = UNW_ARM_D0; i <= UNW_ARM_D31; ++i) { + union { + uint64_t w; + double d; + } d; + d.d = r.getFloatRegister(i); + _msContext.D[i - UNW_ARM_D0] = d.w; + } +#elif defined(_LIBUNWIND_TARGET_AARCH64) + for (int i = UNW_ARM64_X0; i <= UNW_ARM64_X30; ++i) + _msContext.X[i - UNW_ARM64_X0] = r.getRegister(i); + _msContext.Sp = r.getRegister(UNW_REG_SP); + _msContext.Pc = r.getRegister(UNW_REG_IP); + for (int i = UNW_ARM64_D0; i <= UNW_ARM64_D31; ++i) + _msContext.V[i - UNW_ARM64_D0].D[0] = r.getFloatRegister(i); +#endif +} + +template +UnwindCursor::UnwindCursor(CONTEXT *context, A &as) + : _addressSpace(as), _unwindInfoMissing(false) { + static_assert((check_fit, unw_cursor_t>::does_fit), + "UnwindCursor<> does not fit in unw_cursor_t"); + memset(&_info, 0, sizeof(_info)); + memset(&_histTable, 0, sizeof(_histTable)); + _dispContext.ContextRecord = &_msContext; + _dispContext.HistoryTable = &_histTable; + _msContext = *context; +} + + +template +bool UnwindCursor::validReg(int regNum) { + if (regNum == UNW_REG_IP || regNum == UNW_REG_SP) return true; +#if defined(_LIBUNWIND_TARGET_X86_64) + if (regNum >= UNW_X86_64_RAX && regNum <= UNW_X86_64_R15) return true; +#elif defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15) return true; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + if (regNum >= UNW_ARM64_X0 && regNum <= UNW_ARM64_X30) return true; +#endif + return false; +} + +template +unw_word_t UnwindCursor::getReg(int regNum) { + switch (regNum) { +#if defined(_LIBUNWIND_TARGET_X86_64) + case UNW_REG_IP: return _msContext.Rip; + case UNW_X86_64_RAX: return _msContext.Rax; + case UNW_X86_64_RDX: return _msContext.Rdx; + case UNW_X86_64_RCX: return _msContext.Rcx; + case UNW_X86_64_RBX: return _msContext.Rbx; + case UNW_REG_SP: + case UNW_X86_64_RSP: return _msContext.Rsp; + case UNW_X86_64_RBP: return _msContext.Rbp; + case UNW_X86_64_RSI: return _msContext.Rsi; + case UNW_X86_64_RDI: return _msContext.Rdi; + case UNW_X86_64_R8: return _msContext.R8; + case UNW_X86_64_R9: return _msContext.R9; + case UNW_X86_64_R10: return _msContext.R10; + case UNW_X86_64_R11: return _msContext.R11; + case UNW_X86_64_R12: return _msContext.R12; + case UNW_X86_64_R13: return _msContext.R13; + case UNW_X86_64_R14: return _msContext.R14; + case UNW_X86_64_R15: return _msContext.R15; +#elif defined(_LIBUNWIND_TARGET_ARM) + case UNW_ARM_R0: return _msContext.R0; + case UNW_ARM_R1: return _msContext.R1; + case UNW_ARM_R2: return _msContext.R2; + 
case UNW_ARM_R3: return _msContext.R3; + case UNW_ARM_R4: return _msContext.R4; + case UNW_ARM_R5: return _msContext.R5; + case UNW_ARM_R6: return _msContext.R6; + case UNW_ARM_R7: return _msContext.R7; + case UNW_ARM_R8: return _msContext.R8; + case UNW_ARM_R9: return _msContext.R9; + case UNW_ARM_R10: return _msContext.R10; + case UNW_ARM_R11: return _msContext.R11; + case UNW_ARM_R12: return _msContext.R12; + case UNW_REG_SP: + case UNW_ARM_SP: return _msContext.Sp; + case UNW_ARM_LR: return _msContext.Lr; + case UNW_REG_IP: + case UNW_ARM_IP: return _msContext.Pc; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + case UNW_REG_SP: return _msContext.Sp; + case UNW_REG_IP: return _msContext.Pc; + default: return _msContext.X[regNum - UNW_ARM64_X0]; +#endif + } + _LIBUNWIND_ABORT("unsupported register"); +} + +template +void UnwindCursor::setReg(int regNum, unw_word_t value) { + switch (regNum) { +#if defined(_LIBUNWIND_TARGET_X86_64) + case UNW_REG_IP: _msContext.Rip = value; break; + case UNW_X86_64_RAX: _msContext.Rax = value; break; + case UNW_X86_64_RDX: _msContext.Rdx = value; break; + case UNW_X86_64_RCX: _msContext.Rcx = value; break; + case UNW_X86_64_RBX: _msContext.Rbx = value; break; + case UNW_REG_SP: + case UNW_X86_64_RSP: _msContext.Rsp = value; break; + case UNW_X86_64_RBP: _msContext.Rbp = value; break; + case UNW_X86_64_RSI: _msContext.Rsi = value; break; + case UNW_X86_64_RDI: _msContext.Rdi = value; break; + case UNW_X86_64_R8: _msContext.R8 = value; break; + case UNW_X86_64_R9: _msContext.R9 = value; break; + case UNW_X86_64_R10: _msContext.R10 = value; break; + case UNW_X86_64_R11: _msContext.R11 = value; break; + case UNW_X86_64_R12: _msContext.R12 = value; break; + case UNW_X86_64_R13: _msContext.R13 = value; break; + case UNW_X86_64_R14: _msContext.R14 = value; break; + case UNW_X86_64_R15: _msContext.R15 = value; break; +#elif defined(_LIBUNWIND_TARGET_ARM) + case UNW_ARM_R0: _msContext.R0 = value; break; + case UNW_ARM_R1: _msContext.R1 = value; break; + case UNW_ARM_R2: _msContext.R2 = value; break; + case UNW_ARM_R3: _msContext.R3 = value; break; + case UNW_ARM_R4: _msContext.R4 = value; break; + case UNW_ARM_R5: _msContext.R5 = value; break; + case UNW_ARM_R6: _msContext.R6 = value; break; + case UNW_ARM_R7: _msContext.R7 = value; break; + case UNW_ARM_R8: _msContext.R8 = value; break; + case UNW_ARM_R9: _msContext.R9 = value; break; + case UNW_ARM_R10: _msContext.R10 = value; break; + case UNW_ARM_R11: _msContext.R11 = value; break; + case UNW_ARM_R12: _msContext.R12 = value; break; + case UNW_REG_SP: + case UNW_ARM_SP: _msContext.Sp = value; break; + case UNW_ARM_LR: _msContext.Lr = value; break; + case UNW_REG_IP: + case UNW_ARM_IP: _msContext.Pc = value; break; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + case UNW_REG_SP: _msContext.Sp = value; break; + case UNW_REG_IP: _msContext.Pc = value; break; + case UNW_ARM64_X0: + case UNW_ARM64_X1: + case UNW_ARM64_X2: + case UNW_ARM64_X3: + case UNW_ARM64_X4: + case UNW_ARM64_X5: + case UNW_ARM64_X6: + case UNW_ARM64_X7: + case UNW_ARM64_X8: + case UNW_ARM64_X9: + case UNW_ARM64_X10: + case UNW_ARM64_X11: + case UNW_ARM64_X12: + case UNW_ARM64_X13: + case UNW_ARM64_X14: + case UNW_ARM64_X15: + case UNW_ARM64_X16: + case UNW_ARM64_X17: + case UNW_ARM64_X18: + case UNW_ARM64_X19: + case UNW_ARM64_X20: + case UNW_ARM64_X21: + case UNW_ARM64_X22: + case UNW_ARM64_X23: + case UNW_ARM64_X24: + case UNW_ARM64_X25: + case UNW_ARM64_X26: + case UNW_ARM64_X27: + case UNW_ARM64_X28: + case UNW_ARM64_FP: + case UNW_ARM64_LR: 
_msContext.X[regNum - UNW_ARM64_X0] = value; break; +#endif + default: + _LIBUNWIND_ABORT("unsupported register"); + } +} + +template +bool UnwindCursor::validFloatReg(int regNum) { +#if defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_S0 && regNum <= UNW_ARM_S31) return true; + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) return true; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + if (regNum >= UNW_ARM64_D0 && regNum <= UNW_ARM64_D31) return true; +#else + (void)regNum; +#endif + return false; +} + +template +unw_fpreg_t UnwindCursor::getFloatReg(int regNum) { +#if defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_S0 && regNum <= UNW_ARM_S31) { + union { + uint32_t w; + float f; + } d; + d.w = _msContext.S[regNum - UNW_ARM_S0]; + return d.f; + } + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) { + union { + uint64_t w; + double d; + } d; + d.w = _msContext.D[regNum - UNW_ARM_D0]; + return d.d; + } + _LIBUNWIND_ABORT("unsupported float register"); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + return _msContext.V[regNum - UNW_ARM64_D0].D[0]; +#else + (void)regNum; + _LIBUNWIND_ABORT("float registers unimplemented"); +#endif +} + +template +void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { +#if defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_S0 && regNum <= UNW_ARM_S31) { + union { + uint32_t w; + float f; + } d; + d.f = value; + _msContext.S[regNum - UNW_ARM_S0] = d.w; + } + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) { + union { + uint64_t w; + double d; + } d; + d.d = value; + _msContext.D[regNum - UNW_ARM_D0] = d.w; + } + _LIBUNWIND_ABORT("unsupported float register"); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + _msContext.V[regNum - UNW_ARM64_D0].D[0] = value; +#else + (void)regNum; + (void)value; + _LIBUNWIND_ABORT("float registers unimplemented"); +#endif +} + +template void UnwindCursor::jumpto() { + RtlRestoreContext(&_msContext, nullptr); +} + +#ifdef __arm__ +template void UnwindCursor::saveVFPAsX() {} +#endif + +template +const char *UnwindCursor::getRegisterName(int regNum) { + return R::getRegisterName(regNum); +} + +template bool UnwindCursor::isSignalFrame() { + return false; +} + +#else // !defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) || !defined(_WIN32) + +/// UnwindCursor contains all state (including all register values) during +/// an unwind. This is normally stack allocated inside a unw_cursor_t. +template +class UnwindCursor : public AbstractUnwindCursor{ + typedef typename A::pint_t pint_t; +public: + UnwindCursor(A &as); + UnwindCursor(unw_context_t *context, A &as); + UnwindCursor(A &as, void *threadArg); + virtual ~UnwindCursor() {} + virtual bool validReg(int); + virtual unw_word_t getReg(int); + virtual void setReg(int, unw_word_t, unw_word_t); + virtual unw_word_t getRegLocation(int); + virtual bool validFloatReg(int); + virtual unw_fpreg_t getFloatReg(int); + virtual void setFloatReg(int, unw_fpreg_t); + virtual int step(); + virtual void getInfo(unw_proc_info_t *); + virtual void jumpto(); + virtual bool isSignalFrame(); + virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); + virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); + virtual const char *getRegisterName(int num); +#ifdef __arm__ + virtual void saveVFPAsX(); +#endif + + // libunwind does not and should not depend on C++ library which means that we + // need our own defition of inline placement new. 
+ static void *operator new(size_t, UnwindCursor *p) { return p; } + +private: + +#if defined(_LIBUNWIND_ARM_EHABI) + bool getInfoFromEHABISection(pint_t pc, const UnwindInfoSections §s); + + int stepWithEHABI() { + size_t len = 0; + size_t off = 0; + // FIXME: Calling decode_eht_entry() here is violating the libunwind + // abstraction layer. + const uint32_t *ehtp = + decode_eht_entry(reinterpret_cast(_info.unwind_info), + &off, &len); + if (_Unwind_VRS_Interpret((_Unwind_Context *)this, ehtp, off, len) != + _URC_CONTINUE_UNWIND) + return UNW_STEP_END; + return UNW_STEP_SUCCESS; + } +#endif + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +public: + bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint=0); + int stepWithDwarfFDE() { + return DwarfInstructions::stepWithDwarf(_addressSpace, + (pint_t)this->getReg(UNW_REG_IP), + (pint_t)_info.unwind_info, + _registers); + } +#endif + +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + bool getInfoFromCompactEncodingSection(pint_t pc, + const UnwindInfoSections §s); + int stepWithCompactEncoding() { + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + if ( compactSaysUseDwarf() ) + return stepWithDwarfFDE(); + #endif + R dummy; + return stepWithCompactEncoding(dummy); + } + +#if defined(_LIBUNWIND_TARGET_X86_64) + int stepWithCompactEncoding(Registers_x86_64 &) { + return CompactUnwinder_x86_64::stepWithCompactEncoding( + _info.format, _info.start_ip, _addressSpace, _registers); + } +#endif + +#if defined(_LIBUNWIND_TARGET_I386) + int stepWithCompactEncoding(Registers_x86 &) { + return CompactUnwinder_x86::stepWithCompactEncoding( + _info.format, (uint32_t)_info.start_ip, _addressSpace, _registers); + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC) + int stepWithCompactEncoding(Registers_ppc &) { + return UNW_EINVAL; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) + int stepWithCompactEncoding(Registers_ppc64 &) { + return UNW_EINVAL; + } +#endif + + +#if defined(_LIBUNWIND_TARGET_AARCH64) + int stepWithCompactEncoding(Registers_arm64 &) { + return CompactUnwinder_arm64::stepWithCompactEncoding( + _info.format, _info.start_ip, _addressSpace, _registers); + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_O32) + int stepWithCompactEncoding(Registers_mips_o32 &) { + return UNW_EINVAL; + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_NEWABI) + int stepWithCompactEncoding(Registers_mips_newabi &) { + return UNW_EINVAL; + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + int stepWithCompactEncoding(Registers_sparc &) { return UNW_EINVAL; } +#endif + + bool compactSaysUseDwarf(uint32_t *offset=NULL) const { + R dummy; + return compactSaysUseDwarf(dummy, offset); + } + +#if defined(_LIBUNWIND_TARGET_X86_64) + bool compactSaysUseDwarf(Registers_x86_64 &, uint32_t *offset) const { + if ((_info.format & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_X86_64_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + +#if defined(_LIBUNWIND_TARGET_I386) + bool compactSaysUseDwarf(Registers_x86 &, uint32_t *offset) const { + if ((_info.format & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_X86_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC) + bool compactSaysUseDwarf(Registers_ppc &, uint32_t *) const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) + bool compactSaysUseDwarf(Registers_ppc64 &, uint32_t *) 
const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_AARCH64) + bool compactSaysUseDwarf(Registers_arm64 &, uint32_t *offset) const { + if ((_info.format & UNWIND_ARM64_MODE_MASK) == UNWIND_ARM64_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_ARM64_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_O32) + bool compactSaysUseDwarf(Registers_mips_o32 &, uint32_t *) const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_NEWABI) + bool compactSaysUseDwarf(Registers_mips_newabi &, uint32_t *) const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + bool compactSaysUseDwarf(Registers_sparc &, uint32_t *) const { return true; } +#endif + +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + compact_unwind_encoding_t dwarfEncoding() const { + R dummy; + return dwarfEncoding(dummy); + } + +#if defined(_LIBUNWIND_TARGET_X86_64) + compact_unwind_encoding_t dwarfEncoding(Registers_x86_64 &) const { + return UNWIND_X86_64_MODE_DWARF; + } +#endif + +#if defined(_LIBUNWIND_TARGET_I386) + compact_unwind_encoding_t dwarfEncoding(Registers_x86 &) const { + return UNWIND_X86_MODE_DWARF; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC) + compact_unwind_encoding_t dwarfEncoding(Registers_ppc &) const { + return 0; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) + compact_unwind_encoding_t dwarfEncoding(Registers_ppc64 &) const { + return 0; + } +#endif + +#if defined(_LIBUNWIND_TARGET_AARCH64) + compact_unwind_encoding_t dwarfEncoding(Registers_arm64 &) const { + return UNWIND_ARM64_MODE_DWARF; + } +#endif + +#if defined(_LIBUNWIND_TARGET_ARM) + compact_unwind_encoding_t dwarfEncoding(Registers_arm &) const { + return 0; + } +#endif + +#if defined (_LIBUNWIND_TARGET_OR1K) + compact_unwind_encoding_t dwarfEncoding(Registers_or1k &) const { + return 0; + } +#endif + +#if defined (_LIBUNWIND_TARGET_MIPS_O32) + compact_unwind_encoding_t dwarfEncoding(Registers_mips_o32 &) const { + return 0; + } +#endif + +#if defined (_LIBUNWIND_TARGET_MIPS_NEWABI) + compact_unwind_encoding_t dwarfEncoding(Registers_mips_newabi &) const { + return 0; + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + compact_unwind_encoding_t dwarfEncoding(Registers_sparc &) const { return 0; } +#endif + +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + // For runtime environments using SEH unwind data without Windows runtime + // support. 
+ pint_t getLastPC() const { /* FIXME: Implement */ return 0; } + void setLastPC(pint_t pc) { /* FIXME: Implement */ } + RUNTIME_FUNCTION *lookUpSEHUnwindInfo(pint_t pc, pint_t *base) { + /* FIXME: Implement */ + *base = 0; + return nullptr; + } + bool getInfoFromSEH(pint_t pc); + int stepWithSEHData() { /* FIXME: Implement */ return 0; } +#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + + + A &_addressSpace; + R _registers; + unw_proc_info_t _info; + bool _unwindInfoMissing; + bool _isSignalFrame; +}; + +template +UnwindCursor::UnwindCursor(A &as) + : _addressSpace(as) + , _unwindInfoMissing(false) + , _isSignalFrame(false) { + memset(&_info, 0, sizeof(_info)); +} + +template +UnwindCursor::UnwindCursor(unw_context_t *context, A &as) + : _addressSpace(as), _registers(context), _unwindInfoMissing(false), + _isSignalFrame(false) { + static_assert((check_fit, unw_cursor_t>::does_fit), + "UnwindCursor<> does not fit in unw_cursor_t"); + memset(&_info, 0, sizeof(_info)); +} + +template +UnwindCursor::UnwindCursor(A &as, void *arg) + : _addressSpace(as),_registers(arg), _unwindInfoMissing(false), + _isSignalFrame(false) { + memset(&_info, 0, sizeof(_info)); + + // FIXME + // fill in _registers from thread arg +} + + +template +bool UnwindCursor::validReg(int regNum) { + return _registers.validRegister(regNum); +} + +template +unw_word_t UnwindCursor::getReg(int regNum) { + return _registers.getRegister(regNum); +} + +template +void UnwindCursor::setReg(int regNum, unw_word_t value, unw_word_t location) { + _registers.setRegister(regNum, (typename A::pint_t)value, (typename A::pint_t)location); +} + +template +unw_word_t UnwindCursor::getRegLocation(int regNum) { + return _registers.getRegisterLocation(regNum); +} + +template +bool UnwindCursor::validFloatReg(int regNum) { + return _registers.validFloatRegister(regNum); +} + +template +unw_fpreg_t UnwindCursor::getFloatReg(int regNum) { + return _registers.getFloatRegister(regNum); +} + +template +void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { + _registers.setFloatRegister(regNum, value); +} + +template void UnwindCursor::jumpto() { + _registers.jumpto(); +} + +#ifdef __arm__ +template void UnwindCursor::saveVFPAsX() { + _registers.saveVFPAsX(); +} +#endif + +template +const char *UnwindCursor::getRegisterName(int regNum) { + return _registers.getRegisterName(regNum); +} + +template bool UnwindCursor::isSignalFrame() { + return _isSignalFrame; +} + +#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + +#if defined(_LIBUNWIND_ARM_EHABI) +struct EHABIIndexEntry { + uint32_t functionOffset; + uint32_t data; +}; + +template +struct EHABISectionIterator { + typedef EHABISectionIterator _Self; + + typedef typename A::pint_t value_type; + typedef typename A::pint_t* pointer; + typedef typename A::pint_t& reference; + typedef size_t size_type; + typedef size_t difference_type; + + static _Self begin(A& addressSpace, const UnwindInfoSections& sects) { + return _Self(addressSpace, sects, 0); + } + static _Self end(A& addressSpace, const UnwindInfoSections& sects) { + return _Self(addressSpace, sects, + sects.arm_section_length / sizeof(EHABIIndexEntry)); + } + + EHABISectionIterator(A& addressSpace, const UnwindInfoSections& sects, size_t i) + : _i(i), _addressSpace(&addressSpace), _sects(§s) {} + + _Self& operator++() { ++_i; return *this; } + _Self& operator+=(size_t a) { _i += a; return *this; } + _Self& operator--() { assert(_i > 0); --_i; return *this; } + _Self& operator-=(size_t a) { assert(_i >= a); _i -= a; return *this; } + + 
_Self operator+(size_t a) { _Self out = *this; out._i += a; return out; } + _Self operator-(size_t a) { assert(_i >= a); _Self out = *this; out._i -= a; return out; } + + size_t operator-(const _Self& other) { return _i - other._i; } + + bool operator==(const _Self& other) const { + assert(_addressSpace == other._addressSpace); + assert(_sects == other._sects); + return _i == other._i; + } + + typename A::pint_t operator*() const { return functionAddress(); } + + typename A::pint_t functionAddress() const { + typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof( + EHABIIndexEntry, _i, functionOffset); + return indexAddr + signExtendPrel31(_addressSpace->get32(indexAddr)); + } + + typename A::pint_t dataAddress() { + typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof( + EHABIIndexEntry, _i, data); + return indexAddr; + } + + private: + size_t _i; + A* _addressSpace; + const UnwindInfoSections* _sects; +}; + +namespace { + +template +EHABISectionIterator EHABISectionUpperBound( + EHABISectionIterator first, + EHABISectionIterator last, + typename A::pint_t value) { + size_t len = last - first; + while (len > 0) { + size_t l2 = len / 2; + EHABISectionIterator m = first + l2; + if (value < *m) { + len = l2; + } else { + first = ++m; + len -= l2 + 1; + } + } + return first; +} + +} + +template +bool UnwindCursor::getInfoFromEHABISection( + pint_t pc, + const UnwindInfoSections §s) { + EHABISectionIterator begin = + EHABISectionIterator::begin(_addressSpace, sects); + EHABISectionIterator end = + EHABISectionIterator::end(_addressSpace, sects); + if (begin == end) + return false; + + EHABISectionIterator itNextPC = EHABISectionUpperBound(begin, end, pc); + if (itNextPC == begin) + return false; + EHABISectionIterator itThisPC = itNextPC - 1; + + pint_t thisPC = itThisPC.functionAddress(); + // If an exception is thrown from a function, corresponding to the last entry + // in the table, we don't really know the function extent and have to choose a + // value for nextPC. Choosing max() will allow the range check during trace to + // succeed. + pint_t nextPC = (itNextPC == end) ? UINTPTR_MAX : itNextPC.functionAddress(); + pint_t indexDataAddr = itThisPC.dataAddress(); + + if (indexDataAddr == 0) + return false; + + uint32_t indexData = _addressSpace.get32(indexDataAddr); + if (indexData == UNW_EXIDX_CANTUNWIND) + return false; + + // If the high bit is set, the exception handling table entry is inline inside + // the index table entry on the second word (aka |indexDataAddr|). Otherwise, + // the table points at an offset in the exception handling table (section 5 EHABI). + pint_t exceptionTableAddr; + uint32_t exceptionTableData; + bool isSingleWordEHT; + if (indexData & 0x80000000) { + exceptionTableAddr = indexDataAddr; + // TODO(ajwong): Should this data be 0? + exceptionTableData = indexData; + isSingleWordEHT = true; + } else { + exceptionTableAddr = indexDataAddr + signExtendPrel31(indexData); + exceptionTableData = _addressSpace.get32(exceptionTableAddr); + isSingleWordEHT = false; + } + + // Now we know the 3 things: + // exceptionTableAddr -- exception handler table entry. + // exceptionTableData -- the data inside the first word of the eht entry. + // isSingleWordEHT -- whether the entry is in the index. + unw_word_t personalityRoutine = 0xbadf00d; + bool scope32 = false; + uintptr_t lsda; + + // If the high bit in the exception handling table entry is set, the entry is + // in compact form (section 6.3 EHABI). 
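  // (Compact-model layout, matching the decoding below: bit 31 is the compact
  //  flag, bits 27..24 select the AEABI personality routine (0/1/2 map to
  //  __aeabi_unwind_cpp_pr0/1/2), and for pr1/pr2 bits 23..16 give the number of
  //  additional 32-bit opcode words that precede the LSDA.)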
+ if (exceptionTableData & 0x80000000) { + // Grab the index of the personality routine from the compact form. + uint32_t choice = (exceptionTableData & 0x0f000000) >> 24; + uint32_t extraWords = 0; + switch (choice) { + case 0: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr0; + extraWords = 0; + scope32 = false; + lsda = isSingleWordEHT ? 0 : (exceptionTableAddr + 4); + break; + case 1: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr1; + extraWords = (exceptionTableData & 0x00ff0000) >> 16; + scope32 = false; + lsda = exceptionTableAddr + (extraWords + 1) * 4; + break; + case 2: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr2; + extraWords = (exceptionTableData & 0x00ff0000) >> 16; + scope32 = true; + lsda = exceptionTableAddr + (extraWords + 1) * 4; + break; + default: + _LIBUNWIND_ABORT("unknown personality routine"); + return false; + } + + if (isSingleWordEHT) { + if (extraWords != 0) { + _LIBUNWIND_ABORT("index inlined table detected but pr function " + "requires extra words"); + return false; + } + } + } else { + pint_t personalityAddr = + exceptionTableAddr + signExtendPrel31(exceptionTableData); + personalityRoutine = personalityAddr; + + // ARM EHABI # 6.2, # 9.2 + // + // +---- ehtp + // v + // +--------------------------------------+ + // | +--------+--------+--------+-------+ | + // | |0| prel31 to personalityRoutine | | + // | +--------+--------+--------+-------+ | + // | | N | unwind opcodes | | <-- UnwindData + // | +--------+--------+--------+-------+ | + // | | Word 2 unwind opcodes | | + // | +--------+--------+--------+-------+ | + // | ... | + // | +--------+--------+--------+-------+ | + // | | Word N unwind opcodes | | + // | +--------+--------+--------+-------+ | + // | | LSDA | | <-- lsda + // | | ... | | + // | +--------+--------+--------+-------+ | + // +--------------------------------------+ + + uint32_t *UnwindData = reinterpret_cast(exceptionTableAddr) + 1; + uint32_t FirstDataWord = *UnwindData; + size_t N = ((FirstDataWord >> 24) & 0xff); + size_t NDataWords = N + 1; + lsda = reinterpret_cast(UnwindData + NDataWords); + } + + _info.start_ip = thisPC; + _info.end_ip = nextPC; + _info.handler = personalityRoutine; + _info.unwind_info = exceptionTableAddr; + _info.lsda = lsda; + // flags is pr_cache.additional. See EHABI #7.2 for definition of bit 0. + _info.flags = (isSingleWordEHT ? 1 : 0) | (scope32 ? 0x2 : 0); // Use enum? + + return true; +} +#endif + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +template +bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, + const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint) { + typename CFI_Parser::FDE_Info fdeInfo; + typename CFI_Parser::CIE_Info cieInfo; + bool foundFDE = false; + bool foundInCache = false; + // If compact encoding table gave offset into dwarf section, go directly there + if (fdeSectionOffsetHint != 0) { + foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + (uint32_t)sects.dwarf_section_length, + sects.dwarf_section + fdeSectionOffsetHint, + &fdeInfo, &cieInfo); + } +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + if (!foundFDE && (sects.dwarf_index_section != 0)) { + foundFDE = EHHeaderParser::findFDE( + _addressSpace, pc, sects.dwarf_index_section, + (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo); + } +#endif + if (!foundFDE) { + // otherwise, search cache of previously found FDEs. 
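  // (Overall lookup order in this function: the offset hint from the
  //  compact-unwind entry, then the .eh_frame_hdr search table when
  //  _LIBUNWIND_SUPPORT_DWARF_INDEX is enabled, then the process-wide
  //  DwarfFDECache, and finally a linear scan of the whole .eh_frame section,
  //  whose result is cached for the next lookup when no search index was
  //  available.)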
+ pint_t cachedFDE = DwarfFDECache::findFDE(sects.dso_base, pc); + if (cachedFDE != 0) { + foundFDE = + CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + (uint32_t)sects.dwarf_section_length, + cachedFDE, &fdeInfo, &cieInfo); + foundInCache = foundFDE; + } + } + if (!foundFDE) { + // Still not found, do full scan of __eh_frame section. + foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + (uint32_t)sects.dwarf_section_length, 0, + &fdeInfo, &cieInfo); + } + if (foundFDE) { + typename CFI_Parser::PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, pc, + R::getArch(), &prolog)) { + // Save off parsed FDE info + _info.start_ip = fdeInfo.pcStart; + _info.end_ip = fdeInfo.pcEnd; + _info.lsda = fdeInfo.lsda; + _info.handler = cieInfo.personality; + _info.gp = prolog.spExtraArgSize; + _info.flags = 0; + _info.format = dwarfEncoding(); + _info.unwind_info = fdeInfo.fdeStart; + _info.unwind_info_size = (uint32_t)fdeInfo.fdeLength; + _info.extra = (unw_word_t) sects.dso_base; + + // Add to cache (to make next lookup faster) if we had no hint + // and there was no index. + if (!foundInCache && (fdeSectionOffsetHint == 0)) { + #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + if (sects.dwarf_index_section == 0) + #endif + DwarfFDECache::add(sects.dso_base, fdeInfo.pcStart, fdeInfo.pcEnd, + fdeInfo.fdeStart); + } + return true; + } + } + //_LIBUNWIND_DEBUG_LOG("can't find/use FDE for pc=0x%llX", (uint64_t)pc); + return false; +} +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + + +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) +template +bool UnwindCursor::getInfoFromCompactEncodingSection(pint_t pc, + const UnwindInfoSections §s) { + const bool log = false; + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n", + (uint64_t)pc, (uint64_t)sects.dso_base); + + const UnwindSectionHeader sectionHeader(_addressSpace, + sects.compact_unwind_section); + if (sectionHeader.version() != UNWIND_SECTION_VERSION) + return false; + + // do a binary search of top level index to find page with unwind info + pint_t targetFunctionOffset = pc - sects.dso_base; + const UnwindSectionIndexArray topIndex(_addressSpace, + sects.compact_unwind_section + + sectionHeader.indexSectionOffset()); + uint32_t low = 0; + uint32_t high = sectionHeader.indexCount(); + uint32_t last = high - 1; + while (low < high) { + uint32_t mid = (low + high) / 2; + //if ( log ) fprintf(stderr, "\tmid=%d, low=%d, high=%d, *mid=0x%08X\n", + //mid, low, high, topIndex.functionOffset(mid)); + if (topIndex.functionOffset(mid) <= targetFunctionOffset) { + if ((mid == last) || + (topIndex.functionOffset(mid + 1) > targetFunctionOffset)) { + low = mid; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + const uint32_t firstLevelFunctionOffset = topIndex.functionOffset(low); + const uint32_t firstLevelNextPageFunctionOffset = + topIndex.functionOffset(low + 1); + const pint_t secondLevelAddr = + sects.compact_unwind_section + topIndex.secondLevelPagesSectionOffset(low); + const pint_t lsdaArrayStartAddr = + sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low); + const pint_t lsdaArrayEndAddr = + sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low+1); + if (log) + fprintf(stderr, "\tfirst level search for result index=%d " + "to secondLevelAddr=0x%llX\n", + low, (uint64_t) secondLevelAddr); + // do a binary search of second level page index + uint32_t encoding = 0; + pint_t funcStart = 0; + 
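  // Editorial note: the locals declared here are filled in from whichever
  // second-level page kind matches below -- REGULAR pages store an explicit
  // (functionOffset, encoding) pair per entry, while COMPRESSED pages pack a
  // 24-bit page-relative function offset together with an 8-bit index into either
  // the section-wide common-encodings table or a page-local encodings table.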
pint_t funcEnd = 0; + pint_t lsda = 0; + pint_t personality = 0; + uint32_t pageKind = _addressSpace.get32(secondLevelAddr); + if (pageKind == UNWIND_SECOND_LEVEL_REGULAR) { + // regular page + UnwindSectionRegularPageHeader pageHeader(_addressSpace, + secondLevelAddr); + UnwindSectionRegularArray pageIndex( + _addressSpace, secondLevelAddr + pageHeader.entryPageOffset()); + // binary search looks for entry with e where index[e].offset <= pc < + // index[e+1].offset + if (log) + fprintf(stderr, "\tbinary search for targetFunctionOffset=0x%08llX in " + "regular page starting at secondLevelAddr=0x%llX\n", + (uint64_t) targetFunctionOffset, (uint64_t) secondLevelAddr); + low = 0; + high = pageHeader.entryCount(); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (pageIndex.functionOffset(mid) <= targetFunctionOffset) { + if (mid == (uint32_t)(pageHeader.entryCount() - 1)) { + // at end of table + low = mid; + funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; + break; + } else if (pageIndex.functionOffset(mid + 1) > targetFunctionOffset) { + // next is too big, so we found it + low = mid; + funcEnd = pageIndex.functionOffset(low + 1) + sects.dso_base; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + encoding = pageIndex.encoding(low); + funcStart = pageIndex.functionOffset(low) + sects.dso_base; + if (pc < funcStart) { + if (log) + fprintf( + stderr, + "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", + (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); + return false; + } + if (pc > funcEnd) { + if (log) + fprintf( + stderr, + "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", + (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); + return false; + } + } else if (pageKind == UNWIND_SECOND_LEVEL_COMPRESSED) { + // compressed page + UnwindSectionCompressedPageHeader pageHeader(_addressSpace, + secondLevelAddr); + UnwindSectionCompressedArray pageIndex( + _addressSpace, secondLevelAddr + pageHeader.entryPageOffset()); + const uint32_t targetFunctionPageOffset = + (uint32_t)(targetFunctionOffset - firstLevelFunctionOffset); + // binary search looks for entry with e where index[e].offset <= pc < + // index[e+1].offset + if (log) + fprintf(stderr, "\tbinary search of compressed page starting at " + "secondLevelAddr=0x%llX\n", + (uint64_t) secondLevelAddr); + low = 0; + last = pageHeader.entryCount() - 1; + high = pageHeader.entryCount(); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (pageIndex.functionOffset(mid) <= targetFunctionPageOffset) { + if ((mid == last) || + (pageIndex.functionOffset(mid + 1) > targetFunctionPageOffset)) { + low = mid; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + funcStart = pageIndex.functionOffset(low) + firstLevelFunctionOffset + + sects.dso_base; + if (low < last) + funcEnd = + pageIndex.functionOffset(low + 1) + firstLevelFunctionOffset + + sects.dso_base; + else + funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; + if (pc < funcStart) { + _LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX not in second " + "level compressed unwind table. funcStart=0x%llX", + (uint64_t) pc, (uint64_t) funcStart); + return false; + } + if (pc > funcEnd) { + _LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX not in second " + "level compressed unwind table. 
funcEnd=0x%llX", + (uint64_t) pc, (uint64_t) funcEnd); + return false; + } + uint16_t encodingIndex = pageIndex.encodingIndex(low); + if (encodingIndex < sectionHeader.commonEncodingsArrayCount()) { + // encoding is in common table in section header + encoding = _addressSpace.get32( + sects.compact_unwind_section + + sectionHeader.commonEncodingsArraySectionOffset() + + encodingIndex * sizeof(uint32_t)); + } else { + // encoding is in page specific table + uint16_t pageEncodingIndex = + encodingIndex - (uint16_t)sectionHeader.commonEncodingsArrayCount(); + encoding = _addressSpace.get32(secondLevelAddr + + pageHeader.encodingsPageOffset() + + pageEncodingIndex * sizeof(uint32_t)); + } + } else { + _LIBUNWIND_DEBUG_LOG("malformed __unwind_info at 0x%0llX bad second " + "level page", + (uint64_t) sects.compact_unwind_section); + return false; + } + + // look up LSDA, if encoding says function has one + if (encoding & UNWIND_HAS_LSDA) { + UnwindSectionLsdaArray lsdaIndex(_addressSpace, lsdaArrayStartAddr); + uint32_t funcStartOffset = (uint32_t)(funcStart - sects.dso_base); + low = 0; + high = (uint32_t)(lsdaArrayEndAddr - lsdaArrayStartAddr) / + sizeof(unwind_info_section_header_lsda_index_entry); + // binary search looks for entry with exact match for functionOffset + if (log) + fprintf(stderr, + "\tbinary search of lsda table for targetFunctionOffset=0x%08X\n", + funcStartOffset); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (lsdaIndex.functionOffset(mid) == funcStartOffset) { + lsda = lsdaIndex.lsdaOffset(mid) + sects.dso_base; + break; + } else if (lsdaIndex.functionOffset(mid) < funcStartOffset) { + low = mid + 1; + } else { + high = mid; + } + } + if (lsda == 0) { + _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with HAS_LSDA bit set for " + "pc=0x%0llX, but lsda table has no entry", + encoding, (uint64_t) pc); + return false; + } + } + + // extact personality routine, if encoding says function has one + uint32_t personalityIndex = (encoding & UNWIND_PERSONALITY_MASK) >> + (__builtin_ctz(UNWIND_PERSONALITY_MASK)); + if (personalityIndex != 0) { + --personalityIndex; // change 1-based to zero-based index + if (personalityIndex > sectionHeader.personalityArrayCount()) { + _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with personality index %d, " + "but personality table has only %d entries", + encoding, personalityIndex, + sectionHeader.personalityArrayCount()); + return false; + } + int32_t personalityDelta = (int32_t)_addressSpace.get32( + sects.compact_unwind_section + + sectionHeader.personalityArraySectionOffset() + + personalityIndex * sizeof(uint32_t)); + pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta; + personality = _addressSpace.getP(personalityPointer); + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " + "personalityDelta=0x%08X, personality=0x%08llX\n", + (uint64_t) pc, personalityDelta, (uint64_t) personality); + } + + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " + "encoding=0x%08X, lsda=0x%08llX for funcStart=0x%llX\n", + (uint64_t) pc, encoding, (uint64_t) lsda, (uint64_t) funcStart); + _info.start_ip = funcStart; + _info.end_ip = funcEnd; + _info.lsda = lsda; + _info.handler = personality; + _info.gp = 0; + _info.flags = 0; + _info.format = encoding; + _info.unwind_info = 0; + _info.unwind_info_size = 0; + _info.extra = sects.dso_base; + return true; +} +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) +template +bool 
UnwindCursor::getInfoFromSEH(pint_t pc) { + pint_t base; + RUNTIME_FUNCTION *unwindEntry = lookUpSEHUnwindInfo(pc, &base); + if (!unwindEntry) { + _LIBUNWIND_DEBUG_LOG("\tpc not in table, pc=0x%llX", (uint64_t) pc); + return false; + } + _info.gp = 0; + _info.flags = 0; + _info.format = 0; + _info.unwind_info_size = sizeof(RUNTIME_FUNCTION); + _info.unwind_info = reinterpret_cast(unwindEntry); + _info.extra = base; + _info.start_ip = base + unwindEntry->BeginAddress; +#ifdef _LIBUNWIND_TARGET_X86_64 + _info.end_ip = base + unwindEntry->EndAddress; + // Only fill in the handler and LSDA if they're stale. + if (pc != getLastPC()) { + UNWIND_INFO *xdata = reinterpret_cast(base + unwindEntry->UnwindData); + if (xdata->Flags & (UNW_FLAG_EHANDLER|UNW_FLAG_UHANDLER)) { + // The personality is given in the UNWIND_INFO itself. The LSDA immediately + // follows the UNWIND_INFO. (This follows how both Clang and MSVC emit + // these structures.) + // N.B. UNWIND_INFO structs are DWORD-aligned. + uint32_t lastcode = (xdata->CountOfCodes + 1) & ~1; + const uint32_t *handler = reinterpret_cast(&xdata->UnwindCodes[lastcode]); + _info.lsda = reinterpret_cast(handler+1); + if (*handler) { + _info.handler = reinterpret_cast(__libunwind_seh_personality); + } else + _info.handler = 0; + } else { + _info.lsda = 0; + _info.handler = 0; + } + } +#elif defined(_LIBUNWIND_TARGET_ARM) + _info.end_ip = _info.start_ip + unwindEntry->FunctionLength; + _info.lsda = 0; // FIXME + _info.handler = 0; // FIXME +#endif + setLastPC(pc); + return true; +} +#endif + + +template +void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { + pint_t pc = (pint_t)this->getReg(UNW_REG_IP); +#if defined(_LIBUNWIND_ARM_EHABI) + // Remove the thumb bit so the IP represents the actual instruction address. + // This matches the behaviour of _Unwind_GetIP on arm. + pc &= (pint_t)~0x1; +#endif + + // If the last line of a function is a "throw" the compiler sometimes + // emits no instructions after the call to __cxa_throw. This means + // the return address is actually the start of the next function. + // To disambiguate this, back up the pc when we know it is a return + // address. + if (isReturnAddress) + --pc; + + // Ask address space object to find unwind sections for this pc. + UnwindInfoSections sects; + if (_addressSpace.findUnwindSections(pc, sects)) { +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + // If there is a compact unwind encoding table, look there first. + if (sects.compact_unwind_section != 0) { + if (this->getInfoFromCompactEncodingSection(pc, sects)) { + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + // Found info in table, done unless encoding says to use dwarf. + uint32_t dwarfOffset; + if ((sects.dwarf_section != 0) && compactSaysUseDwarf(&dwarfOffset)) { + if (this->getInfoFromDwarfSection(pc, sects, dwarfOffset)) { + // found info in dwarf, done + return; + } + } + #endif + // If unwind table has entry, but entry says there is no unwind info, + // record that we have no unwind info. + if (_info.format == 0) + _unwindInfoMissing = true; + return; + } + } +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + // If there is SEH unwind info, look there next. + if (this->getInfoFromSEH(pc)) + return; +#endif + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + // If there is dwarf unwind info, look there next. 
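  // Editorial note: the overall probe order in this function is compact unwind,
  // then SEH, then DWARF, then ARM EHABI; the cheaper, fully indexed formats are
  // consulted before anything that may require scanning .eh_frame.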
+ if (sects.dwarf_section != 0) { + if (this->getInfoFromDwarfSection(pc, sects)) { + // found info in dwarf, done + return; + } + } +#endif + +#if defined(_LIBUNWIND_ARM_EHABI) + // If there is ARM EHABI unwind info, look there next. + if (sects.arm_section != 0 && this->getInfoFromEHABISection(pc, sects)) + return; +#endif + } + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + // There is no static unwind info for this pc. Look to see if an FDE was + // dynamically registered for it. + pint_t cachedFDE = DwarfFDECache::findFDE(0, pc); + if (cachedFDE != 0) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + const char *msg = CFI_Parser::decodeFDE(_addressSpace, + cachedFDE, &fdeInfo, &cieInfo); + if (msg == NULL) { + typename CFI_Parser::PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, + pc, R::getArch(), &prolog)) { + // save off parsed FDE info + _info.start_ip = fdeInfo.pcStart; + _info.end_ip = fdeInfo.pcEnd; + _info.lsda = fdeInfo.lsda; + _info.handler = cieInfo.personality; + _info.gp = prolog.spExtraArgSize; + // Some frameless functions need SP + // altered when resuming in function. + _info.flags = 0; + _info.format = dwarfEncoding(); + _info.unwind_info = fdeInfo.fdeStart; + _info.unwind_info_size = (uint32_t)fdeInfo.fdeLength; + _info.extra = 0; + return; + } + } + } + + // Lastly, ask AddressSpace object about platform specific ways to locate + // other FDEs. + pint_t fde; + if (_addressSpace.findOtherFDE(pc, fde)) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + if (!CFI_Parser::decodeFDE(_addressSpace, fde, &fdeInfo, &cieInfo)) { + // Double check this FDE is for a function that includes the pc. + if ((fdeInfo.pcStart <= pc) && (pc < fdeInfo.pcEnd)) { + typename CFI_Parser::PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, + pc, R::getArch(), &prolog)) { + // save off parsed FDE info + _info.start_ip = fdeInfo.pcStart; + _info.end_ip = fdeInfo.pcEnd; + _info.lsda = fdeInfo.lsda; + _info.handler = cieInfo.personality; + _info.gp = prolog.spExtraArgSize; + _info.flags = 0; + _info.format = dwarfEncoding(); + _info.unwind_info = fdeInfo.fdeStart; + _info.unwind_info_size = (uint32_t)fdeInfo.fdeLength; + _info.extra = 0; + return; + } + } + } + } +#endif // #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + + // no unwind info, flag that we can't reliably unwind + _unwindInfoMissing = true; +} + +template +int UnwindCursor::step() { + // Bottom of stack is defined is when unwind info cannot be found. + if (_unwindInfoMissing) + return UNW_STEP_END; + + // Use unwinding info to modify register set as if function returned. 
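  // Editorial note: a minimal sketch of how the public API reaches this point
  // (assuming the usual libunwind.h entry points):
  //   unw_context_t ctx;  unw_cursor_t cursor;
  //   unw_getcontext(&ctx);
  //   unw_init_local(&cursor, &ctx);
  //   while (unw_step(&cursor) > 0) {
  //     unw_word_t ip;
  //     unw_get_reg(&cursor, UNW_REG_IP, &ip);   // inspect the current frame
  //   }
  // Each unw_step() call lands here and dispatches to exactly one of the
  // format-specific steppers selected by the build configuration below.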
+ int result; +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + result = this->stepWithCompactEncoding(); +#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + result = this->stepWithSEHData(); +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + result = this->stepWithDwarfFDE(); +#elif defined(_LIBUNWIND_ARM_EHABI) + result = this->stepWithEHABI(); +#else + #error Need _LIBUNWIND_SUPPORT_COMPACT_UNWIND or \ + _LIBUNWIND_SUPPORT_SEH_UNWIND or \ + _LIBUNWIND_SUPPORT_DWARF_UNWIND or \ + _LIBUNWIND_ARM_EHABI +#endif + + // update info based on new PC + if (result == UNW_STEP_SUCCESS) { + this->setInfoBasedOnIPRegister(true); + if (_unwindInfoMissing) + return UNW_STEP_END; + } + + return result; +} + +template +void UnwindCursor::getInfo(unw_proc_info_t *info) { + *info = _info; +} + +template +bool UnwindCursor::getFunctionName(char *buf, size_t bufLen, + unw_word_t *offset) { + return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP), + buf, bufLen, offset); +} + +} // namespace libunwind + +#endif // __UNWINDCURSOR_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1-gcc-ext.c b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1-gcc-ext.c new file mode 100644 index 0000000000000..63e4083a45794 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1-gcc-ext.c @@ -0,0 +1,319 @@ +//===--------------------- UnwindLevel1-gcc-ext.c -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements gcc extensions to the C++ ABI Exception Handling Level 1. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind_ext.h" +#include "libunwind.h" +#include "Unwind-EHABI.h" +#include "unwind.h" + +#if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) +#define private_1 private_[0] +#endif + +/// Called by __cxa_rethrow(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) { +#if defined(_LIBUNWIND_ARM_EHABI) + _LIBUNWIND_TRACE_API("_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%ld", + (void *)exception_object, + (long)exception_object->unwinder_cache.reserved1); +#else + _LIBUNWIND_TRACE_API("_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%" PRIdPTR, + (void *)exception_object, + (intptr_t)exception_object->private_1); +#endif + +#if defined(_LIBUNWIND_ARM_EHABI) + // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0, + // which is in the same position as private_1 below. + return _Unwind_RaiseException(exception_object); +#else + // If this is non-forced and a stopping place was found, then this is a + // re-throw. + // Call _Unwind_RaiseException() as if this was a new exception + if (exception_object->private_1 == 0) { + return _Unwind_RaiseException(exception_object); + // Will return if there is no catch clause, so that __cxa_rethrow can call + // std::terminate(). + } + + // Call through to _Unwind_Resume() which distiguishes between forced and + // regular exceptions. 
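  // Editorial note: private_1 doubles as the forced-unwind marker.
  // _Unwind_RaiseException() zeroes it, while _Unwind_ForcedUnwind() stores the
  // stop function pointer there, so a non-zero value here means "resume the
  // forced unwind" rather than "treat this as a fresh throw".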
+ _Unwind_Resume(exception_object); + _LIBUNWIND_ABORT("_Unwind_Resume_or_Rethrow() called _Unwind_RaiseException()" + " which unexpectedly returned"); +#endif +} + + +/// Called by personality handler during phase 2 to get base address for data +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetDataRelBase(struct _Unwind_Context *context) { + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); +} + + +/// Called by personality handler during phase 2 to get base address for text +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetTextRelBase(struct _Unwind_Context *context) { + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); +} + + +/// Scans unwind information to find the function that contains the +/// specified code address "pc". +_LIBUNWIND_EXPORT void *_Unwind_FindEnclosingFunction(void *pc) { + _LIBUNWIND_TRACE_API("_Unwind_FindEnclosingFunction(pc=%p)", pc); + // This is slow, but works. + // We create an unwind cursor then alter the IP to be pc + unw_cursor_t cursor; + unw_context_t uc; + unw_proc_info_t info; + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); + if (__unw_get_proc_info(&cursor, &info) == UNW_ESUCCESS) + return (void *)(intptr_t) info.start_ip; + else + return NULL; +} + +/// Walk every frame and call trace function at each one. If trace function +/// returns anything other than _URC_NO_REASON, then walk is terminated. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { + unw_cursor_t cursor; + unw_context_t uc; + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + + _LIBUNWIND_TRACE_API("_Unwind_Backtrace(callback=%p)", + (void *)(uintptr_t)callback); + +#if defined(_LIBUNWIND_ARM_EHABI) + // Create a mock exception object for force unwinding. + _Unwind_Exception ex; + memset(&ex, '\0', sizeof(ex)); + ex.exception_class = 0x434C4E47554E5700; // CLNGUNW\0 +#endif + + // walk each frame + while (true) { + _Unwind_Reason_Code result; + +#if !defined(_LIBUNWIND_ARM_EHABI) + // ask libunwind to get next frame (skip over first frame which is + // _Unwind_Backtrace()) + if (__unw_step(&cursor) <= 0) { + _LIBUNWIND_TRACE_UNWINDING(" _backtrace: ended because cursor reached " + "bottom of stack, returning %d", + _URC_END_OF_STACK); + return _URC_END_OF_STACK; + } +#else + // Get the information for this frame. + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(&cursor, &frameInfo) != UNW_ESUCCESS) { + return _URC_END_OF_STACK; + } + + // Update the pr_cache in the mock exception object. + const uint32_t* unwindInfo = (uint32_t *) frameInfo.unwind_info; + ex.pr_cache.fnstart = frameInfo.start_ip; + ex.pr_cache.ehtp = (_Unwind_EHT_Header *) unwindInfo; + ex.pr_cache.additional= frameInfo.flags; + + struct _Unwind_Context *context = (struct _Unwind_Context *)&cursor; + // Get and call the personality function to unwind the frame. 
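    // Editorial note: on EHABI the unwind tables are interpreted by each frame's
    // own personality routine, so stepping during a backtrace means invoking it
    // with _US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND against the mock exception
    // object built above, rather than calling __unw_step().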
+ __personality_routine handler = (__personality_routine) frameInfo.handler; + if (handler == NULL) { + return _URC_END_OF_STACK; + } + if (handler(_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, &ex, context) != + _URC_CONTINUE_UNWIND) { + return _URC_END_OF_STACK; + } +#endif // defined(_LIBUNWIND_ARM_EHABI) + + // debugging + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionName[512]; + unw_proc_info_t frame; + unw_word_t offset; + __unw_get_proc_name(&cursor, functionName, 512, &offset); + __unw_get_proc_info(&cursor, &frame); + _LIBUNWIND_TRACE_UNWINDING( + " _backtrace: start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", context=%p", + frame.start_ip, functionName, frame.lsda, + (void *)&cursor); + } + + // call trace function with this frame + result = (*callback)((struct _Unwind_Context *)(&cursor), ref); + if (result != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( + " _backtrace: ended because callback returned %d", result); + return result; + } + } +} + + +/// Find DWARF unwind info for an address 'pc' in some function. +_LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc, + struct dwarf_eh_bases *bases) { + // This is slow, but works. + // We create an unwind cursor then alter the IP to be pc + unw_cursor_t cursor; + unw_context_t uc; + unw_proc_info_t info; + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); + __unw_get_proc_info(&cursor, &info); + bases->tbase = (uintptr_t)info.extra; + bases->dbase = 0; // dbase not used on Mac OS X + bases->func = (uintptr_t)info.start_ip; + _LIBUNWIND_TRACE_API("_Unwind_Find_FDE(pc=%p) => %p", pc, + (void *)(intptr_t) info.unwind_info); + return (void *)(intptr_t) info.unwind_info; +} + +/// Returns the CFA (call frame area, or stack pointer at start of function) +/// for the current context. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; + __unw_get_reg(cursor, UNW_REG_SP, &result); + _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return (uintptr_t)result; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +/// ipBefore is a boolean that says if IP is already adjusted to be the call +/// site address. Normally IP is the return address. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore) { + _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p)", (void *)context); + *ipBefore = 0; + return _Unwind_GetIP(context); +} + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +/// Called by programs with dynamic code generators that want +/// to register a dynamically generated FDE. +/// This function has existed on Mac OS X since 10.4, but +/// was broken until 10.6. +_LIBUNWIND_EXPORT void __register_frame(const void *fde) { + _LIBUNWIND_TRACE_API("__register_frame(%p)", fde); + __unw_add_dynamic_fde((unw_word_t)(uintptr_t)fde); +} + + +/// Called by programs with dynamic code generators that want +/// to unregister a dynamically generated FDE. +/// This function has existed on Mac OS X since 10.4, but +/// was broken until 10.6. +_LIBUNWIND_EXPORT void __deregister_frame(const void *fde) { + _LIBUNWIND_TRACE_API("__deregister_frame(%p)", fde); + __unw_remove_dynamic_fde((unw_word_t)(uintptr_t)fde); +} + + +// The following register/deregister functions are gcc extensions. 
+// They have existed on Mac OS X, but have never worked because Mac OS X +// before 10.6 used keymgr to track known FDEs, but these functions +// never got updated to use keymgr. +// For now, we implement these as do-nothing functions to keep any existing +// applications working. We also add the not in 10.6 symbol so that nwe +// application won't be able to use them. + +#if defined(_LIBUNWIND_SUPPORT_FRAME_APIS) +_LIBUNWIND_EXPORT void __register_frame_info_bases(const void *fde, void *ob, + void *tb, void *db) { + (void)fde; + (void)ob; + (void)tb; + (void)db; + _LIBUNWIND_TRACE_API("__register_frame_info_bases(%p,%p, %p, %p)", + fde, ob, tb, db); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info(const void *fde, void *ob) { + (void)fde; + (void)ob; + _LIBUNWIND_TRACE_API("__register_frame_info(%p, %p)", fde, ob); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info_table_bases(const void *fde, + void *ob, void *tb, + void *db) { + (void)fde; + (void)ob; + (void)tb; + (void)db; + _LIBUNWIND_TRACE_API("__register_frame_info_table_bases" + "(%p,%p, %p, %p)", fde, ob, tb, db); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info_table(const void *fde, void *ob) { + (void)fde; + (void)ob; + _LIBUNWIND_TRACE_API("__register_frame_info_table(%p, %p)", fde, ob); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_table(const void *fde) { + (void)fde; + _LIBUNWIND_TRACE_API("__register_frame_table(%p)", fde); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void *__deregister_frame_info(const void *fde) { + (void)fde; + _LIBUNWIND_TRACE_API("__deregister_frame_info(%p)", fde); + // do nothing, this function never worked in Mac OS X + return NULL; +} + +_LIBUNWIND_EXPORT void *__deregister_frame_info_bases(const void *fde) { + (void)fde; + _LIBUNWIND_TRACE_API("__deregister_frame_info_bases(%p)", fde); + // do nothing, this function never worked in Mac OS X + return NULL; +} +#endif // defined(_LIBUNWIND_SUPPORT_FRAME_APIS) + +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +#endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1.c b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1.c new file mode 100644 index 0000000000000..bcb1a7fbec2a1 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1.c @@ -0,0 +1,515 @@ +//===------------------------- UnwindLevel1.c -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements C++ ABI Exception Handling Level 1 as documented at: +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html +// using libunwind +// +//===----------------------------------------------------------------------===// + +// ARM EHABI does not specify _Unwind_{Get,Set}{GR,IP}(). Thus, we are +// defining inline functions to delegate the function calls to +// _Unwind_VRS_{Get,Set}(). However, some applications might declare the +// function protetype directly (instead of including ), thus we need +// to export these functions from libunwind.so as well. 
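// Editorial note: the core of this file is the Itanium two-phase model.
// unwind_phase1() walks the stack read-only until some personality routine
// answers _URC_HANDLER_FOUND, recording that frame's SP in private_2;
// unwind_phase2() then walks again, running landing pads, and uses the saved SP
// to tell the personality when the handler frame has been reached
// (_UA_HANDLER_FRAME).  unwind_phase2_forced() is the single-pass variant driven
// by a caller-supplied stop function.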
+#define _LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE 1 + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind.h" +#include "libunwind_ext.h" +#include "unwind.h" + +#if !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) + +#ifndef _LIBUNWIND_SUPPORT_SEH_UNWIND + +static _Unwind_Reason_Code +unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { + __unw_init_local(cursor, uc); + + // Walk each frame looking for a place to stop. + bool handlerNotFound = true; + while (handlerNotFound) { + // Ask libunwind to get next frame (skip over first which is + // _Unwind_RaiseException). + int stepResult = __unw_step(cursor); + if (stepResult == 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } else if (stepResult < 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_step failed => " + "_URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + + // See if frame has code to run (has personality routine). + unw_proc_info_t frameInfo; + unw_word_t sp; + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR + ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "", + (void *)exception_object, pc, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // If there is a personality routine, ask it if it will want to stop at + // this frame. 
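    // Editorial note: the personality is invoked with the usual prototype, roughly
    //   _Unwind_Reason_Code p(int version, _Unwind_Action actions,
    //                         uint64_t exceptionClass,
    //                         _Unwind_Exception *ex,
    //                         struct _Unwind_Context *ctx);
    // and in this phase 'actions' is just _UA_SEARCH_PHASE, so a C++ personality
    // only consults its LSDA and never runs cleanups here.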
+ if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(uintptr_t)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): calling personality function %p", + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, _UA_SEARCH_PHASE, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)(cursor)); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember stack pointer at the frame + handlerNotFound = false; + __unw_get_reg(cursor, UNW_REG_SP, &sp); + exception_object->private_2 = (uintptr_t)sp; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND", + (void *)exception_object); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND", + (void *)exception_object); + // continue unwinding + break; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + + +static _Unwind_Reason_Code +unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { + __unw_init_local(cursor, uc); + + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", + (void *)exception_object); + + // Walk each frame until we reach where search phase said to stop. + while (true) { + + // Ask libunwind to get next frame (skip over first which is + // _Unwind_RaiseException). + int stepResult = __unw_step(cursor); + if (stepResult == 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } else if (stepResult < 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step failed => " + "_URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // Get info about this frame. + unw_word_t sp; + unw_proc_info_t frameInfo; + __unw_get_reg(cursor, UNW_REG_SP, &sp); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR + ", func=%s, sp=0x%" PRIxPTR ", lsda=0x%" PRIxPTR + ", personality=0x%" PRIxPTR, + (void *)exception_object, frameInfo.start_ip, + functionName, sp, frameInfo.lsda, + frameInfo.handler); + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(uintptr_t)(frameInfo.handler); + _Unwind_Action action = _UA_CLEANUP_PHASE; + if (sp == exception_object->private_2) { + // Tell personality this was the frame it marked in phase 1. 
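        // Editorial note: private_2 holds the SP captured when phase 1 returned
        // _URC_HANDLER_FOUND; matching on SP is how phase 2 recognizes that it
        // has reached exactly that frame and must add _UA_HANDLER_FRAME to the
        // actions.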
+ action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME); + } + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor)); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // Continue unwinding + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", + (void *)exception_object); + if (sp == exception_object->private_2) { + // Phase 1 said we would stop at this frame, but we did not... + _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now in phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT", + (void *)exception_object); + // Personality routine says to transfer control to landing pad. + // We may get control back if landing pad calls _Unwind_Resume(). + if (_LIBUNWIND_TRACING_UNWINDING) { + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_SP, &sp); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " + "user code with ip=0x%" PRIxPTR + ", sp=0x%" PRIxPTR, + (void *)exception_object, pc, sp); + } + __unw_resume(cursor); + // __unw_resume() only returns if there was an error. + return _URC_FATAL_PHASE2_ERROR; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Clean up phase did not resume at the frame that the search phase + // said it would... + return _URC_FATAL_PHASE2_ERROR; +} + +static _Unwind_Reason_Code +unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, + _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + __unw_init_local(cursor, uc); + + // Walk each frame until we reach where search phase said to stop + while (__unw_step(cursor) > 0) { + + // Update info about this frame. + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step " + "failed => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): start_ip=0x%" PRIxPTR + ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, + (void *)exception_object, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // Call stop function at each frame. 
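    // Editorial note: a stop function has the shape
    //   _Unwind_Reason_Code stop(int version, _Unwind_Action actions,
    //                            uint64_t exceptionClass,
    //                            _Unwind_Exception *ex,
    //                            struct _Unwind_Context *ctx, void *param);
    // returning _URC_NO_REASON means "keep unwinding"; any other value ends the
    // walk right here.  The final call after the loop additionally ORs
    // _UA_END_OF_STACK into 'actions'.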
+ _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor), stop_parameter); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stop function returned %d", + (void *)exception_object, stopResult); + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stopped by stop function", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(intptr_t)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): calling personality function %p", + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor)); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_CONTINUE_UNWIND", + (void *)exception_object); + // Destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_INSTALL_CONTEXT", + (void *)exception_object); + // We may get control back if landing pad calls _Unwind_Resume(). + __unw_resume(cursor); + break; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " + "_URC_FATAL_PHASE2_ERROR", + (void *)exception_object, personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Call stop function one last time and tell it we've reached the end + // of the stack. + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + "function with _UA_END_OF_STACK", + (void *)exception_object); + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor), stop_parameter); + + // Clean up phase did not resume at the frame that the search phase said it + // would. + return _URC_FATAL_PHASE2_ERROR; +} + + +/// Called by __cxa_throw. Only returns if there is a fatal error. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", + (void *)exception_object); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // Mark that this is a non-forced unwind, so _Unwind_Resume() + // can do the right thing. + exception_object->private_1 = 0; + exception_object->private_2 = 0; + + // phase 1: the search phase + _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase + return unwind_phase2(&uc, &cursor, exception_object); +} + + + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. 
Note: the call to _Unwind_Resume() is from compiler +/// geneated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call __cxa_rethrow() which +/// in turn calls _Unwind_Resume_or_Rethrow(). +_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", (void *)exception_object); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + if (exception_object->private_1 != 0) + unwind_phase2_forced(&uc, &cursor, exception_object, + (_Unwind_Stop_Fn) exception_object->private_1, + (void *)exception_object->private_2); + else + unwind_phase2(&uc, &cursor, exception_object); + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + + + +/// Not used by C++. +/// Unwinds stack, calling "stop" function at each frame. +/// Could be used to implement longjmp(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + _LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)", + (void *)exception_object, (void *)(uintptr_t)stop); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // Mark that this is a forced unwind, so _Unwind_Resume() can do + // the right thing. + exception_object->private_1 = (uintptr_t) stop; + exception_object->private_2 = (uintptr_t) stop_parameter; + + // do it + return unwind_phase2_forced(&uc, &cursor, exception_object, stop, stop_parameter); +} + + +/// Called by personality handler during phase 2 to get LSDA for current frame. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.lsda; + _LIBUNWIND_TRACE_API( + "_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + if (result != 0) { + if (*((uint8_t *)result) != 0xFF) + _LIBUNWIND_DEBUG_LOG("lsda at 0x%" PRIxPTR " does not start with 0xFF", + result); + } + return result; +} + + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.start_ip; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return result; +} + +#endif // !_LIBUNWIND_SUPPORT_SEH_UNWIND + +/// Called by personality handler during phase 2 if a foreign exception +// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", + (void *)exception_object); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + +/// Called by personality handler during phase 2 to get register values. 
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetGR(struct _Unwind_Context *context, int index) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; + __unw_get_reg(cursor, index, &result); + _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d) => 0x%" PRIxPTR, + (void *)context, index, result); + return (uintptr_t)result; +} + +/// Called by personality handler during phase 2 to alter register values. +_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t value) { + _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%0" PRIxPTR + ")", + (void *)context, index, value); + unw_cursor_t *cursor = (unw_cursor_t *)context; + __unw_set_reg(cursor, index, value); +} + +/// Called by personality handler during phase 2 to get instruction pointer. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; + __unw_get_reg(cursor, UNW_REG_IP, &result); + _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return (uintptr_t)result; +} + +/// Called by personality handler during phase 2 to alter instruction pointer, +/// such as setting where the landing pad is, so _Unwind_Resume() will +/// start executing in the landing pad. +_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, + uintptr_t value) { + _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%0" PRIxPTR ")", + (void *)context, value); + unw_cursor_t *cursor = (unw_cursor_t *)context; + __unw_set_reg(cursor, UNW_REG_IP, value); +} + +#endif // !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersRestore.S b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersRestore.S new file mode 100644 index 0000000000000..01113565e8e00 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersRestore.S @@ -0,0 +1,1032 @@ +//===-------------------- UnwindRegistersRestore.S ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .text + +#if !defined(__USING_SJLJ_EXCEPTIONS__) + +#if defined(__i386__) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_x866jumptoEv) +# +# void libunwind::Registers_x86::jumpto() +# +#if defined(_WIN32) +# On windows, the 'this' pointer is passed in ecx instead of on the stack + movl %ecx, %eax +#else +# On entry: +# + + +# +-----------------------+ +# + thread_state pointer + +# +-----------------------+ +# + return address + +# +-----------------------+ <-- SP +# + + + movl 4(%esp), %eax +#endif + # set up eax and ret on new stack location + movl 28(%eax), %edx # edx holds new stack pointer + subl $8,%edx + movl %edx, 28(%eax) + movl 0(%eax), %ebx + movl %ebx, 0(%edx) + movl 40(%eax), %ebx + movl %ebx, 4(%edx) + # we now have ret and eax pushed onto where new stack will be + # restore all registers + movl 4(%eax), %ebx + movl 8(%eax), %ecx + movl 12(%eax), %edx + movl 16(%eax), %edi + movl 20(%eax), %esi + movl 24(%eax), %ebp + movl 28(%eax), %esp + # skip ss + # skip eflags + pop %eax # eax was already pushed on new stack + ret # eip was already pushed on new stack + # skip cs + # skip ds + # skip es + # skip fs + # skip gs + +#elif defined(__x86_64__) + +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind16Registers_x86_646jumptoEv) +# +# void libunwind::Registers_x86_64::jumpto() +# +#if defined(_WIN64) +# On entry, thread_state pointer is in rcx; move it into rdi +# to share restore code below. Since this routine restores and +# overwrites all registers, we can use the same registers for +# pointers and temporaries as on unix even though win64 normally +# mustn't clobber some of them. 
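# Editorial note: the sequence below parks the new RDI and RIP just under the
# target RSP (subq $16 on the saved stack pointer), restores every other
# register straight from the Registers_x86_64 struct, switches RSP last, and
# finishes with pop %rdi / ret so the final two values are recovered without
# needing any scratch register.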
+ movq %rcx, %rdi +#else +# On entry, thread_state pointer is in rdi +#endif + + movq 56(%rdi), %rax # rax holds new stack pointer + subq $16, %rax + movq %rax, 56(%rdi) + movq 32(%rdi), %rbx # store new rdi on new stack + movq %rbx, 0(%rax) + movq 128(%rdi), %rbx # store new rip on new stack + movq %rbx, 8(%rax) + # restore all registers + movq 0(%rdi), %rax + movq 8(%rdi), %rbx + movq 16(%rdi), %rcx + movq 24(%rdi), %rdx + # restore rdi later + movq 40(%rdi), %rsi + movq 48(%rdi), %rbp + # restore rsp later + movq 64(%rdi), %r8 + movq 72(%rdi), %r9 + movq 80(%rdi), %r10 + movq 88(%rdi), %r11 + movq 96(%rdi), %r12 + movq 104(%rdi), %r13 + movq 112(%rdi), %r14 + movq 120(%rdi), %r15 + # skip rflags + # skip cs + # skip fs + # skip gs + +#if defined(_WIN64) + movdqu 176(%rdi),%xmm0 + movdqu 192(%rdi),%xmm1 + movdqu 208(%rdi),%xmm2 + movdqu 224(%rdi),%xmm3 + movdqu 240(%rdi),%xmm4 + movdqu 256(%rdi),%xmm5 + movdqu 272(%rdi),%xmm6 + movdqu 288(%rdi),%xmm7 + movdqu 304(%rdi),%xmm8 + movdqu 320(%rdi),%xmm9 + movdqu 336(%rdi),%xmm10 + movdqu 352(%rdi),%xmm11 + movdqu 368(%rdi),%xmm12 + movdqu 384(%rdi),%xmm13 + movdqu 400(%rdi),%xmm14 + movdqu 416(%rdi),%xmm15 +#endif + movq 56(%rdi), %rsp # cut back rsp to new location + pop %rdi # rdi was saved here earlier + ret # rip was saved here + + +#elif defined(__powerpc64__) + +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_ppc646jumptoEv) +// +// void libunwind::Registers_ppc64::jumpto() +// +// On entry: +// thread_state pointer is in r3 +// + +// load register (GPR) +#define PPC64_LR(n) \ + ld %r##n, (8 * (n + 2))(%r3) + + // restore integral registers + // skip r0 for now + // skip r1 for now + PPC64_LR(2) + // skip r3 for now + // skip r4 for now + // skip r5 for now + PPC64_LR(6) + PPC64_LR(7) + PPC64_LR(8) + PPC64_LR(9) + PPC64_LR(10) + PPC64_LR(11) + PPC64_LR(12) + PPC64_LR(13) + PPC64_LR(14) + PPC64_LR(15) + PPC64_LR(16) + PPC64_LR(17) + PPC64_LR(18) + PPC64_LR(19) + PPC64_LR(20) + PPC64_LR(21) + PPC64_LR(22) + PPC64_LR(23) + PPC64_LR(24) + PPC64_LR(25) + PPC64_LR(26) + PPC64_LR(27) + PPC64_LR(28) + PPC64_LR(29) + PPC64_LR(30) + PPC64_LR(31) + +#ifdef PPC64_HAS_VMX + + // restore VS registers + // (note that this also restores floating point registers and V registers, + // because part of VS is mapped to these registers) + + addi %r4, %r3, PPC64_OFFS_FP + +// load VS register +#define PPC64_LVS(n) \ + lxvd2x %vs##n, 0, %r4 ;\ + addi %r4, %r4, 16 + + // restore the first 32 VS regs (and also all floating point regs) + PPC64_LVS(0) + PPC64_LVS(1) + PPC64_LVS(2) + PPC64_LVS(3) + PPC64_LVS(4) + PPC64_LVS(5) + PPC64_LVS(6) + PPC64_LVS(7) + PPC64_LVS(8) + PPC64_LVS(9) + PPC64_LVS(10) + PPC64_LVS(11) + PPC64_LVS(12) + PPC64_LVS(13) + PPC64_LVS(14) + PPC64_LVS(15) + PPC64_LVS(16) + PPC64_LVS(17) + PPC64_LVS(18) + PPC64_LVS(19) + PPC64_LVS(20) + PPC64_LVS(21) + PPC64_LVS(22) + PPC64_LVS(23) + PPC64_LVS(24) + PPC64_LVS(25) + PPC64_LVS(26) + PPC64_LVS(27) + PPC64_LVS(28) + PPC64_LVS(29) + PPC64_LVS(30) + PPC64_LVS(31) + + // use VRSAVE to conditionally restore the remaining VS regs, + // that are where the V regs are mapped + + ld %r5, PPC64_OFFS_VRSAVE(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + +// conditionally load VS +#define PPC64_CLVS_BOTTOM(n) \ + beq Ldone##n ;\ + addi %r4, %r3, PPC64_OFFS_FP + n * 16 ;\ + lxvd2x %vs##n, 0, %r4 ;\ +Ldone##n: + +#define PPC64_CLVSl(n) \ + andis. %r0, %r5, (1<<(47-n)) ;\ +PPC64_CLVS_BOTTOM(n) + +#define PPC64_CLVSh(n) \ + andi. 
%r0, %r5, (1<<(63-n)) ;\ +PPC64_CLVS_BOTTOM(n) + + PPC64_CLVSl(32) + PPC64_CLVSl(33) + PPC64_CLVSl(34) + PPC64_CLVSl(35) + PPC64_CLVSl(36) + PPC64_CLVSl(37) + PPC64_CLVSl(38) + PPC64_CLVSl(39) + PPC64_CLVSl(40) + PPC64_CLVSl(41) + PPC64_CLVSl(42) + PPC64_CLVSl(43) + PPC64_CLVSl(44) + PPC64_CLVSl(45) + PPC64_CLVSl(46) + PPC64_CLVSl(47) + PPC64_CLVSh(48) + PPC64_CLVSh(49) + PPC64_CLVSh(50) + PPC64_CLVSh(51) + PPC64_CLVSh(52) + PPC64_CLVSh(53) + PPC64_CLVSh(54) + PPC64_CLVSh(55) + PPC64_CLVSh(56) + PPC64_CLVSh(57) + PPC64_CLVSh(58) + PPC64_CLVSh(59) + PPC64_CLVSh(60) + PPC64_CLVSh(61) + PPC64_CLVSh(62) + PPC64_CLVSh(63) + +#else + +// load FP register +#define PPC64_LF(n) \ + lfd %f##n, (PPC64_OFFS_FP + n * 16)(%r3) + + // restore float registers + PPC64_LF(0) + PPC64_LF(1) + PPC64_LF(2) + PPC64_LF(3) + PPC64_LF(4) + PPC64_LF(5) + PPC64_LF(6) + PPC64_LF(7) + PPC64_LF(8) + PPC64_LF(9) + PPC64_LF(10) + PPC64_LF(11) + PPC64_LF(12) + PPC64_LF(13) + PPC64_LF(14) + PPC64_LF(15) + PPC64_LF(16) + PPC64_LF(17) + PPC64_LF(18) + PPC64_LF(19) + PPC64_LF(20) + PPC64_LF(21) + PPC64_LF(22) + PPC64_LF(23) + PPC64_LF(24) + PPC64_LF(25) + PPC64_LF(26) + PPC64_LF(27) + PPC64_LF(28) + PPC64_LF(29) + PPC64_LF(30) + PPC64_LF(31) + + // restore vector registers if any are in use + ld %r5, PPC64_OFFS_VRSAVE(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + + subi %r4, %r1, 16 + // r4 is now a 16-byte aligned pointer into the red zone + // the _vectorScalarRegisters may not be 16-byte aligned + // so copy via red zone temp buffer + +#define PPC64_CLV_UNALIGNED_BOTTOM(n) \ + beq Ldone##n ;\ + ld %r0, (PPC64_OFFS_V + n * 16)(%r3) ;\ + std %r0, 0(%r4) ;\ + ld %r0, (PPC64_OFFS_V + n * 16 + 8)(%r3) ;\ + std %r0, 8(%r4) ;\ + lvx %v##n, 0, %r4 ;\ +Ldone ## n: + +#define PPC64_CLV_UNALIGNEDl(n) \ + andis. %r0, %r5, (1<<(15-n)) ;\ +PPC64_CLV_UNALIGNED_BOTTOM(n) + +#define PPC64_CLV_UNALIGNEDh(n) \ + andi. 
%r0, %r5, (1<<(31-n)) ;\ +PPC64_CLV_UNALIGNED_BOTTOM(n) + + PPC64_CLV_UNALIGNEDl(0) + PPC64_CLV_UNALIGNEDl(1) + PPC64_CLV_UNALIGNEDl(2) + PPC64_CLV_UNALIGNEDl(3) + PPC64_CLV_UNALIGNEDl(4) + PPC64_CLV_UNALIGNEDl(5) + PPC64_CLV_UNALIGNEDl(6) + PPC64_CLV_UNALIGNEDl(7) + PPC64_CLV_UNALIGNEDl(8) + PPC64_CLV_UNALIGNEDl(9) + PPC64_CLV_UNALIGNEDl(10) + PPC64_CLV_UNALIGNEDl(11) + PPC64_CLV_UNALIGNEDl(12) + PPC64_CLV_UNALIGNEDl(13) + PPC64_CLV_UNALIGNEDl(14) + PPC64_CLV_UNALIGNEDl(15) + PPC64_CLV_UNALIGNEDh(16) + PPC64_CLV_UNALIGNEDh(17) + PPC64_CLV_UNALIGNEDh(18) + PPC64_CLV_UNALIGNEDh(19) + PPC64_CLV_UNALIGNEDh(20) + PPC64_CLV_UNALIGNEDh(21) + PPC64_CLV_UNALIGNEDh(22) + PPC64_CLV_UNALIGNEDh(23) + PPC64_CLV_UNALIGNEDh(24) + PPC64_CLV_UNALIGNEDh(25) + PPC64_CLV_UNALIGNEDh(26) + PPC64_CLV_UNALIGNEDh(27) + PPC64_CLV_UNALIGNEDh(28) + PPC64_CLV_UNALIGNEDh(29) + PPC64_CLV_UNALIGNEDh(30) + PPC64_CLV_UNALIGNEDh(31) + +#endif + +Lnovec: + ld %r0, PPC64_OFFS_CR(%r3) + mtcr %r0 + ld %r0, PPC64_OFFS_SRR0(%r3) + mtctr %r0 + + PPC64_LR(0) + PPC64_LR(5) + PPC64_LR(4) + PPC64_LR(1) + PPC64_LR(3) + bctr + +#elif defined(__ppc__) + +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) +// +// void libunwind::Registers_ppc::jumpto() +// +// On entry: +// thread_state pointer is in r3 +// + + // restore integral registerrs + // skip r0 for now + // skip r1 for now + lwz %r2, 16(%r3) + // skip r3 for now + // skip r4 for now + // skip r5 for now + lwz %r6, 32(%r3) + lwz %r7, 36(%r3) + lwz %r8, 40(%r3) + lwz %r9, 44(%r3) + lwz %r10, 48(%r3) + lwz %r11, 52(%r3) + lwz %r12, 56(%r3) + lwz %r13, 60(%r3) + lwz %r14, 64(%r3) + lwz %r15, 68(%r3) + lwz %r16, 72(%r3) + lwz %r17, 76(%r3) + lwz %r18, 80(%r3) + lwz %r19, 84(%r3) + lwz %r20, 88(%r3) + lwz %r21, 92(%r3) + lwz %r22, 96(%r3) + lwz %r23,100(%r3) + lwz %r24,104(%r3) + lwz %r25,108(%r3) + lwz %r26,112(%r3) + lwz %r27,116(%r3) + lwz %r28,120(%r3) + lwz %r29,124(%r3) + lwz %r30,128(%r3) + lwz %r31,132(%r3) + + // restore float registers + lfd %f0, 160(%r3) + lfd %f1, 168(%r3) + lfd %f2, 176(%r3) + lfd %f3, 184(%r3) + lfd %f4, 192(%r3) + lfd %f5, 200(%r3) + lfd %f6, 208(%r3) + lfd %f7, 216(%r3) + lfd %f8, 224(%r3) + lfd %f9, 232(%r3) + lfd %f10,240(%r3) + lfd %f11,248(%r3) + lfd %f12,256(%r3) + lfd %f13,264(%r3) + lfd %f14,272(%r3) + lfd %f15,280(%r3) + lfd %f16,288(%r3) + lfd %f17,296(%r3) + lfd %f18,304(%r3) + lfd %f19,312(%r3) + lfd %f20,320(%r3) + lfd %f21,328(%r3) + lfd %f22,336(%r3) + lfd %f23,344(%r3) + lfd %f24,352(%r3) + lfd %f25,360(%r3) + lfd %f26,368(%r3) + lfd %f27,376(%r3) + lfd %f28,384(%r3) + lfd %f29,392(%r3) + lfd %f30,400(%r3) + lfd %f31,408(%r3) + + // restore vector registers if any are in use + lwz %r5, 156(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone + // the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer + + +#define LOAD_VECTOR_UNALIGNEDl(_index) \ + andis. %r0, %r5, (1<<(15-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: + +#define LOAD_VECTOR_UNALIGNEDh(_index) \ + andi. 
%r0, %r5, (1<<(31-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: + + + LOAD_VECTOR_UNALIGNEDl(0) + LOAD_VECTOR_UNALIGNEDl(1) + LOAD_VECTOR_UNALIGNEDl(2) + LOAD_VECTOR_UNALIGNEDl(3) + LOAD_VECTOR_UNALIGNEDl(4) + LOAD_VECTOR_UNALIGNEDl(5) + LOAD_VECTOR_UNALIGNEDl(6) + LOAD_VECTOR_UNALIGNEDl(7) + LOAD_VECTOR_UNALIGNEDl(8) + LOAD_VECTOR_UNALIGNEDl(9) + LOAD_VECTOR_UNALIGNEDl(10) + LOAD_VECTOR_UNALIGNEDl(11) + LOAD_VECTOR_UNALIGNEDl(12) + LOAD_VECTOR_UNALIGNEDl(13) + LOAD_VECTOR_UNALIGNEDl(14) + LOAD_VECTOR_UNALIGNEDl(15) + LOAD_VECTOR_UNALIGNEDh(16) + LOAD_VECTOR_UNALIGNEDh(17) + LOAD_VECTOR_UNALIGNEDh(18) + LOAD_VECTOR_UNALIGNEDh(19) + LOAD_VECTOR_UNALIGNEDh(20) + LOAD_VECTOR_UNALIGNEDh(21) + LOAD_VECTOR_UNALIGNEDh(22) + LOAD_VECTOR_UNALIGNEDh(23) + LOAD_VECTOR_UNALIGNEDh(24) + LOAD_VECTOR_UNALIGNEDh(25) + LOAD_VECTOR_UNALIGNEDh(26) + LOAD_VECTOR_UNALIGNEDh(27) + LOAD_VECTOR_UNALIGNEDh(28) + LOAD_VECTOR_UNALIGNEDh(29) + LOAD_VECTOR_UNALIGNEDh(30) + LOAD_VECTOR_UNALIGNEDh(31) + +Lnovec: + lwz %r0, 136(%r3) // __cr + mtcr %r0 + lwz %r0, 148(%r3) // __ctr + mtctr %r0 + lwz %r0, 0(%r3) // __ssr0 + mtctr %r0 + lwz %r0, 8(%r3) // do r0 now + lwz %r5, 28(%r3) // do r5 now + lwz %r4, 24(%r3) // do r4 now + lwz %r1, 12(%r3) // do sp now + lwz %r3, 20(%r3) // do r3 last + bctr + +#elif defined(__arm64__) || defined(__aarch64__) + +// +// void libunwind::Registers_arm64::jumpto() +// +// On entry: +// thread_state pointer is in x0 +// + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) + // skip restore of x0,x1 for now + ldp x2, x3, [x0, #0x010] + ldp x4, x5, [x0, #0x020] + ldp x6, x7, [x0, #0x030] + ldp x8, x9, [x0, #0x040] + ldp x10,x11, [x0, #0x050] + ldp x12,x13, [x0, #0x060] + ldp x14,x15, [x0, #0x070] + ldp x16,x17, [x0, #0x080] + ldp x18,x19, [x0, #0x090] + ldp x20,x21, [x0, #0x0A0] + ldp x22,x23, [x0, #0x0B0] + ldp x24,x25, [x0, #0x0C0] + ldp x26,x27, [x0, #0x0D0] + ldp x28,x29, [x0, #0x0E0] + ldr x30, [x0, #0x100] // restore pc into lr + ldr x1, [x0, #0x0F8] + mov sp,x1 // restore sp + + ldp d0, d1, [x0, #0x110] + ldp d2, d3, [x0, #0x120] + ldp d4, d5, [x0, #0x130] + ldp d6, d7, [x0, #0x140] + ldp d8, d9, [x0, #0x150] + ldp d10,d11, [x0, #0x160] + ldp d12,d13, [x0, #0x170] + ldp d14,d15, [x0, #0x180] + ldp d16,d17, [x0, #0x190] + ldp d18,d19, [x0, #0x1A0] + ldp d20,d21, [x0, #0x1B0] + ldp d22,d23, [x0, #0x1C0] + ldp d24,d25, [x0, #0x1D0] + ldp d26,d27, [x0, #0x1E0] + ldp d28,d29, [x0, #0x1F0] + ldr d30, [x0, #0x200] + ldr d31, [x0, #0x208] + + ldp x0, x1, [x0, #0x000] // restore x0,x1 + ret x30 // jump to pc + +#elif defined(__arm__) && !defined(__APPLE__) + +#if !defined(__ARM_ARCH_ISA_ARM) +#if (__ARM_ARCH_ISA_THUMB == 2) + .syntax unified +#endif + .thumb +#endif + +@ +@ void libunwind::Registers_arm::restoreCoreAndJumpTo() +@ +@ On entry: +@ thread_state pointer is in r0 +@ + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv) +#if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 + @ r8-r11: ldm into r1-r4, then mov to r8-r11 + adds r0, #0x20 + ldm r0!, {r1-r4} + subs r0, #0x30 + mov r8, r1 + mov r9, r2 + mov r10, r3 + mov r11, r4 + @ r12 does not need 
loading, it it the intra-procedure-call scratch register + ldr r2, [r0, #0x34] + ldr r3, [r0, #0x3c] + mov sp, r2 + mov lr, r3 @ restore pc into lr + ldm r0, {r0-r7} +#else + @ Use lr as base so that r0 can be restored. + mov lr, r0 + @ 32bit thumb-2 restrictions for ldm: + @ . the sp (r13) cannot be in the list + @ . the pc (r15) and lr (r14) cannot both be in the list in an LDM instruction + ldm lr, {r0-r12} + ldr sp, [lr, #52] + ldr lr, [lr, #60] @ restore pc into lr +#endif + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPWithFLDMD(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMDEPv) + @ VFP and iwMMX instructions are only available when compiling with the flags + @ that enable them. We do not want to do that in the library (because we do not + @ want the compiler to generate instructions that access those) but this is + @ only accessed if the personality routine needs these registers. Use of + @ these registers implies they are, actually, available on the target, so + @ it's ok to execute. + @ So, generate the instruction using the corresponding coprocessor mnemonic. + vldmia r0, {d0-d15} + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPWithFLDMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMXEPv) + vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPv3(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPv) + vldmia r0, {d16-d31} + JMP(lr) + +#if defined(__ARM_WMMX) + +@ +@ static void libunwind::Registers_arm::restoreiWMMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPv) + ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8 + ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8 + ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8 + ldcl p1, cr3, [r0], #8 @ wldrd wR3, [r0], #8 + ldcl p1, cr4, [r0], #8 @ wldrd wR4, [r0], #8 + ldcl p1, cr5, [r0], #8 @ wldrd wR5, [r0], #8 + ldcl p1, cr6, [r0], #8 @ wldrd wR6, [r0], #8 + ldcl p1, cr7, [r0], #8 @ wldrd wR7, [r0], #8 + ldcl p1, cr8, [r0], #8 @ wldrd wR8, [r0], #8 + ldcl p1, cr9, [r0], #8 @ wldrd wR9, [r0], #8 + ldcl p1, cr10, [r0], #8 @ wldrd wR10, [r0], #8 + ldcl p1, cr11, [r0], #8 @ wldrd wR11, [r0], #8 + ldcl p1, cr12, [r0], #8 @ wldrd wR12, [r0], #8 + ldcl p1, cr13, [r0], #8 @ wldrd wR13, [r0], #8 + ldcl p1, cr14, [r0], #8 @ wldrd wR14, [r0], #8 + ldcl p1, cr15, [r0], #8 @ wldrd wR15, [r0], #8 + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreiWMMXControl(unw_uint32_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXControlEPj) + ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4 + ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4 + ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4 + ldc2 p1, cr11, [r0], #4 @ wldrw wCGR3, [r0], #4 + JMP(lr) + +#endif + +#elif defined(__or1k__) + 
+DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv) +# +# void libunwind::Registers_or1k::jumpto() +# +# On entry: +# thread_state pointer is in r3 +# + + # restore integral registers + l.lwz r0, 0(r3) + l.lwz r1, 4(r3) + l.lwz r2, 8(r3) + # skip r3 for now + l.lwz r4, 16(r3) + l.lwz r5, 20(r3) + l.lwz r6, 24(r3) + l.lwz r7, 28(r3) + l.lwz r8, 32(r3) + # skip r9 + l.lwz r10, 40(r3) + l.lwz r11, 44(r3) + l.lwz r12, 48(r3) + l.lwz r13, 52(r3) + l.lwz r14, 56(r3) + l.lwz r15, 60(r3) + l.lwz r16, 64(r3) + l.lwz r17, 68(r3) + l.lwz r18, 72(r3) + l.lwz r19, 76(r3) + l.lwz r20, 80(r3) + l.lwz r21, 84(r3) + l.lwz r22, 88(r3) + l.lwz r23, 92(r3) + l.lwz r24, 96(r3) + l.lwz r25,100(r3) + l.lwz r26,104(r3) + l.lwz r27,108(r3) + l.lwz r28,112(r3) + l.lwz r29,116(r3) + l.lwz r30,120(r3) + l.lwz r31,124(r3) + + # at last, restore r3 + l.lwz r3, 12(r3) + + # load new pc into ra + l.lwz r9, 128(r3) + # jump to pc + l.jr r9 + l.nop + +#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 + +// +// void libunwind::Registers_mips_o32::jumpto() +// +// On entry: +// thread state pointer is in a0 ($4) +// +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind18Registers_mips_o326jumptoEv) + .set push + .set noat + .set noreorder + .set nomacro +#ifdef __mips_hard_float +#if __mips_fpr != 64 + ldc1 $f0, (4 * 36 + 8 * 0)($4) + ldc1 $f2, (4 * 36 + 8 * 2)($4) + ldc1 $f4, (4 * 36 + 8 * 4)($4) + ldc1 $f6, (4 * 36 + 8 * 6)($4) + ldc1 $f8, (4 * 36 + 8 * 8)($4) + ldc1 $f10, (4 * 36 + 8 * 10)($4) + ldc1 $f12, (4 * 36 + 8 * 12)($4) + ldc1 $f14, (4 * 36 + 8 * 14)($4) + ldc1 $f16, (4 * 36 + 8 * 16)($4) + ldc1 $f18, (4 * 36 + 8 * 18)($4) + ldc1 $f20, (4 * 36 + 8 * 20)($4) + ldc1 $f22, (4 * 36 + 8 * 22)($4) + ldc1 $f24, (4 * 36 + 8 * 24)($4) + ldc1 $f26, (4 * 36 + 8 * 26)($4) + ldc1 $f28, (4 * 36 + 8 * 28)($4) + ldc1 $f30, (4 * 36 + 8 * 30)($4) +#else + ldc1 $f0, (4 * 36 + 8 * 0)($4) + ldc1 $f1, (4 * 36 + 8 * 1)($4) + ldc1 $f2, (4 * 36 + 8 * 2)($4) + ldc1 $f3, (4 * 36 + 8 * 3)($4) + ldc1 $f4, (4 * 36 + 8 * 4)($4) + ldc1 $f5, (4 * 36 + 8 * 5)($4) + ldc1 $f6, (4 * 36 + 8 * 6)($4) + ldc1 $f7, (4 * 36 + 8 * 7)($4) + ldc1 $f8, (4 * 36 + 8 * 8)($4) + ldc1 $f9, (4 * 36 + 8 * 9)($4) + ldc1 $f10, (4 * 36 + 8 * 10)($4) + ldc1 $f11, (4 * 36 + 8 * 11)($4) + ldc1 $f12, (4 * 36 + 8 * 12)($4) + ldc1 $f13, (4 * 36 + 8 * 13)($4) + ldc1 $f14, (4 * 36 + 8 * 14)($4) + ldc1 $f15, (4 * 36 + 8 * 15)($4) + ldc1 $f16, (4 * 36 + 8 * 16)($4) + ldc1 $f17, (4 * 36 + 8 * 17)($4) + ldc1 $f18, (4 * 36 + 8 * 18)($4) + ldc1 $f19, (4 * 36 + 8 * 19)($4) + ldc1 $f20, (4 * 36 + 8 * 20)($4) + ldc1 $f21, (4 * 36 + 8 * 21)($4) + ldc1 $f22, (4 * 36 + 8 * 22)($4) + ldc1 $f23, (4 * 36 + 8 * 23)($4) + ldc1 $f24, (4 * 36 + 8 * 24)($4) + ldc1 $f25, (4 * 36 + 8 * 25)($4) + ldc1 $f26, (4 * 36 + 8 * 26)($4) + ldc1 $f27, (4 * 36 + 8 * 27)($4) + ldc1 $f28, (4 * 36 + 8 * 28)($4) + ldc1 $f29, (4 * 36 + 8 * 29)($4) + ldc1 $f30, (4 * 36 + 8 * 30)($4) + ldc1 $f31, (4 * 36 + 8 * 31)($4) +#endif +#endif + // restore hi and lo + lw $8, (4 * 33)($4) + mthi $8 + lw $8, (4 * 34)($4) + mtlo $8 + // r0 is zero + lw $1, (4 * 1)($4) + lw $2, (4 * 2)($4) + lw $3, (4 * 3)($4) + // skip a0 for now + lw $5, (4 * 5)($4) + lw $6, (4 * 6)($4) + lw $7, (4 * 7)($4) + lw $8, (4 * 8)($4) + lw $9, (4 * 9)($4) + lw $10, (4 * 10)($4) + lw $11, (4 * 11)($4) + lw $12, (4 * 12)($4) + lw $13, (4 * 13)($4) + lw $14, (4 * 14)($4) + lw $15, (4 * 15)($4) + lw $16, (4 * 16)($4) + lw $17, (4 * 17)($4) + lw $18, (4 * 18)($4) + lw $19, (4 * 19)($4) + lw $20, (4 * 20)($4) + lw $21, (4 * 21)($4) + lw 
$22, (4 * 22)($4) + lw $23, (4 * 23)($4) + lw $24, (4 * 24)($4) + lw $25, (4 * 25)($4) + lw $26, (4 * 26)($4) + lw $27, (4 * 27)($4) + lw $28, (4 * 28)($4) + lw $29, (4 * 29)($4) + lw $30, (4 * 30)($4) + // load new pc into ra + lw $31, (4 * 32)($4) + // jump to ra, load a0 in the delay slot + jr $31 + lw $4, (4 * 4)($4) + .set pop + +#elif defined(__mips64) + +// +// void libunwind::Registers_mips_newabi::jumpto() +// +// On entry: +// thread state pointer is in a0 ($4) +// +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind21Registers_mips_newabi6jumptoEv) + .set push + .set noat + .set noreorder + .set nomacro +#ifdef __mips_hard_float + ldc1 $f0, (8 * 35)($4) + ldc1 $f1, (8 * 36)($4) + ldc1 $f2, (8 * 37)($4) + ldc1 $f3, (8 * 38)($4) + ldc1 $f4, (8 * 39)($4) + ldc1 $f5, (8 * 40)($4) + ldc1 $f6, (8 * 41)($4) + ldc1 $f7, (8 * 42)($4) + ldc1 $f8, (8 * 43)($4) + ldc1 $f9, (8 * 44)($4) + ldc1 $f10, (8 * 45)($4) + ldc1 $f11, (8 * 46)($4) + ldc1 $f12, (8 * 47)($4) + ldc1 $f13, (8 * 48)($4) + ldc1 $f14, (8 * 49)($4) + ldc1 $f15, (8 * 50)($4) + ldc1 $f16, (8 * 51)($4) + ldc1 $f17, (8 * 52)($4) + ldc1 $f18, (8 * 53)($4) + ldc1 $f19, (8 * 54)($4) + ldc1 $f20, (8 * 55)($4) + ldc1 $f21, (8 * 56)($4) + ldc1 $f22, (8 * 57)($4) + ldc1 $f23, (8 * 58)($4) + ldc1 $f24, (8 * 59)($4) + ldc1 $f25, (8 * 60)($4) + ldc1 $f26, (8 * 61)($4) + ldc1 $f27, (8 * 62)($4) + ldc1 $f28, (8 * 63)($4) + ldc1 $f29, (8 * 64)($4) + ldc1 $f30, (8 * 65)($4) + ldc1 $f31, (8 * 66)($4) +#endif + // restore hi and lo + ld $8, (8 * 33)($4) + mthi $8 + ld $8, (8 * 34)($4) + mtlo $8 + // r0 is zero + ld $1, (8 * 1)($4) + ld $2, (8 * 2)($4) + ld $3, (8 * 3)($4) + // skip a0 for now + ld $5, (8 * 5)($4) + ld $6, (8 * 6)($4) + ld $7, (8 * 7)($4) + ld $8, (8 * 8)($4) + ld $9, (8 * 9)($4) + ld $10, (8 * 10)($4) + ld $11, (8 * 11)($4) + ld $12, (8 * 12)($4) + ld $13, (8 * 13)($4) + ld $14, (8 * 14)($4) + ld $15, (8 * 15)($4) + ld $16, (8 * 16)($4) + ld $17, (8 * 17)($4) + ld $18, (8 * 18)($4) + ld $19, (8 * 19)($4) + ld $20, (8 * 20)($4) + ld $21, (8 * 21)($4) + ld $22, (8 * 22)($4) + ld $23, (8 * 23)($4) + ld $24, (8 * 24)($4) + ld $25, (8 * 25)($4) + ld $26, (8 * 26)($4) + ld $27, (8 * 27)($4) + ld $28, (8 * 28)($4) + ld $29, (8 * 29)($4) + ld $30, (8 * 30)($4) + // load new pc into ra + ld $31, (8 * 32)($4) + // jump to ra, load a0 in the delay slot + jr $31 + ld $4, (8 * 4)($4) + .set pop + +#elif defined(__sparc__) + +// +// void libunwind::Registers_sparc_o32::jumpto() +// +// On entry: +// thread_state pointer is in o0 +// +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_sparc6jumptoEv) + ta 3 + ldd [%o0 + 64], %l0 + ldd [%o0 + 72], %l2 + ldd [%o0 + 80], %l4 + ldd [%o0 + 88], %l6 + ldd [%o0 + 96], %i0 + ldd [%o0 + 104], %i2 + ldd [%o0 + 112], %i4 + ldd [%o0 + 120], %i6 + ld [%o0 + 60], %o7 + jmp %o7 + nop + +#endif + +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersSave.S b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersSave.S new file mode 100644 index 0000000000000..54505e53bac70 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersSave.S @@ -0,0 +1,983 @@ +//===------------------------ UnwindRegistersSave.S -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .text + +#if !defined(__USING_SJLJ_EXCEPTIONS__) + +#if defined(__i386__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# + + +# +-----------------------+ +# + thread_state pointer + +# +-----------------------+ +# + return address + +# +-----------------------+ <-- SP +# + + +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + push %eax + movl 8(%esp), %eax + movl %ebx, 4(%eax) + movl %ecx, 8(%eax) + movl %edx, 12(%eax) + movl %edi, 16(%eax) + movl %esi, 20(%eax) + movl %ebp, 24(%eax) + movl %esp, %edx + addl $8, %edx + movl %edx, 28(%eax) # store what sp was at call site as esp + # skip ss + # skip eflags + movl 4(%esp), %edx + movl %edx, 40(%eax) # store return address as eip + # skip cs + # skip ds + # skip es + # skip fs + # skip gs + movl (%esp), %edx + movl %edx, (%eax) # store original eax + popl %eax + xorl %eax, %eax # return UNW_ESUCCESS + ret + +#elif defined(__x86_64__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in rdi +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) +#if defined(_WIN64) +#define PTR %rcx +#define TMP %rdx +#else +#define PTR %rdi +#define TMP %rsi +#endif + + movq %rax, (PTR) + movq %rbx, 8(PTR) + movq %rcx, 16(PTR) + movq %rdx, 24(PTR) + movq %rdi, 32(PTR) + movq %rsi, 40(PTR) + movq %rbp, 48(PTR) + movq %rsp, 56(PTR) + addq $8, 56(PTR) + movq %r8, 64(PTR) + movq %r9, 72(PTR) + movq %r10, 80(PTR) + movq %r11, 88(PTR) + movq %r12, 96(PTR) + movq %r13,104(PTR) + movq %r14,112(PTR) + movq %r15,120(PTR) + movq (%rsp),TMP + movq TMP,128(PTR) # store return address as rip + # skip rflags + # skip cs + # skip fs + # skip gs + +#if defined(_WIN64) + movdqu %xmm0,176(PTR) + movdqu %xmm1,192(PTR) + movdqu %xmm2,208(PTR) + movdqu %xmm3,224(PTR) + movdqu %xmm4,240(PTR) + movdqu %xmm5,256(PTR) + movdqu %xmm6,272(PTR) + movdqu %xmm7,288(PTR) + movdqu %xmm8,304(PTR) + movdqu %xmm9,320(PTR) + movdqu %xmm10,336(PTR) + movdqu %xmm11,352(PTR) + movdqu %xmm12,368(PTR) + movdqu %xmm13,384(PTR) + movdqu %xmm14,400(PTR) + movdqu %xmm15,416(PTR) +#endif + xorl %eax, %eax # return UNW_ESUCCESS + ret + +#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in a0 ($4) +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + .set push + .set noat + .set noreorder + .set nomacro + sw $1, (4 * 1)($4) + sw $2, (4 * 2)($4) + sw $3, (4 * 3)($4) + sw $4, (4 * 4)($4) + sw $5, (4 * 5)($4) + sw $6, (4 * 6)($4) + sw $7, (4 * 7)($4) + sw $8, (4 * 8)($4) + sw $9, (4 * 9)($4) + sw $10, (4 * 10)($4) + sw $11, (4 * 11)($4) + sw $12, (4 * 12)($4) + sw $13, (4 * 13)($4) + sw $14, (4 * 14)($4) + sw $15, (4 * 15)($4) + sw $16, (4 * 16)($4) + sw $17, (4 * 17)($4) + sw $18, (4 * 18)($4) + sw $19, (4 * 19)($4) + sw $20, (4 * 20)($4) + sw $21, (4 * 21)($4) + sw $22, (4 * 22)($4) + sw $23, (4 * 23)($4) + sw $24, (4 * 24)($4) + sw $25, (4 * 25)($4) + sw $26, (4 * 26)($4) + sw $27, (4 * 27)($4) + sw $28, (4 * 28)($4) + sw $29, (4 * 29)($4) + sw $30, (4 * 30)($4) + sw $31, (4 * 31)($4) + # Store return address to pc + sw $31, (4 * 32)($4) + # hi and lo + mfhi $8 + sw $8, (4 * 33)($4) + mflo $8 + sw $8, (4 * 34)($4) +#ifdef __mips_hard_float +#if __mips_fpr != 64 + sdc1 $f0, (4 * 36 + 8 * 0)($4) + sdc1 $f2, (4 * 36 + 8 * 2)($4) + 
sdc1 $f4, (4 * 36 + 8 * 4)($4) + sdc1 $f6, (4 * 36 + 8 * 6)($4) + sdc1 $f8, (4 * 36 + 8 * 8)($4) + sdc1 $f10, (4 * 36 + 8 * 10)($4) + sdc1 $f12, (4 * 36 + 8 * 12)($4) + sdc1 $f14, (4 * 36 + 8 * 14)($4) + sdc1 $f16, (4 * 36 + 8 * 16)($4) + sdc1 $f18, (4 * 36 + 8 * 18)($4) + sdc1 $f20, (4 * 36 + 8 * 20)($4) + sdc1 $f22, (4 * 36 + 8 * 22)($4) + sdc1 $f24, (4 * 36 + 8 * 24)($4) + sdc1 $f26, (4 * 36 + 8 * 26)($4) + sdc1 $f28, (4 * 36 + 8 * 28)($4) + sdc1 $f30, (4 * 36 + 8 * 30)($4) +#else + sdc1 $f0, (4 * 36 + 8 * 0)($4) + sdc1 $f1, (4 * 36 + 8 * 1)($4) + sdc1 $f2, (4 * 36 + 8 * 2)($4) + sdc1 $f3, (4 * 36 + 8 * 3)($4) + sdc1 $f4, (4 * 36 + 8 * 4)($4) + sdc1 $f5, (4 * 36 + 8 * 5)($4) + sdc1 $f6, (4 * 36 + 8 * 6)($4) + sdc1 $f7, (4 * 36 + 8 * 7)($4) + sdc1 $f8, (4 * 36 + 8 * 8)($4) + sdc1 $f9, (4 * 36 + 8 * 9)($4) + sdc1 $f10, (4 * 36 + 8 * 10)($4) + sdc1 $f11, (4 * 36 + 8 * 11)($4) + sdc1 $f12, (4 * 36 + 8 * 12)($4) + sdc1 $f13, (4 * 36 + 8 * 13)($4) + sdc1 $f14, (4 * 36 + 8 * 14)($4) + sdc1 $f15, (4 * 36 + 8 * 15)($4) + sdc1 $f16, (4 * 36 + 8 * 16)($4) + sdc1 $f17, (4 * 36 + 8 * 17)($4) + sdc1 $f18, (4 * 36 + 8 * 18)($4) + sdc1 $f19, (4 * 36 + 8 * 19)($4) + sdc1 $f20, (4 * 36 + 8 * 20)($4) + sdc1 $f21, (4 * 36 + 8 * 21)($4) + sdc1 $f22, (4 * 36 + 8 * 22)($4) + sdc1 $f23, (4 * 36 + 8 * 23)($4) + sdc1 $f24, (4 * 36 + 8 * 24)($4) + sdc1 $f25, (4 * 36 + 8 * 25)($4) + sdc1 $f26, (4 * 36 + 8 * 26)($4) + sdc1 $f27, (4 * 36 + 8 * 27)($4) + sdc1 $f28, (4 * 36 + 8 * 28)($4) + sdc1 $f29, (4 * 36 + 8 * 29)($4) + sdc1 $f30, (4 * 36 + 8 * 30)($4) + sdc1 $f31, (4 * 36 + 8 * 31)($4) +#endif +#endif + jr $31 + # return UNW_ESUCCESS + or $2, $0, $0 + .set pop + +#elif defined(__mips64) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in a0 ($4) +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + .set push + .set noat + .set noreorder + .set nomacro + sd $1, (8 * 1)($4) + sd $2, (8 * 2)($4) + sd $3, (8 * 3)($4) + sd $4, (8 * 4)($4) + sd $5, (8 * 5)($4) + sd $6, (8 * 6)($4) + sd $7, (8 * 7)($4) + sd $8, (8 * 8)($4) + sd $9, (8 * 9)($4) + sd $10, (8 * 10)($4) + sd $11, (8 * 11)($4) + sd $12, (8 * 12)($4) + sd $13, (8 * 13)($4) + sd $14, (8 * 14)($4) + sd $15, (8 * 15)($4) + sd $16, (8 * 16)($4) + sd $17, (8 * 17)($4) + sd $18, (8 * 18)($4) + sd $19, (8 * 19)($4) + sd $20, (8 * 20)($4) + sd $21, (8 * 21)($4) + sd $22, (8 * 22)($4) + sd $23, (8 * 23)($4) + sd $24, (8 * 24)($4) + sd $25, (8 * 25)($4) + sd $26, (8 * 26)($4) + sd $27, (8 * 27)($4) + sd $28, (8 * 28)($4) + sd $29, (8 * 29)($4) + sd $30, (8 * 30)($4) + sd $31, (8 * 31)($4) + # Store return address to pc + sd $31, (8 * 32)($4) + # hi and lo + mfhi $8 + sd $8, (8 * 33)($4) + mflo $8 + sd $8, (8 * 34)($4) +#ifdef __mips_hard_float + sdc1 $f0, (8 * 35)($4) + sdc1 $f1, (8 * 36)($4) + sdc1 $f2, (8 * 37)($4) + sdc1 $f3, (8 * 38)($4) + sdc1 $f4, (8 * 39)($4) + sdc1 $f5, (8 * 40)($4) + sdc1 $f6, (8 * 41)($4) + sdc1 $f7, (8 * 42)($4) + sdc1 $f8, (8 * 43)($4) + sdc1 $f9, (8 * 44)($4) + sdc1 $f10, (8 * 45)($4) + sdc1 $f11, (8 * 46)($4) + sdc1 $f12, (8 * 47)($4) + sdc1 $f13, (8 * 48)($4) + sdc1 $f14, (8 * 49)($4) + sdc1 $f15, (8 * 50)($4) + sdc1 $f16, (8 * 51)($4) + sdc1 $f17, (8 * 52)($4) + sdc1 $f18, (8 * 53)($4) + sdc1 $f19, (8 * 54)($4) + sdc1 $f20, (8 * 55)($4) + sdc1 $f21, (8 * 56)($4) + sdc1 $f22, (8 * 57)($4) + sdc1 $f23, (8 * 58)($4) + sdc1 $f24, (8 * 59)($4) + sdc1 $f25, (8 * 60)($4) + sdc1 $f26, (8 * 61)($4) + sdc1 $f27, (8 * 62)($4) + sdc1 $f28, (8 * 63)($4) + sdc1 $f29, (8 * 64)($4) + sdc1 $f30, (8 
* 65)($4) + sdc1 $f31, (8 * 66)($4) +#endif + jr $31 + # return UNW_ESUCCESS + or $2, $0, $0 + .set pop + +# elif defined(__mips__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# Just trap for the time being. +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + teq $0, $0 + +#elif defined(__powerpc64__) + +// +// extern int __unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in r3 +// +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + +// store register (GPR) +#define PPC64_STR(n) \ + std %r##n, (8 * (n + 2))(%r3) + + // save GPRs + PPC64_STR(0) + mflr %r0 + std %r0, PPC64_OFFS_SRR0(%r3) // store lr as ssr0 + PPC64_STR(1) + PPC64_STR(2) + PPC64_STR(3) + PPC64_STR(4) + PPC64_STR(5) + PPC64_STR(6) + PPC64_STR(7) + PPC64_STR(8) + PPC64_STR(9) + PPC64_STR(10) + PPC64_STR(11) + PPC64_STR(12) + PPC64_STR(13) + PPC64_STR(14) + PPC64_STR(15) + PPC64_STR(16) + PPC64_STR(17) + PPC64_STR(18) + PPC64_STR(19) + PPC64_STR(20) + PPC64_STR(21) + PPC64_STR(22) + PPC64_STR(23) + PPC64_STR(24) + PPC64_STR(25) + PPC64_STR(26) + PPC64_STR(27) + PPC64_STR(28) + PPC64_STR(29) + PPC64_STR(30) + PPC64_STR(31) + + mfcr %r0 + std %r0, PPC64_OFFS_CR(%r3) + mfxer %r0 + std %r0, PPC64_OFFS_XER(%r3) + mflr %r0 + std %r0, PPC64_OFFS_LR(%r3) + mfctr %r0 + std %r0, PPC64_OFFS_CTR(%r3) + mfvrsave %r0 + std %r0, PPC64_OFFS_VRSAVE(%r3) + +#ifdef PPC64_HAS_VMX + // save VS registers + // (note that this also saves floating point registers and V registers, + // because part of VS is mapped to these registers) + + addi %r4, %r3, PPC64_OFFS_FP + +// store VS register +#define PPC64_STVS(n) \ + stxvd2x %vs##n, 0, %r4 ;\ + addi %r4, %r4, 16 + + PPC64_STVS(0) + PPC64_STVS(1) + PPC64_STVS(2) + PPC64_STVS(3) + PPC64_STVS(4) + PPC64_STVS(5) + PPC64_STVS(6) + PPC64_STVS(7) + PPC64_STVS(8) + PPC64_STVS(9) + PPC64_STVS(10) + PPC64_STVS(11) + PPC64_STVS(12) + PPC64_STVS(13) + PPC64_STVS(14) + PPC64_STVS(15) + PPC64_STVS(16) + PPC64_STVS(17) + PPC64_STVS(18) + PPC64_STVS(19) + PPC64_STVS(20) + PPC64_STVS(21) + PPC64_STVS(22) + PPC64_STVS(23) + PPC64_STVS(24) + PPC64_STVS(25) + PPC64_STVS(26) + PPC64_STVS(27) + PPC64_STVS(28) + PPC64_STVS(29) + PPC64_STVS(30) + PPC64_STVS(31) + PPC64_STVS(32) + PPC64_STVS(33) + PPC64_STVS(34) + PPC64_STVS(35) + PPC64_STVS(36) + PPC64_STVS(37) + PPC64_STVS(38) + PPC64_STVS(39) + PPC64_STVS(40) + PPC64_STVS(41) + PPC64_STVS(42) + PPC64_STVS(43) + PPC64_STVS(44) + PPC64_STVS(45) + PPC64_STVS(46) + PPC64_STVS(47) + PPC64_STVS(48) + PPC64_STVS(49) + PPC64_STVS(50) + PPC64_STVS(51) + PPC64_STVS(52) + PPC64_STVS(53) + PPC64_STVS(54) + PPC64_STVS(55) + PPC64_STVS(56) + PPC64_STVS(57) + PPC64_STVS(58) + PPC64_STVS(59) + PPC64_STVS(60) + PPC64_STVS(61) + PPC64_STVS(62) + PPC64_STVS(63) + +#else + +// store FP register +#define PPC64_STF(n) \ + stfd %f##n, (PPC64_OFFS_FP + n * 16)(%r3) + + // save float registers + PPC64_STF(0) + PPC64_STF(1) + PPC64_STF(2) + PPC64_STF(3) + PPC64_STF(4) + PPC64_STF(5) + PPC64_STF(6) + PPC64_STF(7) + PPC64_STF(8) + PPC64_STF(9) + PPC64_STF(10) + PPC64_STF(11) + PPC64_STF(12) + PPC64_STF(13) + PPC64_STF(14) + PPC64_STF(15) + PPC64_STF(16) + PPC64_STF(17) + PPC64_STF(18) + PPC64_STF(19) + PPC64_STF(20) + PPC64_STF(21) + PPC64_STF(22) + PPC64_STF(23) + PPC64_STF(24) + PPC64_STF(25) + PPC64_STF(26) + PPC64_STF(27) + PPC64_STF(28) + PPC64_STF(29) + PPC64_STF(30) + PPC64_STF(31) + + // save vector registers + + // Use 16-bytes below the stack pointer as an + // aligned buffer to save each vector register. 
+ // Note that the stack pointer is always 16-byte aligned. + subi %r4, %r1, 16 + +#define PPC64_STV_UNALIGNED(n) \ + stvx %v##n, 0, %r4 ;\ + ld %r5, 0(%r4) ;\ + std %r5, (PPC64_OFFS_V + n * 16)(%r3) ;\ + ld %r5, 8(%r4) ;\ + std %r5, (PPC64_OFFS_V + n * 16 + 8)(%r3) + + PPC64_STV_UNALIGNED(0) + PPC64_STV_UNALIGNED(1) + PPC64_STV_UNALIGNED(2) + PPC64_STV_UNALIGNED(3) + PPC64_STV_UNALIGNED(4) + PPC64_STV_UNALIGNED(5) + PPC64_STV_UNALIGNED(6) + PPC64_STV_UNALIGNED(7) + PPC64_STV_UNALIGNED(8) + PPC64_STV_UNALIGNED(9) + PPC64_STV_UNALIGNED(10) + PPC64_STV_UNALIGNED(11) + PPC64_STV_UNALIGNED(12) + PPC64_STV_UNALIGNED(13) + PPC64_STV_UNALIGNED(14) + PPC64_STV_UNALIGNED(15) + PPC64_STV_UNALIGNED(16) + PPC64_STV_UNALIGNED(17) + PPC64_STV_UNALIGNED(18) + PPC64_STV_UNALIGNED(19) + PPC64_STV_UNALIGNED(20) + PPC64_STV_UNALIGNED(21) + PPC64_STV_UNALIGNED(22) + PPC64_STV_UNALIGNED(23) + PPC64_STV_UNALIGNED(24) + PPC64_STV_UNALIGNED(25) + PPC64_STV_UNALIGNED(26) + PPC64_STV_UNALIGNED(27) + PPC64_STV_UNALIGNED(28) + PPC64_STV_UNALIGNED(29) + PPC64_STV_UNALIGNED(30) + PPC64_STV_UNALIGNED(31) + +#endif + + li %r3, 0 // return UNW_ESUCCESS + blr + + +#elif defined(__ppc__) + +// +// extern int unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in r3 +// +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + stw %r0, 8(%r3) + mflr %r0 + stw %r0, 0(%r3) // store lr as ssr0 + stw %r1, 12(%r3) + stw %r2, 16(%r3) + stw %r3, 20(%r3) + stw %r4, 24(%r3) + stw %r5, 28(%r3) + stw %r6, 32(%r3) + stw %r7, 36(%r3) + stw %r8, 40(%r3) + stw %r9, 44(%r3) + stw %r10, 48(%r3) + stw %r11, 52(%r3) + stw %r12, 56(%r3) + stw %r13, 60(%r3) + stw %r14, 64(%r3) + stw %r15, 68(%r3) + stw %r16, 72(%r3) + stw %r17, 76(%r3) + stw %r18, 80(%r3) + stw %r19, 84(%r3) + stw %r20, 88(%r3) + stw %r21, 92(%r3) + stw %r22, 96(%r3) + stw %r23,100(%r3) + stw %r24,104(%r3) + stw %r25,108(%r3) + stw %r26,112(%r3) + stw %r27,116(%r3) + stw %r28,120(%r3) + stw %r29,124(%r3) + stw %r30,128(%r3) + stw %r31,132(%r3) + + // save VRSave register + mfspr %r0, 256 + stw %r0, 156(%r3) + // save CR registers + mfcr %r0 + stw %r0, 136(%r3) + // save CTR register + mfctr %r0 + stw %r0, 148(%r3) + + // save float registers + stfd %f0, 160(%r3) + stfd %f1, 168(%r3) + stfd %f2, 176(%r3) + stfd %f3, 184(%r3) + stfd %f4, 192(%r3) + stfd %f5, 200(%r3) + stfd %f6, 208(%r3) + stfd %f7, 216(%r3) + stfd %f8, 224(%r3) + stfd %f9, 232(%r3) + stfd %f10,240(%r3) + stfd %f11,248(%r3) + stfd %f12,256(%r3) + stfd %f13,264(%r3) + stfd %f14,272(%r3) + stfd %f15,280(%r3) + stfd %f16,288(%r3) + stfd %f17,296(%r3) + stfd %f18,304(%r3) + stfd %f19,312(%r3) + stfd %f20,320(%r3) + stfd %f21,328(%r3) + stfd %f22,336(%r3) + stfd %f23,344(%r3) + stfd %f24,352(%r3) + stfd %f25,360(%r3) + stfd %f26,368(%r3) + stfd %f27,376(%r3) + stfd %f28,384(%r3) + stfd %f29,392(%r3) + stfd %f30,400(%r3) + stfd %f31,408(%r3) + + + // save vector registers + + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone + +#define SAVE_VECTOR_UNALIGNED(_vec, _offset) \ + stvx _vec, 0, %r4 SEPARATOR \ + lwz %r5, 0(%r4) SEPARATOR \ + stw %r5, _offset(%r3) SEPARATOR \ + lwz %r5, 4(%r4) SEPARATOR \ + stw %r5, _offset+4(%r3) SEPARATOR \ + lwz %r5, 8(%r4) SEPARATOR \ + stw %r5, _offset+8(%r3) SEPARATOR \ + lwz %r5, 12(%r4) SEPARATOR \ + stw %r5, _offset+12(%r3) + + SAVE_VECTOR_UNALIGNED( %v0, 424+0x000) + SAVE_VECTOR_UNALIGNED( %v1, 424+0x010) + SAVE_VECTOR_UNALIGNED( %v2, 424+0x020) + SAVE_VECTOR_UNALIGNED( %v3, 
424+0x030) + SAVE_VECTOR_UNALIGNED( %v4, 424+0x040) + SAVE_VECTOR_UNALIGNED( %v5, 424+0x050) + SAVE_VECTOR_UNALIGNED( %v6, 424+0x060) + SAVE_VECTOR_UNALIGNED( %v7, 424+0x070) + SAVE_VECTOR_UNALIGNED( %v8, 424+0x080) + SAVE_VECTOR_UNALIGNED( %v9, 424+0x090) + SAVE_VECTOR_UNALIGNED(%v10, 424+0x0A0) + SAVE_VECTOR_UNALIGNED(%v11, 424+0x0B0) + SAVE_VECTOR_UNALIGNED(%v12, 424+0x0C0) + SAVE_VECTOR_UNALIGNED(%v13, 424+0x0D0) + SAVE_VECTOR_UNALIGNED(%v14, 424+0x0E0) + SAVE_VECTOR_UNALIGNED(%v15, 424+0x0F0) + SAVE_VECTOR_UNALIGNED(%v16, 424+0x100) + SAVE_VECTOR_UNALIGNED(%v17, 424+0x110) + SAVE_VECTOR_UNALIGNED(%v18, 424+0x120) + SAVE_VECTOR_UNALIGNED(%v19, 424+0x130) + SAVE_VECTOR_UNALIGNED(%v20, 424+0x140) + SAVE_VECTOR_UNALIGNED(%v21, 424+0x150) + SAVE_VECTOR_UNALIGNED(%v22, 424+0x160) + SAVE_VECTOR_UNALIGNED(%v23, 424+0x170) + SAVE_VECTOR_UNALIGNED(%v24, 424+0x180) + SAVE_VECTOR_UNALIGNED(%v25, 424+0x190) + SAVE_VECTOR_UNALIGNED(%v26, 424+0x1A0) + SAVE_VECTOR_UNALIGNED(%v27, 424+0x1B0) + SAVE_VECTOR_UNALIGNED(%v28, 424+0x1C0) + SAVE_VECTOR_UNALIGNED(%v29, 424+0x1D0) + SAVE_VECTOR_UNALIGNED(%v30, 424+0x1E0) + SAVE_VECTOR_UNALIGNED(%v31, 424+0x1F0) + + li %r3, 0 // return UNW_ESUCCESS + blr + + +#elif defined(__arm64__) || defined(__aarch64__) + +// +// extern int __unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in x0 +// + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + stp x0, x1, [x0, #0x000] + stp x2, x3, [x0, #0x010] + stp x4, x5, [x0, #0x020] + stp x6, x7, [x0, #0x030] + stp x8, x9, [x0, #0x040] + stp x10,x11, [x0, #0x050] + stp x12,x13, [x0, #0x060] + stp x14,x15, [x0, #0x070] + stp x16,x17, [x0, #0x080] + stp x18,x19, [x0, #0x090] + stp x20,x21, [x0, #0x0A0] + stp x22,x23, [x0, #0x0B0] + stp x24,x25, [x0, #0x0C0] + stp x26,x27, [x0, #0x0D0] + stp x28,x29, [x0, #0x0E0] + str x30, [x0, #0x0F0] + mov x1,sp + str x1, [x0, #0x0F8] + str x30, [x0, #0x100] // store return address as pc + // skip cpsr + stp d0, d1, [x0, #0x110] + stp d2, d3, [x0, #0x120] + stp d4, d5, [x0, #0x130] + stp d6, d7, [x0, #0x140] + stp d8, d9, [x0, #0x150] + stp d10,d11, [x0, #0x160] + stp d12,d13, [x0, #0x170] + stp d14,d15, [x0, #0x180] + stp d16,d17, [x0, #0x190] + stp d18,d19, [x0, #0x1A0] + stp d20,d21, [x0, #0x1B0] + stp d22,d23, [x0, #0x1C0] + stp d24,d25, [x0, #0x1D0] + stp d26,d27, [x0, #0x1E0] + stp d28,d29, [x0, #0x1F0] + str d30, [x0, #0x200] + str d31, [x0, #0x208] + mov x0, #0 // return UNW_ESUCCESS + ret + +#elif defined(__arm__) && !defined(__APPLE__) + +#if !defined(__ARM_ARCH_ISA_ARM) +#if (__ARM_ARCH_ISA_THUMB == 2) + .syntax unified +#endif + .thumb +#endif + +@ +@ extern int __unw_getcontext(unw_context_t* thread_state) +@ +@ On entry: +@ thread_state pointer is in r0 +@ +@ Per EHABI #4.7 this only saves the core integer registers. +@ EHABI #7.4.5 notes that in general all VRS registers should be restored +@ however this is very hard to do for VFP registers because it is unknown +@ to the library how many registers are implemented by the architecture. +@ Instead, VFP registers are demand saved by logic external to __unw_getcontext. 
+@ + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) +#if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 + stm r0!, {r0-r7} + mov r1, r8 + mov r2, r9 + mov r3, r10 + stm r0!, {r1-r3} + mov r1, r11 + mov r2, sp + mov r3, lr + str r1, [r0, #0] @ r11 + @ r12 does not need storing, it it the intra-procedure-call scratch register + str r2, [r0, #8] @ sp + str r3, [r0, #12] @ lr + str r3, [r0, #16] @ store return address as pc + @ T1 does not have a non-cpsr-clobbering register-zeroing instruction. + @ It is safe to use here though because we are about to return, and cpsr is + @ not expected to be preserved. + movs r0, #0 @ return UNW_ESUCCESS +#else + @ 32bit thumb-2 restrictions for stm: + @ . the sp (r13) cannot be in the list + @ . the pc (r15) cannot be in the list in an STM instruction + stm r0, {r0-r12} + str sp, [r0, #52] + str lr, [r0, #56] + str lr, [r0, #60] @ store return address as pc + mov r0, #0 @ return UNW_ESUCCESS +#endif + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPv) + vstmia r0, {d0-d15} + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPv) + vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPv) + @ VFP and iwMMX instructions are only available when compiling with the flags + @ that enable them. We do not want to do that in the library (because we do not + @ want the compiler to generate instructions that access those) but this is + @ only accessed if the personality routine needs these registers. Use of + @ these registers implies they are, actually, available on the target, so + @ it's ok to execute. + @ So, generate the instructions using the corresponding coprocessor mnemonic. 
+ vstmia r0, {d16-d31} + JMP(lr) + +#if defined(_LIBUNWIND_ARM_WMMX) + +@ +@ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPv) + stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8 + stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8 + stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8 + stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8 + stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8 + stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8 + stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8 + stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8 + stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8 + stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8 + stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8 + stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8 + stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8 + stcl p1, cr13, [r0], #8 @ wstrd wR13, [r0], #8 + stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8 + stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8 + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj) + stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4 + stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4 + stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4 + stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4 + JMP(lr) + +#endif + +#elif defined(__or1k__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in r3 +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + l.sw 0(r3), r0 + l.sw 4(r3), r1 + l.sw 8(r3), r2 + l.sw 12(r3), r3 + l.sw 16(r3), r4 + l.sw 20(r3), r5 + l.sw 24(r3), r6 + l.sw 28(r3), r7 + l.sw 32(r3), r8 + l.sw 36(r3), r9 + l.sw 40(r3), r10 + l.sw 44(r3), r11 + l.sw 48(r3), r12 + l.sw 52(r3), r13 + l.sw 56(r3), r14 + l.sw 60(r3), r15 + l.sw 64(r3), r16 + l.sw 68(r3), r17 + l.sw 72(r3), r18 + l.sw 76(r3), r19 + l.sw 80(r3), r20 + l.sw 84(r3), r21 + l.sw 88(r3), r22 + l.sw 92(r3), r23 + l.sw 96(r3), r24 + l.sw 100(r3), r25 + l.sw 104(r3), r26 + l.sw 108(r3), r27 + l.sw 112(r3), r28 + l.sw 116(r3), r29 + l.sw 120(r3), r30 + l.sw 124(r3), r31 + # store ra to pc + l.sw 128(r3), r9 + # zero epcr + l.sw 132(r3), r0 + +#elif defined(__sparc__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in o0 +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + ta 3 + add %o7, 8, %o7 + std %g0, [%o0 + 0] + std %g2, [%o0 + 8] + std %g4, [%o0 + 16] + std %g6, [%o0 + 24] + std %o0, [%o0 + 32] + std %o2, [%o0 + 40] + std %o4, [%o0 + 48] + std %o6, [%o0 + 56] + std %l0, [%o0 + 64] + std %l2, [%o0 + 72] + std %l4, [%o0 + 80] + std %l6, [%o0 + 88] + std %i0, [%o0 + 96] + std %i2, [%o0 + 104] + std %i4, [%o0 + 112] + std %i6, [%o0 + 120] + jmp %o7 + clr %o0 // return UNW_ESUCCESS +#endif + + WEAK_ALIAS(__unw_getcontext, unw_getcontext) + +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ + +NO_EXEC_STACK_DIRECTIVE diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind_AppleExtras.cpp b/src/coreclr/src/nativeaot/libunwind/src/Unwind_AppleExtras.cpp new file mode 100644 index 0000000000000..248d99570e94a --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind_AppleExtras.cpp @@ -0,0 +1,183 @@ +//===--------------------- Unwind_AppleExtras.cpp 
-------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//===----------------------------------------------------------------------===// + +#include "config.h" +#include "AddressSpace.hpp" +#include "DwarfParser.hpp" + + +// private keymgr stuff +#define KEYMGR_GCC3_DW2_OBJ_LIST 302 +extern "C" { + extern void _keymgr_set_and_unlock_processwide_ptr(int key, void *ptr); + extern void *_keymgr_get_and_lock_processwide_ptr(int key); +} + +// undocumented libgcc "struct object" +struct libgcc_object { + void *start; + void *unused1; + void *unused2; + void *fde; + unsigned long encoding; + void *fde_end; + libgcc_object *next; +}; + +// undocumented libgcc "struct km_object_info" referenced by +// KEYMGR_GCC3_DW2_OBJ_LIST +struct libgcc_object_info { + libgcc_object *seen_objects; + libgcc_object *unseen_objects; + unsigned spare[2]; +}; + + +// static linker symbols to prevent wrong two level namespace for _Unwind symbols +#if defined(__arm__) + #define NOT_HERE_BEFORE_5_0(sym) \ + extern const char sym##_tmp30 __asm("$ld$hide$os3.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp30 = 0; \ + extern const char sym##_tmp31 __asm("$ld$hide$os3.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp31 = 0; \ + extern const char sym##_tmp32 __asm("$ld$hide$os3.2$_" #sym );\ + __attribute__((visibility("default"))) const char sym##_tmp32 = 0; \ + extern const char sym##_tmp40 __asm("$ld$hide$os4.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp40 = 0; \ + extern const char sym##_tmp41 __asm("$ld$hide$os4.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp41 = 0; \ + extern const char sym##_tmp42 __asm("$ld$hide$os4.2$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp42 = 0; \ + extern const char sym##_tmp43 __asm("$ld$hide$os4.3$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp43 = 0; +#elif defined(__arm64__) + #define NOT_HERE_BEFORE_10_6(sym) + #define NEVER_HERE(sym) +#else + #define NOT_HERE_BEFORE_10_6(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; + #define NEVER_HERE(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; +#endif + + +#if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) + +// +// symbols in libSystem.dylib in 10.6 and later, but are in libgcc_s.dylib in +// earlier versions +// +NOT_HERE_BEFORE_10_6(_Unwind_DeleteException) +NOT_HERE_BEFORE_10_6(_Unwind_Find_FDE) +NOT_HERE_BEFORE_10_6(_Unwind_ForcedUnwind) +NOT_HERE_BEFORE_10_6(_Unwind_GetGR) +NOT_HERE_BEFORE_10_6(_Unwind_GetIP) +NOT_HERE_BEFORE_10_6(_Unwind_GetLanguageSpecificData) +NOT_HERE_BEFORE_10_6(_Unwind_GetRegionStart) +NOT_HERE_BEFORE_10_6(_Unwind_RaiseException) +NOT_HERE_BEFORE_10_6(_Unwind_Resume) 
+NOT_HERE_BEFORE_10_6(_Unwind_SetGR) +NOT_HERE_BEFORE_10_6(_Unwind_SetIP) +NOT_HERE_BEFORE_10_6(_Unwind_Backtrace) +NOT_HERE_BEFORE_10_6(_Unwind_FindEnclosingFunction) +NOT_HERE_BEFORE_10_6(_Unwind_GetCFA) +NOT_HERE_BEFORE_10_6(_Unwind_GetDataRelBase) +NOT_HERE_BEFORE_10_6(_Unwind_GetTextRelBase) +NOT_HERE_BEFORE_10_6(_Unwind_Resume_or_Rethrow) +NOT_HERE_BEFORE_10_6(_Unwind_GetIPInfo) +NOT_HERE_BEFORE_10_6(__register_frame) +NOT_HERE_BEFORE_10_6(__deregister_frame) + +// +// symbols in libSystem.dylib for compatibility, but we don't want any new code +// using them +// +NEVER_HERE(__register_frame_info_bases) +NEVER_HERE(__register_frame_info) +NEVER_HERE(__register_frame_info_table_bases) +NEVER_HERE(__register_frame_info_table) +NEVER_HERE(__register_frame_table) +NEVER_HERE(__deregister_frame_info) +NEVER_HERE(__deregister_frame_info_bases) + +#endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) + + + + +#if defined(_LIBUNWIND_BUILD_SJLJ_APIS) +// +// symbols in libSystem.dylib in iOS 5.0 and later, but are in libgcc_s.dylib in +// earlier versions +// +NOT_HERE_BEFORE_5_0(_Unwind_GetLanguageSpecificData) +NOT_HERE_BEFORE_5_0(_Unwind_GetRegionStart) +NOT_HERE_BEFORE_5_0(_Unwind_GetIP) +NOT_HERE_BEFORE_5_0(_Unwind_SetGR) +NOT_HERE_BEFORE_5_0(_Unwind_SetIP) +NOT_HERE_BEFORE_5_0(_Unwind_DeleteException) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Register) +NOT_HERE_BEFORE_5_0(_Unwind_GetGR) +NOT_HERE_BEFORE_5_0(_Unwind_GetIPInfo) +NOT_HERE_BEFORE_5_0(_Unwind_GetCFA) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_RaiseException) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume_or_Rethrow) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Unregister) + +#endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS) + + +namespace libunwind { + +_LIBUNWIND_HIDDEN +bool checkKeyMgrRegisteredFDEs(uintptr_t pc, void *&fde) { +#if __MAC_OS_X_VERSION_MIN_REQUIRED + // lastly check for old style keymgr registration of dynamically generated + // FDEs acquire exclusive access to libgcc_object_info + libgcc_object_info *head = (libgcc_object_info *) + _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); + if (head != NULL) { + // look at each FDE in keymgr + for (libgcc_object *ob = head->unseen_objects; ob != NULL; ob = ob->next) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + const char *msg = CFI_Parser::decodeFDE( + LocalAddressSpace::sThisAddressSpace, + (uintptr_t)ob->fde, &fdeInfo, &cieInfo); + if (msg == NULL) { + // Check if this FDE is for a function that includes the pc + if ((fdeInfo.pcStart <= pc) && (pc < fdeInfo.pcEnd)) { + fde = (void*)fdeInfo.pcStart; + _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, + head); + return true; + } + } + } + } + // release libgcc_object_info + _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, head); +#else + (void)pc; + (void)fde; +#endif + return false; +} + +} + diff --git a/src/coreclr/src/nativeaot/libunwind/src/assembly.h b/src/coreclr/src/nativeaot/libunwind/src/assembly.h new file mode 100644 index 0000000000000..7132b6c561b0d --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/assembly.h @@ -0,0 +1,158 @@ +/* ===-- assembly.h - libUnwind assembler support macros -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * ===----------------------------------------------------------------------=== + * + * This file defines macros for use in libUnwind assembler source. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef UNWIND_ASSEMBLY_H +#define UNWIND_ASSEMBLY_H + +#if defined(__powerpc64__) +#define SEPARATOR ; +#define PPC64_OFFS_SRR0 0 +#define PPC64_OFFS_CR 272 +#define PPC64_OFFS_XER 280 +#define PPC64_OFFS_LR 288 +#define PPC64_OFFS_CTR 296 +#define PPC64_OFFS_VRSAVE 304 +#define PPC64_OFFS_FP 312 +#define PPC64_OFFS_V 824 +#ifdef _ARCH_PWR8 +#define PPC64_HAS_VMX +#endif +#elif defined(__arm64__) +#define SEPARATOR %% +#else +#define SEPARATOR ; +#endif + +#if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) +#define PPC64_OPD1 .section .opd,"aw",@progbits SEPARATOR +#define PPC64_OPD2 SEPARATOR \ + .p2align 3 SEPARATOR \ + .quad .Lfunc_begin0 SEPARATOR \ + .quad .TOC.@tocbase SEPARATOR \ + .quad 0 SEPARATOR \ + .text SEPARATOR \ +.Lfunc_begin0: +#else +#define PPC64_OPD1 +#define PPC64_OPD2 +#endif + +#define GLUE2(a, b) a ## b +#define GLUE(a, b) GLUE2(a, b) +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#if defined(__APPLE__) + +#define SYMBOL_IS_FUNC(name) +#define EXPORT_SYMBOL(name) +#define HIDDEN_SYMBOL(name) .private_extern name +#define WEAK_SYMBOL(name) .weak_reference name +#define WEAK_ALIAS(name, aliasname) \ + .globl SYMBOL_NAME(aliasname) SEPARATOR \ + WEAK_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__ELF__) + +#if defined(__arm__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define EXPORT_SYMBOL(name) +#define HIDDEN_SYMBOL(name) .hidden name +#define WEAK_SYMBOL(name) .weak name +#define WEAK_ALIAS(name, aliasname) \ + WEAK_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) + +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) +#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits +#else +#define NO_EXEC_STACK_DIRECTIVE +#endif + +#elif defined(_WIN32) + +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define EXPORT_SYMBOL2(name) \ + .section .drectve,"yn" SEPARATOR \ + .ascii "-export:", #name, "\0" SEPARATOR \ + .text +#if defined(_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) +#define EXPORT_SYMBOL(name) +#else +#define EXPORT_SYMBOL(name) EXPORT_SYMBOL2(name) +#endif +#define HIDDEN_SYMBOL(name) + +#if defined(__MINGW32__) +#define WEAK_ALIAS(name, aliasname) \ + .globl SYMBOL_NAME(aliasname) SEPARATOR \ + EXPORT_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) +#else +#define WEAK_ALIAS3(name, aliasname) \ + .section .drectve,"yn" SEPARATOR \ + .ascii "-alternatename:", #aliasname, "=", #name, "\0" SEPARATOR \ + .text +#define WEAK_ALIAS2(name, aliasname) \ + WEAK_ALIAS3(name, aliasname) +#define WEAK_ALIAS(name, aliasname) \ + EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \ + WEAK_ALIAS2(SYMBOL_NAME(name), SYMBOL_NAME(aliasname)) +#endif + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__sparc__) + +#else + +#error Unsupported target + +#endif + +#define DEFINE_LIBUNWIND_FUNCTION(name) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + 
HIDDEN_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + PPC64_OPD1 \ + SYMBOL_NAME(name): \ + PPC64_OPD2 + +#if defined(__arm__) +#if !defined(__ARM_ARCH) +#define __ARM_ARCH 4 +#endif + +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#else +#define JMP(r) mov pc, r +#endif +#endif /* __arm__ */ + +#endif /* UNWIND_ASSEMBLY_H */ diff --git a/src/coreclr/src/nativeaot/libunwind/src/config.h b/src/coreclr/src/nativeaot/libunwind/src/config.h new file mode 100644 index 0000000000000..09bb261647ca5 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/config.h @@ -0,0 +1,211 @@ +//===----------------------------- config.h -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Defines macros used within libunwind project. +// +//===----------------------------------------------------------------------===// + + +#ifndef LIBUNWIND_CONFIG_H +#define LIBUNWIND_CONFIG_H + +#include +#include +#include +#include + +// Define static_assert() unless already defined by compiler. +#ifndef __has_feature + #define __has_feature(__x) 0 +#endif +#if !(__has_feature(cxx_static_assert)) && !defined(static_assert) + #define static_assert(__b, __m) \ + extern int compile_time_assert_failed[ ( __b ) ? 1 : -1 ] \ + __attribute__( ( unused ) ); +#endif + +// Platform specific configuration defines. +#ifdef __APPLE__ + #if defined(FOR_DYLD) + #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND + #else + #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #endif +#elif defined(_WIN32) + #ifdef __SEH__ + #define _LIBUNWIND_SUPPORT_SEH_UNWIND 1 + #else + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #endif +#else + #if defined(__ARM_DWARF_EH__) || !defined(__arm__) + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 + #endif +#endif + +#if defined(_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) + #define _LIBUNWIND_EXPORT + #define _LIBUNWIND_HIDDEN +#else + #if !defined(__ELF__) && !defined(__MACH__) + #define _LIBUNWIND_EXPORT __declspec(dllexport) + #define _LIBUNWIND_HIDDEN + #else + #define _LIBUNWIND_EXPORT __attribute__((visibility("default"))) + #define _LIBUNWIND_HIDDEN __attribute__((visibility("hidden"))) + #endif +#endif + +#define STR(a) #a +#define XSTR(a) STR(a) +#define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name + +#if defined(__APPLE__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + __asm__(".globl " SYMBOL_NAME(aliasname)); \ + __asm__(SYMBOL_NAME(aliasname) " = " SYMBOL_NAME(name)); \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((weak_import)); +#elif defined(__ELF__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((weak, alias(#name))); +#elif defined(_WIN32) +#if defined(__MINGW32__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((alias(#name))); +#else +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + __pragma(comment(linker, "/alternatename:" SYMBOL_NAME(aliasname) "=" \ + SYMBOL_NAME(name))) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname; +#endif +#else +#error Unsupported target +#endif + +#if (defined(__APPLE__) && 
defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__) +#define _LIBUNWIND_BUILD_SJLJ_APIS +#endif + +#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__ppc64__) || defined(__powerpc64__) +#define _LIBUNWIND_SUPPORT_FRAME_APIS +#endif + +#if defined(__i386__) || defined(__x86_64__) || \ + defined(__ppc__) || defined(__ppc64__) || defined(__powerpc64__) || \ + (!defined(__APPLE__) && defined(__arm__)) || \ + (defined(__arm64__) || defined(__aarch64__)) || \ + defined(__mips__) +#if !defined(_LIBUNWIND_BUILD_SJLJ_APIS) +#define _LIBUNWIND_BUILD_ZERO_COST_APIS +#endif +#endif + +#if defined(__powerpc64__) && defined(_ARCH_PWR8) +#define PPC64_HAS_VMX +#endif + +#if defined(NDEBUG) && defined(_LIBUNWIND_IS_BAREMETAL) +#define _LIBUNWIND_ABORT(msg) \ + do { \ + abort(); \ + } while (0) +#else +#define _LIBUNWIND_ABORT(msg) \ + do { \ + fprintf(stderr, "libunwind: %s %s:%d - %s\n", __func__, __FILE__, \ + __LINE__, msg); \ + fflush(stderr); \ + abort(); \ + } while (0) +#endif + +#if defined(NDEBUG) && defined(_LIBUNWIND_IS_BAREMETAL) +#define _LIBUNWIND_LOG0(msg) +#define _LIBUNWIND_LOG(msg, ...) +#else +#define _LIBUNWIND_LOG0(msg) \ + fprintf(stderr, "libunwind: " msg "\n") +#define _LIBUNWIND_LOG(msg, ...) \ + fprintf(stderr, "libunwind: " msg "\n", __VA_ARGS__) +#endif + +#if defined(NDEBUG) + #define _LIBUNWIND_LOG_IF_FALSE(x) x +#else + #define _LIBUNWIND_LOG_IF_FALSE(x) \ + do { \ + bool _ret = x; \ + if (!_ret) \ + _LIBUNWIND_LOG("" #x " failed in %s", __FUNCTION__); \ + } while (0) +#endif + +// Macros that define away in non-Debug builds +#ifdef NDEBUG + #define _LIBUNWIND_DEBUG_LOG(msg, ...) + #define _LIBUNWIND_TRACE_API(msg, ...) + #define _LIBUNWIND_TRACING_UNWINDING (0) + #define _LIBUNWIND_TRACING_DWARF (0) + #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) + #define _LIBUNWIND_TRACE_DWARF(...) +#else + #ifdef __cplusplus + extern "C" { + #endif + extern bool logAPIs(); + extern bool logUnwinding(); + extern bool logDWARF(); + #ifdef __cplusplus + } + #endif + #define _LIBUNWIND_DEBUG_LOG(msg, ...) _LIBUNWIND_LOG(msg, __VA_ARGS__) + #define _LIBUNWIND_TRACE_API(msg, ...) \ + do { \ + if (logAPIs()) \ + _LIBUNWIND_LOG(msg, __VA_ARGS__); \ + } while (0) + #define _LIBUNWIND_TRACING_UNWINDING logUnwinding() + #define _LIBUNWIND_TRACING_DWARF logDWARF() + #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) \ + do { \ + if (logUnwinding()) \ + _LIBUNWIND_LOG(msg, __VA_ARGS__); \ + } while (0) + #define _LIBUNWIND_TRACE_DWARF(...) \ + do { \ + if (logDWARF()) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) +#endif + +#ifdef __cplusplus +// Used to fit UnwindCursor and Registers_xxx types against unw_context_t / +// unw_cursor_t sized memory blocks. 
+#if defined(_LIBUNWIND_IS_NATIVE_ONLY) +# define COMP_OP == +#else +# define COMP_OP <= +#endif +template +struct check_fit { + template + struct blk_count { + static const size_t count = + (sizeof(T) + sizeof(uint64_t) - 1) / sizeof(uint64_t); + }; + static const bool does_fit = + (blk_count<_Type>::count COMP_OP blk_count<_Mem>::count); +}; +#undef COMP_OP +#endif // __cplusplus + +#endif // LIBUNWIND_CONFIG_H diff --git a/src/coreclr/src/nativeaot/libunwind/src/dwarf2.h b/src/coreclr/src/nativeaot/libunwind/src/dwarf2.h new file mode 100644 index 0000000000000..40f0daf468059 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/dwarf2.h @@ -0,0 +1,239 @@ +//===------------------------------- dwarf2.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +/* + These constants were taken from version 3 of the DWARF standard, + which is Copyright (c) 2005 Free Standards Group, and + Copyright (c) 1992, 1993 UNIX International, Inc. +*/ + +#ifndef __DWARF2__ +#define __DWARF2__ + +// DWARF unwind instructions +enum { + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xA, + DW_CFA_restore_state = 0xB, + DW_CFA_def_cfa = 0xC, + DW_CFA_def_cfa_register = 0xD, + DW_CFA_def_cfa_offset = 0xE, + DW_CFA_def_cfa_expression = 0xF, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta + DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register + DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register + + // GNU extensions + DW_CFA_GNU_window_save = 0x2D, + DW_CFA_GNU_args_size = 0x2E, + DW_CFA_GNU_negative_offset_extended = 0x2F, + + // AARCH64 extensions + DW_CFA_AARCH64_negate_ra_state = 0x2D +}; + + +// FSF exception handling Pointer-Encoding constants +// Used in CFI augmentation by GCC +enum { + DW_EH_PE_ptr = 0x00, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_signed = 0x08, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_absptr = 0x00, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + DW_EH_PE_indirect = 0x80, + DW_EH_PE_omit = 0xFF +}; + + +// DWARF expressions +enum { + DW_OP_addr = 0x03, // constant address (size target specific) + DW_OP_deref = 0x06, + DW_OP_const1u = 0x08, // 1-byte constant + DW_OP_const1s = 0x09, // 1-byte constant + DW_OP_const2u = 0x0A, // 2-byte constant + DW_OP_const2s = 0x0B, // 2-byte constant + DW_OP_const4u = 0x0C, // 4-byte constant + DW_OP_const4s = 0x0D, // 4-byte constant + DW_OP_const8u = 0x0E, // 8-byte constant + DW_OP_const8s = 0x0F, // 8-byte constant + DW_OP_constu = 0x10, // ULEB128 constant + DW_OP_consts = 0x11, // SLEB128 constant + 
DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, // 1-byte stack index + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1A, + DW_OP_div = 0x1B, + DW_OP_minus = 0x1C, + DW_OP_mod = 0x1D, + DW_OP_mul = 0x1E, + DW_OP_neg = 0x1F, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, // ULEB128 addend + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2F, // signed 2-byte constant + DW_OP_bra = 0x28, // signed 2-byte constant + DW_OP_eq = 0x29, + DW_OP_ge = 0x2A, + DW_OP_gt = 0x2B, + DW_OP_le = 0x2C, + DW_OP_lt = 0x2D, + DW_OP_ne = 0x2E, + DW_OP_lit0 = 0x30, // Literal 0 + DW_OP_lit1 = 0x31, // Literal 1 + DW_OP_lit2 = 0x32, // Literal 2 + DW_OP_lit3 = 0x33, // Literal 3 + DW_OP_lit4 = 0x34, // Literal 4 + DW_OP_lit5 = 0x35, // Literal 5 + DW_OP_lit6 = 0x36, // Literal 6 + DW_OP_lit7 = 0x37, // Literal 7 + DW_OP_lit8 = 0x38, // Literal 8 + DW_OP_lit9 = 0x39, // Literal 9 + DW_OP_lit10 = 0x3A, // Literal 10 + DW_OP_lit11 = 0x3B, // Literal 11 + DW_OP_lit12 = 0x3C, // Literal 12 + DW_OP_lit13 = 0x3D, // Literal 13 + DW_OP_lit14 = 0x3E, // Literal 14 + DW_OP_lit15 = 0x3F, // Literal 15 + DW_OP_lit16 = 0x40, // Literal 16 + DW_OP_lit17 = 0x41, // Literal 17 + DW_OP_lit18 = 0x42, // Literal 18 + DW_OP_lit19 = 0x43, // Literal 19 + DW_OP_lit20 = 0x44, // Literal 20 + DW_OP_lit21 = 0x45, // Literal 21 + DW_OP_lit22 = 0x46, // Literal 22 + DW_OP_lit23 = 0x47, // Literal 23 + DW_OP_lit24 = 0x48, // Literal 24 + DW_OP_lit25 = 0x49, // Literal 25 + DW_OP_lit26 = 0x4A, // Literal 26 + DW_OP_lit27 = 0x4B, // Literal 27 + DW_OP_lit28 = 0x4C, // Literal 28 + DW_OP_lit29 = 0x4D, // Literal 29 + DW_OP_lit30 = 0x4E, // Literal 30 + DW_OP_lit31 = 0x4F, // Literal 31 + DW_OP_reg0 = 0x50, // Contents of reg0 + DW_OP_reg1 = 0x51, // Contents of reg1 + DW_OP_reg2 = 0x52, // Contents of reg2 + DW_OP_reg3 = 0x53, // Contents of reg3 + DW_OP_reg4 = 0x54, // Contents of reg4 + DW_OP_reg5 = 0x55, // Contents of reg5 + DW_OP_reg6 = 0x56, // Contents of reg6 + DW_OP_reg7 = 0x57, // Contents of reg7 + DW_OP_reg8 = 0x58, // Contents of reg8 + DW_OP_reg9 = 0x59, // Contents of reg9 + DW_OP_reg10 = 0x5A, // Contents of reg10 + DW_OP_reg11 = 0x5B, // Contents of reg11 + DW_OP_reg12 = 0x5C, // Contents of reg12 + DW_OP_reg13 = 0x5D, // Contents of reg13 + DW_OP_reg14 = 0x5E, // Contents of reg14 + DW_OP_reg15 = 0x5F, // Contents of reg15 + DW_OP_reg16 = 0x60, // Contents of reg16 + DW_OP_reg17 = 0x61, // Contents of reg17 + DW_OP_reg18 = 0x62, // Contents of reg18 + DW_OP_reg19 = 0x63, // Contents of reg19 + DW_OP_reg20 = 0x64, // Contents of reg20 + DW_OP_reg21 = 0x65, // Contents of reg21 + DW_OP_reg22 = 0x66, // Contents of reg22 + DW_OP_reg23 = 0x67, // Contents of reg23 + DW_OP_reg24 = 0x68, // Contents of reg24 + DW_OP_reg25 = 0x69, // Contents of reg25 + DW_OP_reg26 = 0x6A, // Contents of reg26 + DW_OP_reg27 = 0x6B, // Contents of reg27 + DW_OP_reg28 = 0x6C, // Contents of reg28 + DW_OP_reg29 = 0x6D, // Contents of reg29 + DW_OP_reg30 = 0x6E, // Contents of reg30 + DW_OP_reg31 = 0x6F, // Contents of reg31 + DW_OP_breg0 = 0x70, // base register 0 + SLEB128 offset + DW_OP_breg1 = 0x71, // base register 1 + SLEB128 offset + DW_OP_breg2 = 0x72, // base register 2 + SLEB128 offset + DW_OP_breg3 = 0x73, // base register 3 + SLEB128 offset + DW_OP_breg4 = 0x74, // base register 4 + SLEB128 offset + DW_OP_breg5 = 0x75, // base register 5 + SLEB128 offset + DW_OP_breg6 = 
0x76, // base register 6 + SLEB128 offset + DW_OP_breg7 = 0x77, // base register 7 + SLEB128 offset + DW_OP_breg8 = 0x78, // base register 8 + SLEB128 offset + DW_OP_breg9 = 0x79, // base register 9 + SLEB128 offset + DW_OP_breg10 = 0x7A, // base register 10 + SLEB128 offset + DW_OP_breg11 = 0x7B, // base register 11 + SLEB128 offset + DW_OP_breg12 = 0x7C, // base register 12 + SLEB128 offset + DW_OP_breg13 = 0x7D, // base register 13 + SLEB128 offset + DW_OP_breg14 = 0x7E, // base register 14 + SLEB128 offset + DW_OP_breg15 = 0x7F, // base register 15 + SLEB128 offset + DW_OP_breg16 = 0x80, // base register 16 + SLEB128 offset + DW_OP_breg17 = 0x81, // base register 17 + SLEB128 offset + DW_OP_breg18 = 0x82, // base register 18 + SLEB128 offset + DW_OP_breg19 = 0x83, // base register 19 + SLEB128 offset + DW_OP_breg20 = 0x84, // base register 20 + SLEB128 offset + DW_OP_breg21 = 0x85, // base register 21 + SLEB128 offset + DW_OP_breg22 = 0x86, // base register 22 + SLEB128 offset + DW_OP_breg23 = 0x87, // base register 23 + SLEB128 offset + DW_OP_breg24 = 0x88, // base register 24 + SLEB128 offset + DW_OP_breg25 = 0x89, // base register 25 + SLEB128 offset + DW_OP_breg26 = 0x8A, // base register 26 + SLEB128 offset + DW_OP_breg27 = 0x8B, // base register 27 + SLEB128 offset + DW_OP_breg28 = 0x8C, // base register 28 + SLEB128 offset + DW_OP_breg29 = 0x8D, // base register 29 + SLEB128 offset + DW_OP_breg30 = 0x8E, // base register 30 + SLEB128 offset + DW_OP_breg31 = 0x8F, // base register 31 + SLEB128 offset + DW_OP_regx = 0x90, // ULEB128 register + DW_OP_fbreg = 0x91, // SLEB128 offset + DW_OP_bregx = 0x92, // ULEB128 register followed by SLEB128 offset + DW_OP_piece = 0x93, // ULEB128 size of piece addressed + DW_OP_deref_size = 0x94, // 1-byte size of data retrieved + DW_OP_xderef_size = 0x95, // 1-byte size of data retrieved + DW_OP_nop = 0x96, + DW_OP_push_object_addres = 0x97, + DW_OP_call2 = 0x98, // 2-byte offset of DIE + DW_OP_call4 = 0x99, // 4-byte offset of DIE + DW_OP_call_ref = 0x9A, // 4- or 8-byte offset of DIE + DW_OP_lo_user = 0xE0, + DW_OP_APPLE_uninit = 0xF0, + DW_OP_hi_user = 0xFF +}; + + +#endif diff --git a/src/coreclr/src/nativeaot/libunwind/src/libunwind.cpp b/src/coreclr/src/nativeaot/libunwind/src/libunwind.cpp new file mode 100644 index 0000000000000..bc68033cd415c --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/libunwind.cpp @@ -0,0 +1,339 @@ +//===--------------------------- libunwind.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements unw_* functions from +// +//===----------------------------------------------------------------------===// + +#include + +#include "libunwind_ext.h" +#include "config.h" + +#include + + +#if !defined(__USING_SJLJ_EXCEPTIONS__) +#include "AddressSpace.hpp" +#include "UnwindCursor.hpp" + +using namespace libunwind; + +/// internal object to represent this processes address space +LocalAddressSpace LocalAddressSpace::sThisAddressSpace; + +_LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space = + (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace; + +/// Create a cursor of a thread in this process given 'context' recorded by +/// __unw_getcontext(). 
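// Typical caller-side pattern, shown only as a hedged sketch (the test sources
// later in this patch, e.g. libunwind_01.pass.cpp, follow the same shape):
// capture the current register state with unw_getcontext, wrap it in a cursor
// with unw_init_local, then walk the stack with unw_step until it returns 0
// (UNW_STEP_END) or an error.
//
//   unw_context_t context;
//   unw_cursor_t cursor;
//   unw_getcontext(&context);
//   unw_init_local(&cursor, &context);
//   while (unw_step(&cursor) > 0) {
//     unw_word_t ip;
//     unw_get_reg(&cursor, UNW_REG_IP, &ip);
//     // inspect the frame at `ip` ...
//   }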
+_LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, + unw_context_t *context) { + _LIBUNWIND_TRACE_API("__unw_init_local(cursor=%p, context=%p)", + static_cast(cursor), + static_cast(context)); +#if defined(__i386__) +# define REGISTER_KIND Registers_x86 +#elif defined(__x86_64__) +# define REGISTER_KIND Registers_x86_64 +#elif defined(__powerpc64__) +# define REGISTER_KIND Registers_ppc64 +#elif defined(__ppc__) +# define REGISTER_KIND Registers_ppc +#elif defined(__aarch64__) +# define REGISTER_KIND Registers_arm64 +#elif defined(__arm__) +# define REGISTER_KIND Registers_arm +#elif defined(__or1k__) +# define REGISTER_KIND Registers_or1k +#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 +# define REGISTER_KIND Registers_mips_o32 +#elif defined(__mips64) +# define REGISTER_KIND Registers_mips_newabi +#elif defined(__mips__) +# warning The MIPS architecture is not supported with this ABI and environment! +#elif defined(__sparc__) +# define REGISTER_KIND Registers_sparc +#else +# error Architecture not supported +#endif + // Use "placement new" to allocate UnwindCursor in the cursor buffer. + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); +#undef REGISTER_KIND + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->setInfoBasedOnIPRegister(); + + return UNW_ESUCCESS; +} +_LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local) + +/// Get value of specified register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_word_t *value) { + _LIBUNWIND_TRACE_API("__unw_get_reg(cursor=%p, regNum=%d, &value=%p)", + static_cast(cursor), regNum, + static_cast(value)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + *value = co->getReg(regNum); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_reg, unw_get_reg) + +/// Set value of specified register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_word_t value, unw_word_t *pos) { + _LIBUNWIND_TRACE_API("__unw_set_reg(cursor=%p, regNum=%d, value=0x%" PRIxPTR + ")", + static_cast(cursor), regNum, (long long)value); + typedef LocalAddressSpace::pint_t pint_t; + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + co->setReg(regNum, (pint_t)value, (pint_t)pos); + // special case altering IP to re-find info (being called by personality + // function) + if (regNum == UNW_REG_IP) { + unw_proc_info_t info; + // First, get the FDE for the old location and then update it. + co->getInfo(&info); + co->setInfoBasedOnIPRegister(false); + // If the original call expects stack adjustment, perform this now. + // Normal frame unwinding would have included the offset already in the + // CFA computation. + // Note: for PA-RISC and other platforms where the stack grows up, + // this should actually be - info.gp. LLVM doesn't currently support + // any such platforms and Clang doesn't export a macro for them. + if (info.gp) + co->setReg(UNW_REG_SP, co->getReg(UNW_REG_SP) + info.gp, 0); + } + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_set_reg, unw_set_reg) + +/// Get value of specified float register at cursor position in stack frame. 
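// Hedged usage sketch (illustrative only; `cursor` and `regNum` are assumed to
// exist in the caller): code that wants a floating-point value first asks
// whether the register number names a float register, then reads it through
// the public alias of the accessor below.
//
//   unw_fpreg_t fp;
//   if (unw_is_fpreg(&cursor, regNum) &&
//       unw_get_fpreg(&cursor, regNum, &fp) == UNW_ESUCCESS) {
//     // use fp ...
//   }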
+_LIBUNWIND_HIDDEN int __unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_fpreg_t *value) { + _LIBUNWIND_TRACE_API("__unw_get_fpreg(cursor=%p, regNum=%d, &value=%p)", + static_cast(cursor), regNum, + static_cast(value)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validFloatReg(regNum)) { + *value = co->getFloatReg(regNum); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_fpreg, unw_get_fpreg) + +/// Set value of specified float register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_fpreg_t value) { +#if defined(_LIBUNWIND_ARM_EHABI) + _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%llX)", + static_cast(cursor), regNum, value); +#else + _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%g)", + static_cast(cursor), regNum, value); +#endif + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validFloatReg(regNum)) { + co->setFloatReg(regNum, value); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_set_fpreg, unw_set_fpreg) + +/// Get location of specified register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_save_loc(unw_cursor_t *cursor, int regNum, + unw_save_loc_t* location) +{ + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + // We only support memory locations, not register locations + location->u.addr = co->getRegLocation(regNum); + location->type = (location->u.addr == 0) ? UNW_SLT_NONE : UNW_SLT_MEMORY; + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_save_loc, unw_get_save_loc) + +/// Move cursor to next frame. +_LIBUNWIND_HIDDEN int __unw_step(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_step(cursor=%p)", static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->step(); +} +_LIBUNWIND_WEAK_ALIAS(__unw_step, unw_step) + +/// Get unwind info at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor, + unw_proc_info_t *info) { + _LIBUNWIND_TRACE_API("__unw_get_proc_info(cursor=%p, &info=%p)", + static_cast(cursor), static_cast(info)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->getInfo(info); + if (info->end_ip == 0) + return UNW_ENOINFO; + else + return UNW_ESUCCESS; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info) + +/// Resume execution at cursor position (aka longjump). +_LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->jumpto(); + return UNW_EUNSPEC; +} +_LIBUNWIND_WEAK_ALIAS(__unw_resume, unw_resume) + +/// Get name of function at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_proc_name(unw_cursor_t *cursor, char *buf, + size_t bufLen, unw_word_t *offset) { + _LIBUNWIND_TRACE_API("__unw_get_proc_name(cursor=%p, &buf=%p, bufLen=%lu)", + static_cast(cursor), static_cast(buf), + static_cast(bufLen)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->getFunctionName(buf, bufLen, offset)) + return UNW_ESUCCESS; + else + return UNW_EUNSPEC; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_name, unw_get_proc_name) + +/// Checks if a register is a floating-point register. 
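// Hedged sketch of using the unw_get_save_loc alias defined above (as its
// implementation notes, only memory locations are reported): a stack walker
// can ask where a register was saved in the current frame and then read or
// update that slot directly. Variable names are illustrative assumptions, and
// whether a location exists depends on the frame being examined.
//
//   unw_save_loc_t loc;
//   if (unw_get_save_loc(&cursor, UNW_REG_IP, &loc) == UNW_ESUCCESS &&
//       loc.type == UNW_SLT_MEMORY) {
//     void **slot = (void **)loc.u.addr;  // address holding the saved value
//     // inspect or update *slot ...
//   }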
+_LIBUNWIND_HIDDEN int __unw_is_fpreg(unw_cursor_t *cursor, + unw_regnum_t regNum) { + _LIBUNWIND_TRACE_API("__unw_is_fpreg(cursor=%p, regNum=%d)", + static_cast(cursor), regNum); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->validFloatReg(regNum); +} +_LIBUNWIND_WEAK_ALIAS(__unw_is_fpreg, unw_is_fpreg) + +/// Checks if a register is a floating-point register. +_LIBUNWIND_HIDDEN const char *__unw_regname(unw_cursor_t *cursor, + unw_regnum_t regNum) { + _LIBUNWIND_TRACE_API("__unw_regname(cursor=%p, regNum=%d)", + static_cast(cursor), regNum); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->getRegisterName(regNum); +} +_LIBUNWIND_WEAK_ALIAS(__unw_regname, unw_regname) + +/// Checks if current frame is signal trampoline. +_LIBUNWIND_HIDDEN int __unw_is_signal_frame(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_is_signal_frame(cursor=%p)", + static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->isSignalFrame(); +} +_LIBUNWIND_WEAK_ALIAS(__unw_is_signal_frame, unw_is_signal_frame) + +#ifdef __arm__ +// Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD +_LIBUNWIND_HIDDEN void __unw_save_vfp_as_X(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_get_fpreg_save_vfp_as_X(cursor=%p)", + static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->saveVFPAsX(); +} +_LIBUNWIND_WEAK_ALIAS(__unw_save_vfp_as_X, unw_save_vfp_as_X) +#endif + + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +/// SPI: walks cached DWARF entries +_LIBUNWIND_HIDDEN void __unw_iterate_dwarf_unwind_cache(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { + _LIBUNWIND_TRACE_API("__unw_iterate_dwarf_unwind_cache(func=%p)", + reinterpret_cast(func)); + DwarfFDECache::iterateCacheEntries(func); +} +_LIBUNWIND_WEAK_ALIAS(__unw_iterate_dwarf_unwind_cache, + unw_iterate_dwarf_unwind_cache) + +/// IPI: for __register_frame() +void __unw_add_dynamic_fde(unw_word_t fde) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + const char *message = CFI_Parser::decodeFDE( + LocalAddressSpace::sThisAddressSpace, + (LocalAddressSpace::pint_t) fde, &fdeInfo, &cieInfo); + if (message == NULL) { + // dynamically registered FDEs don't have a mach_header group they are in. 
+ // Use fde as mh_group + unw_word_t mh_group = fdeInfo.fdeStart; + DwarfFDECache::add((LocalAddressSpace::pint_t)mh_group, + fdeInfo.pcStart, fdeInfo.pcEnd, + fdeInfo.fdeStart); + } else { + _LIBUNWIND_DEBUG_LOG("__unw_add_dynamic_fde: bad fde: %s", message); + } +} + +/// IPI: for __deregister_frame() +void __unw_remove_dynamic_fde(unw_word_t fde) { + // fde is own mh_group + DwarfFDECache::removeAllIn((LocalAddressSpace::pint_t)fde); +} +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#endif // !defined(__USING_SJLJ_EXCEPTIONS__) + + + +// Add logging hooks in Debug builds only +#ifndef NDEBUG +#include + +_LIBUNWIND_HIDDEN +bool logAPIs() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_APIS") != NULL); + checked = true; + } + return log; +} + +_LIBUNWIND_HIDDEN +bool logUnwinding() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_UNWINDING") != NULL); + checked = true; + } + return log; +} + +_LIBUNWIND_HIDDEN +bool logDWARF() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_DWARF") != NULL); + checked = true; + } + return log; +} + +#endif // NDEBUG + diff --git a/src/coreclr/src/nativeaot/libunwind/src/libunwind_ext.h b/src/coreclr/src/nativeaot/libunwind/src/libunwind_ext.h new file mode 100644 index 0000000000000..b240ba7fbcacd --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/libunwind_ext.h @@ -0,0 +1,66 @@ +//===------------------------ libunwind_ext.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Extensions to libunwind API. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBUNWIND_EXT__ +#define __LIBUNWIND_EXT__ + +#include "config.h" +#include +#include + +#define UNW_STEP_SUCCESS 1 +#define UNW_STEP_END 0 + +#ifdef __cplusplus +extern "C" { +#endif + +extern int __unw_getcontext(unw_context_t *); +extern int __unw_init_local(unw_cursor_t *, unw_context_t *); +extern int __unw_step(unw_cursor_t *); +extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *); +extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *); +extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t, unw_word_t *); +extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t); +extern int __unw_resume(unw_cursor_t *); + +#ifdef __arm__ +/* Save VFP registers in FSTMX format (instead of FSTMD). 
*/ +extern void __unw_save_vfp_as_X(unw_cursor_t *); +#endif + +extern const char *__unw_regname(unw_cursor_t *, unw_regnum_t); +extern int __unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *); +extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t); +extern int __unw_is_signal_frame(unw_cursor_t *); +extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); +extern int __unw_get_save_loc(unw_cursor_t *, int, unw_save_loc_t *); + +// SPI +extern void __unw_iterate_dwarf_unwind_cache(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)); + +// IPI +extern void __unw_add_dynamic_fde(unw_word_t fde); +extern void __unw_remove_dynamic_fde(unw_word_t fde); + +#if defined(_LIBUNWIND_ARM_EHABI) +extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*); +extern _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context, + const uint32_t *data, + size_t offset, size_t len); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __LIBUNWIND_EXT__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/unwind_ext.h b/src/coreclr/src/nativeaot/libunwind/src/unwind_ext.h new file mode 100644 index 0000000000000..c40ce6a1610f4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/unwind_ext.h @@ -0,0 +1,37 @@ +//===-------------------------- unwind_ext.h ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +// +// Extensions to unwind API. +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_EXT__ +#define __UNWIND_EXT__ + +#include "unwind.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// These platform specific functions to get and set the top context are +// implemented elsewhere. 
+ +extern struct _Unwind_FunctionContext * +__Unwind_SjLj_GetTopOfFunctionStack(); + +extern void +__Unwind_SjLj_SetTopOfFunctionStack(struct _Unwind_FunctionContext *fc); + +#ifdef __cplusplus +} +#endif + +#endif // __UNWIND_EXT__ + + diff --git a/src/coreclr/src/nativeaot/libunwind/test/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/test/CMakeLists.txt new file mode 100644 index 0000000000000..d902e3e829410 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/CMakeLists.txt @@ -0,0 +1,35 @@ +include(AddLLVM) # for add_lit_testsuite +macro(pythonize_bool var) + if (${var}) + set(${var} True) + else() + set(${var} False) + endif() +endmacro() + +if (NOT DEFINED LIBCXX_ENABLE_SHARED) + set(LIBCXX_ENABLE_SHARED ON) +endif() + +pythonize_bool(LIBUNWIND_BUILD_32_BITS) +pythonize_bool(LIBCXX_ENABLE_SHARED) +pythonize_bool(LIBUNWIND_ENABLE_SHARED) +pythonize_bool(LIBUNWIND_ENABLE_THREADS) +pythonize_bool(LIBUNWIND_ENABLE_EXCEPTIONS) +pythonize_bool(LIBUNWIND_USE_COMPILER_RT) +pythonize_bool(LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY) +set(LIBUNWIND_TARGET_INFO "libcxx.test.target_info.LocalTI" CACHE STRING + "TargetInfo to use when setting up test environment.") +set(LIBUNWIND_EXECUTOR "None" CACHE STRING + "Executor to use when running tests.") + +set(AUTO_GEN_COMMENT "## Autogenerated by libunwind configuration.\n# Do not edit!") +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg + @ONLY) + +add_lit_testsuite(check-unwind "Running libunwind tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${LIBUNWIND_TEST_DEPS} + ) diff --git a/src/coreclr/src/nativeaot/libunwind/test/alignment.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/alignment.pass.cpp new file mode 100644 index 0000000000000..b0da7f1551346 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/alignment.pass.cpp @@ -0,0 +1,28 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// The Itanium ABI requires that _Unwind_Exception objects are "double-word +// aligned". 
+ +#include + +// EHABI : 8-byte aligned +// itanium: largest supported alignment for the system +#if defined(_LIBUNWIND_ARM_EHABI) +static_assert(alignof(_Unwind_Control_Block) == 8, + "_Unwind_Control_Block must be double-word aligned"); +#else +struct MaxAligned {} __attribute__((__aligned__)); +static_assert(alignof(_Unwind_Exception) == alignof(MaxAligned), + "_Unwind_Exception must be maximally aligned"); +#endif + +int main() +{ +} diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind/__init__.py b/src/coreclr/src/nativeaot/libunwind/test/libunwind/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/__init__.py b/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/config.py b/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/config.py new file mode 100644 index 0000000000000..05e3f3cc21f31 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/config.py @@ -0,0 +1,68 @@ +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## +import os +import sys + +from libcxx.test.config import Configuration as LibcxxConfiguration + + +class Configuration(LibcxxConfiguration): + # pylint: disable=redefined-outer-name + def __init__(self, lit_config, config): + super(Configuration, self).__init__(lit_config, config) + self.libunwind_src_root = None + self.libunwind_obj_root = None + self.abi_library_path = None + self.libcxx_src_root = None + + def configure_src_root(self): + self.libunwind_src_root = (self.get_lit_conf('libunwind_src_root') + or os.path.dirname(self.config.test_source_root)) + self.libcxx_src_root = (self.get_lit_conf('libcxx_src_root') + or os.path.join(self.libunwind_src_root, '..', 'libcxx')) + + def configure_obj_root(self): + self.libunwind_obj_root = self.get_lit_conf('libunwind_obj_root') + super(Configuration, self).configure_obj_root() + + def has_cpp_feature(self, feature, required_value): + return int(self.cxx.dumpMacros().get('__cpp_' + feature, 0)) >= required_value + + def configure_features(self): + super(Configuration, self).configure_features() + if not self.get_lit_bool('enable_exceptions', True): + self.config.available_features.add('libcxxabi-no-exceptions') + + def configure_compile_flags(self): + self.cxx.compile_flags += ['-DLIBUNWIND_NO_TIMER'] + if not self.get_lit_bool('enable_exceptions', True): + self.cxx.compile_flags += ['-fno-exceptions', '-DLIBUNWIND_HAS_NO_EXCEPTIONS'] + # Stack unwinding tests need unwinding tables and these are not + # generated by default on all Targets. 
+ self.cxx.compile_flags += ['-funwind-tables'] + if not self.get_lit_bool('enable_threads', True): + self.cxx.compile_flags += ['-D_LIBUNWIND_HAS_NO_THREADS'] + self.config.available_features.add('libunwind-no-threads') + super(Configuration, self).configure_compile_flags() + + def configure_compile_flags_header_includes(self): + self.configure_config_site_header() + + libunwind_headers = self.get_lit_conf( + 'libunwind_headers', + os.path.join(self.libunwind_src_root, 'include')) + if not os.path.isdir(libunwind_headers): + self.lit_config.fatal("libunwind_headers='%s' is not a directory." + % libunwind_headers) + self.cxx.compile_flags += ['-I' + libunwind_headers] + + def configure_compile_flags_exceptions(self): + pass + + def configure_compile_flags_rtti(self): + pass diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind_01.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/libunwind_01.pass.cpp new file mode 100644 index 0000000000000..6957d98f956d7 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/libunwind_01.pass.cpp @@ -0,0 +1,42 @@ +#include +#include + +void backtrace(int lower_bound) { + unw_context_t context; + unw_getcontext(&context); + + unw_cursor_t cursor; + unw_init_local(&cursor, &context); + + int n = 0; + do { + ++n; + if (n > 100) { + abort(); + } + } while (unw_step(&cursor) > 0); + + if (n < lower_bound) { + abort(); + } +} + +void test1(int i) { + backtrace(i); +} + +void test2(int i, int j) { + backtrace(i); + test1(j); +} + +void test3(int i, int j, int k) { + backtrace(i); + test2(j, k); +} + +int main() { + test1(1); + test2(1, 2); + test3(1, 2, 3); +} diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind_02.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/libunwind_02.pass.cpp new file mode 100644 index 0000000000000..a0efd1df79fa4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/libunwind_02.pass.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +#define EXPECTED_NUM_FRAMES 50 +#define NUM_FRAMES_UPPER_BOUND 100 + +_Unwind_Reason_Code callback(_Unwind_Context *context, void *cnt) { + (void)context; + int *i = (int *)cnt; + ++*i; + if (*i > NUM_FRAMES_UPPER_BOUND) { + abort(); + } + return _URC_NO_REASON; +} + +void test_backtrace() { + int n = 0; + _Unwind_Backtrace(&callback, &n); + if (n < EXPECTED_NUM_FRAMES) { + abort(); + } +} + +int test(int i) { + if (i == 0) { + test_backtrace(); + return 0; + } else { + return i + test(i - 1); + } +} + +int main() { + int total = test(50); + assert(total == 1275); +} diff --git a/src/coreclr/src/nativeaot/libunwind/test/lit.cfg b/src/coreclr/src/nativeaot/libunwind/test/lit.cfg new file mode 100644 index 0000000000000..1d284bdfd771a --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/lit.cfg @@ -0,0 +1,70 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: + +# Configuration file for the 'lit' test runner. + + +import os +import site + +site.addsitedir(os.path.dirname(__file__)) + + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +# name: The name of this test suite. +config.name = 'libunwind' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.cpp', '.s'] + +# test_source_root: The root path where tests are located. 
+config.test_source_root = os.path.dirname(__file__) + +# needed to test libunwind with code that throws exceptions +config.enable_exceptions = True + +# Infer the libcxx_test_source_root for configuration import. +# If libcxx_source_root isn't specified in the config, assume that the libcxx +# and libunwind source directories are sibling directories. +libcxx_src_root = getattr(config, 'libcxx_src_root', None) +if not libcxx_src_root: + libcxx_src_root = os.path.join(config.test_source_root, '../../libcxx') +libcxx_test_src_root = os.path.join(libcxx_src_root, 'utils') +if os.path.isfile(os.path.join(libcxx_test_src_root, 'libcxx', '__init__.py')): + site.addsitedir(libcxx_test_src_root) +else: + lit_config.fatal('Could not find libcxx test directory for test imports' + ' in: %s' % libcxx_test_src_root) + +# Infer the test_exec_root from the libcxx_object root. +obj_root = getattr(config, 'libunwind_obj_root', None) + +# Check that the test exec root is known. +if obj_root is None: + import libcxx.test.config + libcxx.test.config.loadSiteConfig( + lit_config, config, 'libunwind_site_config', 'LIBUNWIND_SITE_CONFIG') + obj_root = getattr(config, 'libunwind_obj_root', None) + if obj_root is None: + import tempfile + obj_root = tempfile.mkdtemp(prefix='libunwind-testsuite-') + lit_config.warning('Creating temporary directory for object root: %s' % + obj_root) + +config.test_exec_root = os.path.join(obj_root, 'test') + +cfg_variant = getattr(config, 'configuration_variant', 'libunwind') +if cfg_variant: + lit_config.note('Using configuration variant: %s' % cfg_variant) + +# Load the Configuration class from the module name .test.config. +config_module_name = '.'.join([cfg_variant, 'test', 'config']) +config_module = __import__(config_module_name, fromlist=['Configuration']) + +configuration = config_module.Configuration(lit_config, config) +configuration.configure() +configuration.print_config_info() +config.test_format = configuration.get_test_format() diff --git a/src/coreclr/src/nativeaot/libunwind/test/lit.site.cfg.in b/src/coreclr/src/nativeaot/libunwind/test/lit.site.cfg.in new file mode 100644 index 0000000000000..34da72ac10684 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/lit.site.cfg.in @@ -0,0 +1,30 @@ +@AUTO_GEN_COMMENT@ +config.cxx_under_test = "@LIBUNWIND_COMPILER@" +config.project_obj_root = "@CMAKE_BINARY_DIR@" +config.libunwind_src_root = "@LIBUNWIND_SOURCE_DIR@" +config.libunwind_obj_root = "@LIBUNWIND_BINARY_DIR@" +config.abi_library_path = "@LIBUNWIND_LIBRARY_DIR@" +config.libcxx_src_root = "@LIBUNWIND_LIBCXX_PATH@" +config.libunwind_headers = "@LIBUNWIND_SOURCE_DIR@/include" +config.cxx_library_root = "@LIBUNWIND_LIBCXX_LIBRARY_PATH@" +config.llvm_unwinder = True +config.builtins_library = "@LIBUNWIND_BUILTINS_LIBRARY@" +config.enable_threads = @LIBUNWIND_ENABLE_THREADS@ +config.use_sanitizer = "@LLVM_USE_SANITIZER@" +config.enable_32bit = @LIBUNWIND_BUILD_32_BITS@ +config.target_info = "@LIBUNWIND_TARGET_INFO@" +config.test_linker_flags = "@LIBUNWIND_TEST_LINKER_FLAGS@" +config.test_compiler_flags = "@LIBUNWIND_TEST_COMPILER_FLAGS@" +config.executor = "@LIBUNWIND_EXECUTOR@" +config.libunwind_shared = @LIBUNWIND_ENABLE_SHARED@ +config.enable_shared = @LIBCXX_ENABLE_SHARED@ +config.enable_exceptions = @LIBUNWIND_ENABLE_EXCEPTIONS@ +config.host_triple = "@LLVM_HOST_TRIPLE@" +config.target_triple = "@TARGET_TRIPLE@" +config.use_target = bool("@LIBUNWIND_TARGET_TRIPLE@") +config.sysroot = "@LIBUNWIND_SYSROOT@" +config.gcc_toolchain = "@LIBUNWIND_GCC_TOOLCHAIN@" 
+config.cxx_ext_threads = @LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY@ + +# Let the main config do the real work. +lit_config.load_config(config, "@LIBUNWIND_SOURCE_DIR@/test/lit.cfg") diff --git a/src/coreclr/src/nativeaot/libunwind/test/unw_getcontext.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/unw_getcontext.pass.cpp new file mode 100644 index 0000000000000..b012706a0bf92 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/unw_getcontext.pass.cpp @@ -0,0 +1,8 @@ +#include +#include + +int main() { + unw_context_t context; + int ret = unw_getcontext(&context); + assert(ret == UNW_ESUCCESS); +}
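// A further hedged sketch in the style of the test above (not part of the
// patch): once unw_getcontext succeeds, the context can be wrapped in a cursor
// and the current instruction pointer read back, exercising unw_init_local and
// unw_get_reg together.
//
//   unw_context_t context;
//   unw_cursor_t cursor;
//   assert(unw_getcontext(&context) == UNW_ESUCCESS);
//   assert(unw_init_local(&cursor, &context) == UNW_ESUCCESS);
//   unw_word_t ip = 0;
//   assert(unw_get_reg(&cursor, UNW_REG_IP, &ip) == UNW_ESUCCESS);
//   assert(ip != 0);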